author    Arnold Schwaighofer <aschwaighofer@apple.com>  2013-04-29 21:28:24 +0000
committer Arnold Schwaighofer <aschwaighofer@apple.com>  2013-04-29 21:28:24 +0000
commit    474df6d3ede6428fbd1f04b4501f0944a358c1f5 (patch)
tree      0944b7e8468f3a89cbf91617030d4b144b7d2924 /llvm/test/Transforms/SimplifyCFG
parent    29cb481ba064ce2da48488e0c9e9fafb0d52bb86 (diff)
SimplifyCFG: If-convert single conditional stores
This resurrects r179957, but adds code that makes sure we don't touch
atomic/volatile stores.

This transformation turns a conditional store that is preceded by an
unconditional store to the same location:

  a[i] = X
  ... load that may alias a[i] ...
  if (cond)
    a[i] = Y

into an unconditional store:

  a[i] = X
  ... load that may alias a[i] ...
  tmp = cond ? Y : X;
  a[i] = tmp

We assume that, on average, the cost of a mispredicted branch is higher than
the cost of a second store to the same location, and that the secondary
benefit of creating a bigger basic block for other optimizations to work on
outweighs the case where the branch would have been correctly predicted and
the cost of executing the second store would be noticeably reflected in
performance.

hmmer's execution time improves by 30% on an imac12,2 on ref data sets. With
this change we are on par with gcc's performance (gcc also performs this
transformation). There was a 1.2% performance improvement on an ARM Swift
chip. Other tests in the test-suite+external seem to be mostly uninfluenced
in my experiments: this optimization was triggered on 41 tests such that the
executable was different before/after the patch. Only 1 of those tests
(dealII) showed a reproducible slowdown (below 100%, by about 0.4%). Given
that hmmer benefits so much, I believe this to be a fair trade-off.

llvm-svn: 180731
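For orientation, here is a minimal C-level sketch of the pattern (mirroring the
@ifconvertstore test added below); the function and variable names are
illustrative only and do not come from the patch or from hmmer:

  /* Illustrative sketch only: the shape of code this transform targets. */
  void speculate_store_sketch(int *a, int *b, int c, int d) {
    int add = b[0] + c;
    a[0] = add;               /* unconditional store to a[0]            */
    int v  = b[1] + d;        /* load from b, which may alias a         */
    if (v > c)
      a[0] = v;               /* conditional store to the same location */
  }

  /* After SimplifyCFG's store speculation, the branch is gone and the
   * second store executes unconditionally, selecting the stored value:
   *
   *   a[0] = (v > c) ? v : add;
   */

Both stores hit the same, already-written address, so the observable result is
unchanged; the expected win is removing a branch that may be mispredicted, at
the price of an extra store when the condition is false.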
Diffstat (limited to 'llvm/test/Transforms/SimplifyCFG')
-rw-r--r--  llvm/test/Transforms/SimplifyCFG/speculate-store.ll | 108
1 file changed, 108 insertions(+), 0 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-store.ll b/llvm/test/Transforms/SimplifyCFG/speculate-store.ll
new file mode 100644
index 00000000000..8d7fe79dcd4
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -0,0 +1,108 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+define void @ifconvertstore(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+ store i32 %add, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; Make sure we speculate stores like the following one. It is cheap compared to
+; a mispredicted branch.
+; CHECK: @ifconvertstore
+; CHECK: %add5.add = select i1 %cmp6, i32 %add5, i32 %add
+; CHECK: store i32 %add5.add, i32* %arrayidx2, align 4
+if.then:
+ store i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}
+
+define void @noifconvertstore1(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; Store to a different location.
+ store i32 %add, i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; CHECK: @noifconvertstore1
+; CHECK-NOT: select
+if.then:
+ store i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}
+
+declare void @unknown_fun()
+
+define void @noifconvertstore2(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+ store i32 %add, i32* %arrayidx2, align 4
+ call void @unknown_fun()
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; CHECK: @noifconvertstore2
+; CHECK-NOT: select
+if.then:
+ store i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}
+
+define void @noifconvertstore_volatile(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+ %arrayidx = getelementptr inbounds i32* %B, i64 0
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %C
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+ store i32 %add, i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+ %1 = load i32* %arrayidx4, align 4
+ %add5 = add nsw i32 %1, %D
+ %cmp6 = icmp sgt i32 %add5, %C
+ br i1 %cmp6, label %if.then, label %ret.end
+
+; Make sure we don't speculate volatile stores.
+; CHECK: @noifconvertstore_volatile
+; CHECK-NOT: select
+if.then:
+ store volatile i32 %add5, i32* %arrayidx2, align 4
+ br label %ret.end
+
+ret.end:
+ ret void
+}
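
The three negative tests above are about correctness rather than cost. As a
hedged illustration of the volatile case (the device-register scenario and the
names below are hypothetical, not taken from the patch):

  #include <stdint.h>

  /* Illustrative only: a volatile store models an observable side effect,
   * e.g. writing a memory-mapped control register.  Hoisting it out of the
   * branch would change how many volatile writes the device observes, so
   * SimplifyCFG must not speculate it. */
  void maybe_kick_device(volatile uint32_t *ctrl, uint32_t cmd, int ready) {
    if (ready)
      *ctrl = cmd;   /* must stay conditional */
  }

Likewise, @noifconvertstore1 keeps its branch because the preceding store is to
a different address, and @noifconvertstore2 keeps it because the call to
@unknown_fun between the two stores could observe or modify the stored value.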