diff options
| author | Chad Rosier <mcrosier@codeaurora.org> | 2015-06-09 20:59:41 +0000 |
|---|---|---|
| committer | Chad Rosier <mcrosier@codeaurora.org> | 2015-06-09 20:59:41 +0000 |
| commit | cf90acc1041fea47db7d862a21b24534b5df21b0 (patch) | |
| tree | d451df3b64c0c63a853612fa919c5dfca2821a1d /llvm/test | |
| parent | 8ae395de66bec80bfdfb72da8e1217abeec9b913 (diff) | |
| download | bcm5719-llvm-cf90acc1041fea47db7d862a21b24534b5df21b0.tar.gz bcm5719-llvm-cf90acc1041fea47db7d862a21b24534b5df21b0.zip | |
[AArch64] Remove an overly conservative check when generating store pairs.
A store instruction only reads its source register; it never modifies it.
It is therefore safe to form a store pair even if that source register is
read by another instruction between the two stores.
Previously, the read of w1 by the add (see below) prevented the formation of an stp.
str w0, [x2]
ldr w8, [x2, #8]
add w0, w8, w1
str w1, [x2, #4]
ret
We now generate the following code.
stp w0, w1, [x2]
ldr w8, [x2, #8]
add w0, w8, w1
ret
All SPEC200x and EEMBC correctness tests pass with -Ofast on A57.
Performance results for SPEC2K were within noise.
llvm-svn: 239432
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-stp.ll | 32 |
1 file changed, 32 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/AArch64/arm64-stp.ll b/llvm/test/CodeGen/AArch64/arm64-stp.ll
index 4d76396471a..72561aac6e8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stp.ll
@@ -99,3 +99,35 @@ entry:
   store <4 x i32> %p20, <4 x i32>* %p21, align 4
   ret void
 }
+
+; Read of %b to compute %tmp2 shouldn't prevent formation of stp
+; CHECK-LABEL: stp_int_rar_hazard
+; CHECK: stp w0, w1, [x2]
+; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
+; CHECK: add w0, [[REG]], w1
+; CHECK: ret
+define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind {
+  store i32 %a, i32* %p, align 4
+  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 2
+  %tmp = load i32, i32* %ld.ptr, align 4
+  %tmp2 = add i32 %tmp, %b
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  store i32 %b, i32* %add.ptr, align 4
+  ret i32 %tmp2
+}
+
+; Read of %b to compute %tmp2 shouldn't prevent formation of stp
+; CHECK-LABEL: stp_int_rar_hazard_after
+; CHECK: ldr [[REG:w[0-9]+]], [x3, #4]
+; CHECK: add w0, [[REG]], w2
+; CHECK: stp w1, w2, [x3]
+; CHECK: ret
+define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, i32* nocapture %p) nounwind {
+  store i32 %a, i32* %p, align 4
+  %ld.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  %tmp = load i32, i32* %ld.ptr, align 4
+  %tmp2 = add i32 %tmp, %b
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  store i32 %b, i32* %add.ptr, align 4
+  ret i32 %tmp2
+}
```

