author    Chad Rosier <mcrosier@codeaurora.org>  2016-03-18 19:21:02 +0000
committer Chad Rosier <mcrosier@codeaurora.org>  2016-03-18 19:21:02 +0000
commit    cdfd7e7201960b9c932405cb2dbde4b55bfdaaae (patch)
tree      64978669d79d5d92790ada47e7b5c6ca41ec11a6 /llvm/test
parent    4c62c7c981e9c66bb21f7229b715fe833e39b955 (diff)
[AArch64] Enable more load clustering in the MI Scheduler.
This patch adds unscaled loads and sign-extend loads to the TII getMemOpBaseRegImmOfs API, which is used to control clustering in the MI scheduler. This is done to create more opportunities for load pairing. I've also added the scaled LDRSWui instruction, which was missing from the scaled instructions. Finally, I've added support in shouldClusterLoads for clustering adjacent sext and zext loads, which can also be paired by the load/store optimizer.

Differential Revision: http://reviews.llvm.org/D18048

llvm-svn: 263819
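For context, the decision happens in the AArch64 TargetInstrInfo hooks named above: the MI scheduler first calls getMemOpBaseRegImmOfs to decompose each load into a base register plus an immediate offset, then asks shouldClusterLoads whether two such loads are worth scheduling back to back. Below is a minimal sketch of the shape of that check, assuming the LLVM 3.8-era MachineInstr* signatures; the body, the operand indices, and the canPairLoadOpc helper are illustrative assumptions, not the committed implementation.

  // Sketch only: canPairLoadOpc and the operand indices are assumptions
  // for illustration, not the code landed in this commit.
  bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
                                            MachineInstr *SecondLdSt,
                                            unsigned NumLoads) const {
    // Only schedule a single pair together; larger clusters gain nothing
    // for the load/store optimizer, which forms ldp/ldpsw from two loads.
    if (NumLoads > 1)
      return false;
    // Scaled, unscaled, sign- and zero-extending loads of the same width
    // may still pair (e.g. LDRSWui with LDRWui), so compare opcode
    // classes rather than requiring identical opcodes.
    if (!canPairLoadOpc(FirstLdSt->getOpcode(), SecondLdSt->getOpcode()))
      return false;
    // getMemOpBaseRegImmOfs has already established base+offset form for
    // both loads; cluster only loads from immediately adjacent slots.
    int64_t Offset1 = FirstLdSt->getOperand(2).getImm();
    int64_t Offset2 = SecondLdSt->getOperand(2).getImm();
    return Offset1 + 1 == Offset2;
  }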
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll | 99
1 file changed, 99 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
new file mode 100644
index 00000000000..0d659eb0a4b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
@@ -0,0 +1,99 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+
+; Test ldr clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldr_int:BB#0
+; CHECK: Cluster loads SU(1) - SU(2)
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
+define i32 @ldr_int(i32* %a) nounwind {
+ %p1 = getelementptr inbounds i32, i32* %a, i32 1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 2
+ %tmp2 = load i32, i32* %p2, align 2
+ %tmp3 = add i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+; Test ldpsw clustering
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldp_sext_int:BB#0
+; CHECK: Cluster loads SU(1) - SU(2)
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui
+define i64 @ldp_sext_int(i32* %p) nounwind {
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
+; Test ldur clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldur_int:BB#0
+; CHECK: Cluster loads SU(2) - SU(1)
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDURWi
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDURWi
+define i32 @ldur_int(i32* %a) nounwind {
+ %p1 = getelementptr inbounds i32, i32* %a, i32 -1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 -2
+ %tmp2 = load i32, i32* %p2, align 2
+ %tmp3 = add i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+; Test sext + zext clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldp_half_sext_zext_int:BB#0
+; CHECK: Cluster loads SU(3) - SU(4)
+; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui
+; CHECK: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
+define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
+ %tmp0 = load i64, i64* %q, align 4
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = zext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ %add1 = add nsw i64 %add, %tmp0
+ ret i64 %add1
+}
+
+; Test zext + sext clustering.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldp_half_zext_sext_int:BB#0
+; CHECK: Cluster loads SU(3) - SU(4)
+; CHECK: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
+; CHECK: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui
+define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
+ %tmp0 = load i64, i64* %q, align 4
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = zext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ %add1 = add nsw i64 %add, %tmp0
+ ret i64 %add1
+}
+
+; Verify we don't cluster volatile loads.
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: ldr_int_volatile:BB#0
+; CHECK-NOT: Cluster loads
+; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
+; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
+define i32 @ldr_int_volatile(i32* %a) nounwind {
+ %p1 = getelementptr inbounds i32, i32* %a, i32 1
+ %tmp1 = load volatile i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 2
+ %tmp2 = load volatile i32, i32* %p2, align 2
+ %tmp3 = add i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
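Two details of the expected debug output above are easy to trip over. In ldur_int the offsets are negative, so the scheduler lists the lower-address load first and the cluster line reads SU(2) - SU(1) rather than SU(1) - SU(2). In the mixed-extension tests, the zext load appears as a 32-bit LDRWui writing the sub_32 subregister of a 64-bit vreg (the <def,read-undef> form), which is what lets it cluster with the 64-bit-defining LDRSWui. Once clustered, the load/store optimizer can fold each pair into a single ldp or ldpsw, e.g. an ldp w8, w9, [x0, #4] for ldr_int (register choices illustrative).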