author     Jun Bum Lim <junbuml@codeaurora.org>  2016-04-15 14:58:38 +0000
committer  Jun Bum Lim <junbuml@codeaurora.org>  2016-04-15 14:58:38 +0000
commit     4c5bd58ebecd3aada7cebd37db8ff94f0b44770a (patch)
tree       3e990665048d5848dff59e82ed0fcb124bbbb43b /llvm/test/CodeGen
parent     061d496c511b02a0c2bd394539c757c5ad4511e3 (diff)
[MachineScheduler] Add support for store clustering
Perform store clustering just like load clustering. This change adds a StoreClusterMutation to the machine scheduler. To control it, enableClusterStores() is added to TargetInstrInfo.h; it is enabled only on AArch64 for now. This change also adds support for unscaled stores, which were not handled in getMemOpBaseRegImmOfs().

llvm-svn: 266437
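For readers unfamiliar with the hook named above, here is a minimal, self-contained C++ sketch of the opt-in pattern the commit message describes: a target hook, enableClusterStores(), gating registration of a StoreClusterMutation. Only those two names (plus TargetInstrInfo and addMutation) come from the commit; the surrounding classes are simplified stand-ins, not LLVM's actual ScheduleDAGMI or TargetInstrInfo definitions.

#include <memory>
#include <vector>

// Simplified model of the opt-in hook this commit describes.
struct ScheduleDAGMutation {
  virtual ~ScheduleDAGMutation() = default;
  virtual void apply() = 0;
};

struct StoreClusterMutation : ScheduleDAGMutation {
  // Would pair up adjacent stores (e.g. two STRXui at offsets 1 and 2),
  // mirroring the existing load clustering.
  void apply() override {}
};

struct TargetInstrInfo {
  virtual ~TargetInstrInfo() = default;
  // Off by default; targets opt in.
  virtual bool enableClusterStores() const { return false; }
};

struct AArch64InstrInfo : TargetInstrInfo {
  // AArch64 opts in, per the commit message.
  bool enableClusterStores() const override { return true; }
};

struct ScheduleDAG {
  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
  void addMutation(std::unique_ptr<ScheduleDAGMutation> M) {
    Mutations.push_back(std::move(M));
  }
};

// The scheduler registers the mutation only when the target opts in.
void configureScheduler(ScheduleDAG &DAG, const TargetInstrInfo &TII) {
  if (TII.enableClusterStores())
    DAG.addMutation(std::make_unique<StoreClusterMutation>());
}

The tests below then verify the observable effect: "Cluster ld/st SU(x) - SU(y)" lines in the -debug-only=misched output for scaled (STRXui/STRWui), unscaled (STURXi/STURWi), and FP (STRDui/STRSui) stores, and the absence of clustering for volatile stores.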
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll       | 149
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll         |  28
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-stp.ll                 |   4
-rw-r--r--  llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll |   4
4 files changed, 167 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
new file mode 100644
index 00000000000..5cab38eafb5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
@@ -0,0 +1,149 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -aarch64-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i64_scale:BB#0
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(4): STRXui %vreg1, %vreg0, 1
+; CHECK:SU(3): STRXui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRXui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRXui %vreg1, %vreg0, 4
+define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
+ store i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
+ store i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
+ store i64 %v, i64* %arrayidx3
+ ret i64 %v
+}
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i32_scale:BB#0
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(4): STRWui %vreg1, %vreg0, 1
+; CHECK:SU(3): STRWui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRWui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRWui %vreg1, %vreg0, 4
+define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %P, i32 3
+ store i32 %v, i32* %arrayidx
+ %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 2
+ store i32 %v, i32* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 1
+ store i32 %v, i32* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 4
+ store i32 %v, i32* %arrayidx3
+ ret i32 %v
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_i64_unscale:BB#0 entry
+; CHECK:Cluster ld/st SU(5) - SU(2)
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:SU(5): STURXi %vreg1, %vreg0, -32
+; CHECK:SU(2): STURXi %vreg1, %vreg0, -24
+; CHECK:SU(4): STURXi %vreg1, %vreg0, -16
+; CHECK:SU(3): STURXi %vreg1, %vreg0, -8
+define void @stp_i64_unscale(i64* nocapture %P, i64 %v) #0 {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3
+ store i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2
+ store i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4
+ store i64 %v, i64* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_i32_unscale:BB#0 entry
+; CHECK:Cluster ld/st SU(5) - SU(2)
+; CHECK:Cluster ld/st SU(4) - SU(3)
+; CHECK:SU(5): STURWi %vreg1, %vreg0, -16
+; CHECK:SU(2): STURWi %vreg1, %vreg0, -12
+; CHECK:SU(4): STURWi %vreg1, %vreg0, -8
+; CHECK:SU(3): STURWi %vreg1, %vreg0, -4
+define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3
+ store i32 %v, i32* %arrayidx
+ %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1
+ store i32 %v, i32* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2
+ store i32 %v, i32* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4
+ store i32 %v, i32* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_double:BB#0
+; CHECK:Cluster ld/st SU(3) - SU(4)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(3): STRDui %vreg1, %vreg0, 1
+; CHECK:SU(4): STRDui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRDui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRDui %vreg1, %vreg0, 4
+define void @stp_double(double* nocapture %P, double %v) {
+entry:
+ %arrayidx = getelementptr inbounds double, double* %P, i64 3
+ store double %v, double* %arrayidx
+ %arrayidx1 = getelementptr inbounds double, double* %P, i64 1
+ store double %v, double* %arrayidx1
+ %arrayidx2 = getelementptr inbounds double, double* %P, i64 2
+ store double %v, double* %arrayidx2
+ %arrayidx3 = getelementptr inbounds double, double* %P, i64 4
+ store double %v, double* %arrayidx3
+ ret void
+}
+
+; CHECK:********** MI Scheduling **********
+; CHECK-LABEL:stp_float:BB#0
+; CHECK:Cluster ld/st SU(3) - SU(4)
+; CHECK:Cluster ld/st SU(2) - SU(5)
+; CHECK:SU(3): STRSui %vreg1, %vreg0, 1
+; CHECK:SU(4): STRSui %vreg1, %vreg0, 2
+; CHECK:SU(2): STRSui %vreg1, %vreg0, 3
+; CHECK:SU(5): STRSui %vreg1, %vreg0, 4
+define void @stp_float(float* nocapture %P, float %v) {
+entry:
+ %arrayidx = getelementptr inbounds float, float* %P, i64 3
+ store float %v, float* %arrayidx
+ %arrayidx1 = getelementptr inbounds float, float* %P, i64 1
+ store float %v, float* %arrayidx1
+ %arrayidx2 = getelementptr inbounds float, float* %P, i64 2
+ store float %v, float* %arrayidx2
+ %arrayidx3 = getelementptr inbounds float, float* %P, i64 4
+ store float %v, float* %arrayidx3
+ ret void
+}
+
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_volatile:BB#0
+; CHECK-NOT: Cluster ld/st
+; CHECK:SU(2): STRXui %vreg1, %vreg0, 3; mem:Volatile
+; CHECK:SU(3): STRXui %vreg1, %vreg0, 2; mem:Volatile
+; CHECK:SU(4): STRXui %vreg1, %vreg0, 1; mem:Volatile
+; CHECK:SU(5): STRXui %vreg1, %vreg0, 4; mem:Volatile
+define i64 @stp_volatile(i64* nocapture %P, i64 %v) {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %P, i64 3
+ store volatile i64 %v, i64* %arrayidx
+ %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2
+ store volatile i64 %v, i64* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1
+ store volatile i64 %v, i64* %arrayidx2
+ %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4
+ store volatile i64 %v, i64* %arrayidx3
+ ret i64 %v
+}
+
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
index f7607089f76..0cfbe5958f4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
@@ -5,12 +5,12 @@
; Test ldr clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldr_int:BB#0
-; CHECK: Cluster loads SU(1) - SU(2)
+; CHECK: Cluster ld/st SU(1) - SU(2)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldr_int:BB#0
-; EXYNOS: Cluster loads SU(1) - SU(2)
+; EXYNOS: Cluster ld/st SU(1) - SU(2)
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
define i32 @ldr_int(i32* %a) nounwind {
@@ -25,12 +25,12 @@ define i32 @ldr_int(i32* %a) nounwind {
; Test ldpsw clustering
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_sext_int:BB#0
-; CHECK: Cluster loads SU(1) - SU(2)
+; CHECK: Cluster ld/st SU(1) - SU(2)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_sext_int:BB#0
-; EXYNOS: Cluster loads SU(1) - SU(2)
+; EXYNOS: Cluster ld/st SU(1) - SU(2)
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui
define i64 @ldp_sext_int(i32* %p) nounwind {
@@ -46,12 +46,12 @@ define i64 @ldp_sext_int(i32* %p) nounwind {
; Test ldur clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldur_int:BB#0
-; CHECK: Cluster loads SU(2) - SU(1)
+; CHECK: Cluster ld/st SU(2) - SU(1)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDURWi
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDURWi
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldur_int:BB#0
-; EXYNOS: Cluster loads SU(2) - SU(1)
+; EXYNOS: Cluster ld/st SU(2) - SU(1)
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDURWi
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDURWi
define i32 @ldur_int(i32* %a) nounwind {
@@ -66,12 +66,12 @@ define i32 @ldur_int(i32* %a) nounwind {
; Test sext + zext clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_half_sext_zext_int:BB#0
-; CHECK: Cluster loads SU(3) - SU(4)
+; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui
; CHECK: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_half_sext_zext_int:BB#0
-; EXYNOS: Cluster loads SU(3) - SU(4)
+; EXYNOS: Cluster ld/st SU(3) - SU(4)
; EXYNOS: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
@@ -89,12 +89,12 @@ define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
; Test zext + sext clustering.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldp_half_zext_sext_int:BB#0
-; CHECK: Cluster loads SU(3) - SU(4)
+; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; CHECK: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldp_half_zext_sext_int:BB#0
-; EXYNOS: Cluster loads SU(3) - SU(4)
+; EXYNOS: Cluster ld/st SU(3) - SU(4)
; EXYNOS: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui
; EXYNOS: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui
define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
@@ -112,12 +112,12 @@ define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
; Verify we don't cluster volatile loads.
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldr_int_volatile:BB#0
-; CHECK-NOT: Cluster loads
+; CHECK-NOT: Cluster ld/st
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldr_int_volatile:BB#0
-; EXYNOS-NOT: Cluster loads
+; EXYNOS-NOT: Cluster ld/st
; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui
; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui
define i32 @ldr_int_volatile(i32* %a) nounwind {
@@ -132,12 +132,12 @@ define i32 @ldr_int_volatile(i32* %a) nounwind {
; Test ldq clustering (no clustering for Exynos).
; CHECK: ********** MI Scheduling **********
; CHECK-LABEL: ldq_cluster:BB#0
-; CHECK: Cluster loads SU(1) - SU(3)
+; CHECK: Cluster ld/st SU(1) - SU(3)
; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRQui
; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRQui
; EXYNOS: ********** MI Scheduling **********
; EXYNOS-LABEL: ldq_cluster:BB#0
-; EXYNOS-NOT: Cluster loads
+; EXYNOS-NOT: Cluster ld/st
define <2 x i64> @ldq_cluster(i64* %p) {
%a1 = bitcast i64* %p to <2 x i64>*
%tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8
diff --git a/llvm/test/CodeGen/AArch64/arm64-stp.ll b/llvm/test/CodeGen/AArch64/arm64-stp.ll
index 98242d0bb57..5664c7d118c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stp.ll
@@ -100,9 +100,9 @@ entry:
; Read of %b to compute %tmp2 shouldn't prevent formation of stp
; CHECK-LABEL: stp_int_rar_hazard
-; CHECK: stp w0, w1, [x2]
; CHECK: ldr [[REG:w[0-9]+]], [x2, #8]
-; CHECK: add w0, [[REG]], w1
+; CHECK: add w8, [[REG]], w1
+; CHECK: stp w0, w1, [x2]
; CHECK: ret
define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind {
store i32 %a, i32* %p, align 4
diff --git a/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll b/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll
index 8b3fc97c9e2..434c787b28d 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll
@@ -64,8 +64,8 @@ define void @f3(i32 %a1, i32 %a2) #0 {
define void @f4(i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-NEXT: adrp x8, [[SET3]]@PAGE
; CHECK-NEXT: add x8, x8, [[SET3]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #4]
-; CHECK-NEXT: str w2, [x8]
+; CHECK-NEXT: stp w2, w0, [x8]
+; CHECK-NEXT: str w1, [x8, #8]
; CHECK-NEXT: ret
store i32 %a1, i32* @m4, align 4
store i32 %a2, i32* @n4, align 4