diff options
author | Jun Bum Lim <junbuml@codeaurora.org> | 2016-04-15 14:58:38 +0000 |
---|---|---|
committer | Jun Bum Lim <junbuml@codeaurora.org> | 2016-04-15 14:58:38 +0000 |
commit | 4c5bd58ebecd3aada7cebd37db8ff94f0b44770a (patch) | |
tree | 3e990665048d5848dff59e82ed0fcb124bbbb43b /llvm/test/CodeGen | |
parent | 061d496c511b02a0c2bd394539c757c5ad4511e3 (diff) | |
download | bcm5719-llvm-4c5bd58ebecd3aada7cebd37db8ff94f0b44770a.tar.gz bcm5719-llvm-4c5bd58ebecd3aada7cebd37db8ff94f0b44770a.zip |
[MachineScheduler]Add support for store clustering
Perform store clustering just like load clustering. This change adds
StoreClusterMutation in the machine scheduler. To control StoreClusterMutation,
enableClusterStores() was added in TargetInstrInfo.h. This is enabled only on
AArch64 for now.
This change also adds support for unscaled stores, which were not handled in
getMemOpBaseRegImmOfs().
llvm-svn: 266437
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll | 149 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll | 28 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-stp.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll | 4 |
4 files changed, 167 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll new file mode 100644 index 00000000000..5cab38eafb5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll @@ -0,0 +1,149 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -aarch64-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: stp_i64_scale:BB#0 +; CHECK:Cluster ld/st SU(4) - SU(3) +; CHECK:Cluster ld/st SU(2) - SU(5) +; CHECK:SU(4): STRXui %vreg1, %vreg0, 1 +; CHECK:SU(3): STRXui %vreg1, %vreg0, 2 +; CHECK:SU(2): STRXui %vreg1, %vreg0, 3 +; CHECK:SU(5): STRXui %vreg1, %vreg0, 4 +define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store i64 %v, i64* %arrayidx3 + ret i64 %v +} + +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: stp_i32_scale:BB#0 +; CHECK:Cluster ld/st SU(4) - SU(3) +; CHECK:Cluster ld/st SU(2) - SU(5) +; CHECK:SU(4): STRWui %vreg1, %vreg0, 1 +; CHECK:SU(3): STRWui %vreg1, %vreg0, 2 +; CHECK:SU(2): STRWui %vreg1, %vreg0, 3 +; CHECK:SU(5): STRWui %vreg1, %vreg0, 4 +define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %P, i32 3 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 2 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 1 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 4 + store i32 %v, i32* %arrayidx3 + ret i32 %v +} + +; CHECK:********** MI Scheduling ********** 
+; CHECK-LABEL:stp_i64_unscale:BB#0 entry +; CHECK:Cluster ld/st SU(5) - SU(2) +; CHECK:Cluster ld/st SU(4) - SU(3) +; CHECK:SU(5): STURXi %vreg1, %vreg0, -32 +; CHECK:SU(2): STURXi %vreg1, %vreg0, -24 +; CHECK:SU(4): STURXi %vreg1, %vreg0, -16 +; CHECK:SU(3): STURXi %vreg1, %vreg0, -8 +define void @stp_i64_unscale(i64* nocapture %P, i64 %v) #0 { +entry: + %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4 + store i64 %v, i64* %arrayidx3 + ret void +} + +; CHECK:********** MI Scheduling ********** +; CHECK-LABEL:stp_i32_unscale:BB#0 entry +; CHECK:Cluster ld/st SU(5) - SU(2) +; CHECK:Cluster ld/st SU(4) - SU(3) +; CHECK:SU(5): STURWi %vreg1, %vreg0, -16 +; CHECK:SU(2): STURWi %vreg1, %vreg0, -12 +; CHECK:SU(4): STURWi %vreg1, %vreg0, -8 +; CHECK:SU(3): STURWi %vreg1, %vreg0, -4 +define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 { +entry: + %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4 + store i32 %v, i32* %arrayidx3 + ret void +} + +; CHECK:********** MI Scheduling ********** +; CHECK-LABEL:stp_double:BB#0 +; CHECK:Cluster ld/st SU(3) - SU(4) +; CHECK:Cluster ld/st SU(2) - SU(5) +; CHECK:SU(3): STRDui %vreg1, %vreg0, 1 +; CHECK:SU(4): STRDui %vreg1, %vreg0, 2 +; CHECK:SU(2): STRDui %vreg1, %vreg0, 3 +; CHECK:SU(5): STRDui %vreg1, %vreg0, 4 +define void @stp_double(double* nocapture %P, double %v) { +entry: + %arrayidx = getelementptr inbounds double, double* %P, i64 3 + store double %v, double* %arrayidx + 
%arrayidx1 = getelementptr inbounds double, double* %P, i64 1 + store double %v, double* %arrayidx1 + %arrayidx2 = getelementptr inbounds double, double* %P, i64 2 + store double %v, double* %arrayidx2 + %arrayidx3 = getelementptr inbounds double, double* %P, i64 4 + store double %v, double* %arrayidx3 + ret void +} + +; CHECK:********** MI Scheduling ********** +; CHECK-LABEL:stp_float:BB#0 +; CHECK:Cluster ld/st SU(3) - SU(4) +; CHECK:Cluster ld/st SU(2) - SU(5) +; CHECK:SU(3): STRSui %vreg1, %vreg0, 1 +; CHECK:SU(4): STRSui %vreg1, %vreg0, 2 +; CHECK:SU(2): STRSui %vreg1, %vreg0, 3 +; CHECK:SU(5): STRSui %vreg1, %vreg0, 4 +define void @stp_float(float* nocapture %P, float %v) { +entry: + %arrayidx = getelementptr inbounds float, float* %P, i64 3 + store float %v, float* %arrayidx + %arrayidx1 = getelementptr inbounds float, float* %P, i64 1 + store float %v, float* %arrayidx1 + %arrayidx2 = getelementptr inbounds float, float* %P, i64 2 + store float %v, float* %arrayidx2 + %arrayidx3 = getelementptr inbounds float, float* %P, i64 4 + store float %v, float* %arrayidx3 + ret void +} + +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: stp_volatile:BB#0 +; CHECK-NOT: Cluster ld/st +; CHECK:SU(2): STRXui %vreg1, %vreg0, 3; mem:Volatile +; CHECK:SU(3): STRXui %vreg1, %vreg0, 2; mem:Volatile +; CHECK:SU(4): STRXui %vreg1, %vreg0, 1; mem:Volatile +; CHECK:SU(5): STRXui %vreg1, %vreg0, 4; mem:Volatile +define i64 @stp_volatile(i64* nocapture %P, i64 %v) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store volatile i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store volatile i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store volatile i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store volatile i64 %v, i64* %arrayidx3 + ret i64 %v +} + diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll 
b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll index f7607089f76..0cfbe5958f4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll @@ -5,12 +5,12 @@ ; Test ldr clustering. ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldr_int:BB#0 -; CHECK: Cluster loads SU(1) - SU(2) +; CHECK: Cluster ld/st SU(1) - SU(2) ; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui ; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui ; EXYNOS: ********** MI Scheduling ********** ; EXYNOS-LABEL: ldr_int:BB#0 -; EXYNOS: Cluster loads SU(1) - SU(2) +; EXYNOS: Cluster ld/st SU(1) - SU(2) ; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui ; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui define i32 @ldr_int(i32* %a) nounwind { @@ -25,12 +25,12 @@ define i32 @ldr_int(i32* %a) nounwind { ; Test ldpsw clustering ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldp_sext_int:BB#0 -; CHECK: Cluster loads SU(1) - SU(2) +; CHECK: Cluster ld/st SU(1) - SU(2) ; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui ; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui ; EXYNOS: ********** MI Scheduling ********** ; EXYNOS-LABEL: ldp_sext_int:BB#0 -; EXYNOS: Cluster loads SU(1) - SU(2) +; EXYNOS: Cluster ld/st SU(1) - SU(2) ; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRSWui ; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRSWui define i64 @ldp_sext_int(i32* %p) nounwind { @@ -46,12 +46,12 @@ define i64 @ldp_sext_int(i32* %p) nounwind { ; Test ldur clustering. 
; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldur_int:BB#0 -; CHECK: Cluster loads SU(2) - SU(1) +; CHECK: Cluster ld/st SU(2) - SU(1) ; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDURWi ; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDURWi ; EXYNOS: ********** MI Scheduling ********** ; EXYNOS-LABEL: ldur_int:BB#0 -; EXYNOS: Cluster loads SU(2) - SU(1) +; EXYNOS: Cluster ld/st SU(2) - SU(1) ; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDURWi ; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDURWi define i32 @ldur_int(i32* %a) nounwind { @@ -66,12 +66,12 @@ define i32 @ldur_int(i32* %a) nounwind { ; Test sext + zext clustering. ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldp_half_sext_zext_int:BB#0 -; CHECK: Cluster loads SU(3) - SU(4) +; CHECK: Cluster ld/st SU(3) - SU(4) ; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui ; CHECK: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui ; EXYNOS: ********** MI Scheduling ********** ; EXYNOS-LABEL: ldp_half_sext_zext_int:BB#0 -; EXYNOS: Cluster loads SU(3) - SU(4) +; EXYNOS: Cluster ld/st SU(3) - SU(4) ; EXYNOS: SU(3): %vreg{{[0-9]+}}<def> = LDRSWui ; EXYNOS: SU(4): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { @@ -89,12 +89,12 @@ define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { ; Test zext + sext clustering. 
; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldp_half_zext_sext_int:BB#0 -; CHECK: Cluster loads SU(3) - SU(4) +; CHECK: Cluster ld/st SU(3) - SU(4) ; CHECK: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui ; CHECK: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui ; EXYNOS: ********** MI Scheduling ********** ; EXYNOS-LABEL: ldp_half_zext_sext_int:BB#0 -; EXYNOS: Cluster loads SU(3) - SU(4) +; EXYNOS: Cluster ld/st SU(3) - SU(4) ; EXYNOS: SU(3): %vreg{{[0-9]+}}:sub_32<def,read-undef> = LDRWui ; EXYNOS: SU(4): %vreg{{[0-9]+}}<def> = LDRSWui define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind { @@ -112,12 +112,12 @@ define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind { ; Verify we don't cluster volatile loads. ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldr_int_volatile:BB#0 -; CHECK-NOT: Cluster loads +; CHECK-NOT: Cluster ld/st ; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRWui ; CHECK: SU(2): %vreg{{[0-9]+}}<def> = LDRWui ; EXYNOS: ********** MI Scheduling ********** ; EXYNOS-LABEL: ldr_int_volatile:BB#0 -; EXYNOS-NOT: Cluster loads +; EXYNOS-NOT: Cluster ld/st ; EXYNOS: SU(1): %vreg{{[0-9]+}}<def> = LDRWui ; EXYNOS: SU(2): %vreg{{[0-9]+}}<def> = LDRWui define i32 @ldr_int_volatile(i32* %a) nounwind { @@ -132,12 +132,12 @@ define i32 @ldr_int_volatile(i32* %a) nounwind { ; Test ldq clustering (no clustering for Exynos). 
; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: ldq_cluster:BB#0 -; CHECK: Cluster loads SU(1) - SU(3) +; CHECK: Cluster ld/st SU(1) - SU(3) ; CHECK: SU(1): %vreg{{[0-9]+}}<def> = LDRQui ; CHECK: SU(3): %vreg{{[0-9]+}}<def> = LDRQui ; EXYNOS: ********** MI Scheduling ********** ; EXYNOS-LABEL: ldq_cluster:BB#0 -; EXYNOS-NOT: Cluster loads +; EXYNOS-NOT: Cluster ld/st define <2 x i64> @ldq_cluster(i64* %p) { %a1 = bitcast i64* %p to <2 x i64>* %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8 diff --git a/llvm/test/CodeGen/AArch64/arm64-stp.ll b/llvm/test/CodeGen/AArch64/arm64-stp.ll index 98242d0bb57..5664c7d118c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-stp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-stp.ll @@ -100,9 +100,9 @@ entry: ; Read of %b to compute %tmp2 shouldn't prevent formation of stp ; CHECK-LABEL: stp_int_rar_hazard -; CHECK: stp w0, w1, [x2] ; CHECK: ldr [[REG:w[0-9]+]], [x2, #8] -; CHECK: add w0, [[REG]], w1 +; CHECK: add w8, [[REG]], w1 +; CHECK: stp w0, w1, [x2] ; CHECK: ret define i32 @stp_int_rar_hazard(i32 %a, i32 %b, i32* nocapture %p) nounwind { store i32 %a, i32* %p, align 4 diff --git a/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll b/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll index 8b3fc97c9e2..434c787b28d 100644 --- a/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll +++ b/llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll @@ -64,8 +64,8 @@ define void @f3(i32 %a1, i32 %a2) #0 { define void @f4(i32 %a1, i32 %a2, i32 %a3) #0 { ; CHECK-NEXT: adrp x8, [[SET3]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET3]]@PAGEOFF -; CHECK-NEXT: stp w0, w1, [x8, #4] -; CHECK-NEXT: str w2, [x8] +; CHECK-NEXT: stp w2, w0, [x8] +; CHECK-NEXT: str w1, [x8, #8] ; CHECK-NEXT: ret store i32 %a1, i32* @m4, align 4 store i32 %a2, i32* @n4, align 4 |