diff options
author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2019-10-17 18:24:28 +0000 |
---|---|---|
committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2019-10-17 18:24:28 +0000 |
commit | 8a3d7c9cbd305764d1f6c2d42cdd82cf2876b99f (patch) | |
tree | 178578f734b0f1ce42449e7379d63691c7e0f662 | |
parent | 6237c9fe6ce966ce366005a55df8c267ea32a064 (diff) | |
download | bcm5719-llvm-8a3d7c9cbd305764d1f6c2d42cdd82cf2876b99f.tar.gz bcm5719-llvm-8a3d7c9cbd305764d1f6c2d42cdd82cf2876b99f.zip |
[PowerPC] Turn on CR-Logical reducer pass
Quite a while ago, we implemented a pass that will reduce the number of
CR-logical operations we emit. It does so by converting a CR-logical operation
into a branch. We have kept this off by default because it seemed to cause a
significant regression with one benchmark.
However, that regression turned out to have a completely unrelated cause: AADB
was introducing a self-copy that is a priority-setting nop, and this pass
merely exacerbated the problem.
Now that we understand the reason for the only degradation, and its cause has
long since been fixed, we can turn this pass on by default.
Differential revision: https://reviews.llvm.org/D52431
llvm-svn: 375152
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll | 39 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/brcond.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/pr42492.ll | 28 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/vec-min-max.ll | 23 |
6 files changed, 66 insertions, 47 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 01579065984..abefee8b339 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner", static cl::opt<bool> ReduceCRLogical("ppc-reduce-cr-logicals", cl::desc("Expand eligible cr-logical binary ops to branches"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll index cc495b321fc..c851e73b1f1 100644 --- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -36,7 +36,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: # %bb.1: # %bb5 ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: b .LBB0_16 +; CHECK-NEXT: b .LBB0_17 ; CHECK-NEXT: .LBB0_2: # %bb1 ; CHECK-NEXT: lfd 0, 400(1) ; CHECK-NEXT: lis 3, 15856 @@ -166,13 +166,11 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: bl __gcc_qsub@PLT ; CHECK-NEXT: stfd 2, 176(1) ; CHECK-NEXT: stfd 1, 168(1) -; CHECK-NEXT: fcmpu 0, 2, 27 +; CHECK-NEXT: fcmpu 1, 2, 27 ; CHECK-NEXT: lwz 3, 180(1) -; CHECK-NEXT: fcmpu 1, 1, 27 -; CHECK-NEXT: crandc 20, 6, 0 -; CHECK-NEXT: cror 21, 5, 7 +; CHECK-NEXT: fcmpu 0, 1, 27 +; CHECK-NEXT: crandc 20, 2, 4 ; CHECK-NEXT: stw 3, 268(1) -; CHECK-NEXT: cror 20, 21, 20 ; CHECK-NEXT: lwz 3, 176(1) ; CHECK-NEXT: stw 3, 264(1) ; CHECK-NEXT: lwz 3, 172(1) @@ -181,8 +179,11 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lwz 3, 168(1) ; CHECK-NEXT: stw 3, 272(1) ; CHECK-NEXT: lfd 31, 272(1) -; CHECK-NEXT: bc 12, 20, .LBB0_13 -; CHECK-NEXT: # %bb.10: # %bb2 +; CHECK-NEXT: bc 12, 20, 
.LBB0_14 +; CHECK-NEXT: # %bb.10: # %bb1 +; CHECK-NEXT: cror 20, 1, 3 +; CHECK-NEXT: bc 12, 20, .LBB0_14 +; CHECK-NEXT: # %bb.11: # %bb2 ; CHECK-NEXT: fneg 28, 31 ; CHECK-NEXT: stfd 28, 48(1) ; CHECK-NEXT: lis 3, 16864 @@ -231,15 +232,15 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: crandc 20, 6, 1 ; CHECK-NEXT: cror 20, 4, 20 ; CHECK-NEXT: addis 3, 3, -32768 -; CHECK-NEXT: bc 12, 20, .LBB0_12 -; CHECK-NEXT: # %bb.11: # %bb2 +; CHECK-NEXT: bc 12, 20, .LBB0_13 +; CHECK-NEXT: # %bb.12: # %bb2 ; CHECK-NEXT: ori 3, 4, 0 -; CHECK-NEXT: b .LBB0_12 -; CHECK-NEXT: .LBB0_12: # %bb2 +; CHECK-NEXT: b .LBB0_13 +; CHECK-NEXT: .LBB0_13: # %bb2 ; CHECK-NEXT: subfic 4, 3, 0 ; CHECK-NEXT: subfe 3, 29, 30 -; CHECK-NEXT: b .LBB0_16 -; CHECK-NEXT: .LBB0_13: # %bb3 +; CHECK-NEXT: b .LBB0_17 +; CHECK-NEXT: .LBB0_14: # %bb3 ; CHECK-NEXT: stfd 31, 112(1) ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: stw 3, 148(1) @@ -286,13 +287,13 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: crandc 20, 6, 0 ; CHECK-NEXT: cror 20, 5, 20 ; CHECK-NEXT: addis 3, 3, -32768 -; CHECK-NEXT: bc 12, 20, .LBB0_14 -; CHECK-NEXT: b .LBB0_15 -; CHECK-NEXT: .LBB0_14: # %bb3 -; CHECK-NEXT: addi 4, 3, 0 +; CHECK-NEXT: bc 12, 20, .LBB0_15 +; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: .LBB0_15: # %bb3 +; CHECK-NEXT: addi 4, 3, 0 +; CHECK-NEXT: .LBB0_16: # %bb3 ; CHECK-NEXT: mr 3, 30 -; CHECK-NEXT: .LBB0_16: # %bb5 +; CHECK-NEXT: .LBB0_17: # %bb5 ; CHECK-NEXT: lfd 31, 456(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 30, 448(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 29, 440(1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/brcond.ll b/llvm/test/CodeGen/PowerPC/brcond.ll index 5bf8c6c1f35..b8c98427f10 100644 --- a/llvm/test/CodeGen/PowerPC/brcond.ll +++ b/llvm/test/CodeGen/PowerPC/brcond.ll @@ -1,5 +1,7 @@ -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < 
%s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ +; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 { ; CHECK-LABEL: testi32slt diff --git a/llvm/test/CodeGen/PowerPC/pr42492.ll b/llvm/test/CodeGen/PowerPC/pr42492.ll index 8dface68b6a..e404d5d72cd 100644 --- a/llvm/test/CodeGen/PowerPC/pr42492.ll +++ b/llvm/test/CodeGen/PowerPC/pr42492.ll @@ -4,13 +4,27 @@ define void @f(i8*, i8*, i64*) { ; Check we don't assert and this is not a Hardware Loop ; CHECK-LABEL: f: -; CHECK: .LBB0_2: # -; CHECK-NEXT: cmplwi -; CHECK-NEXT: cmpd -; CHECK-NEXT: sldi -; CHECK-NEXT: cror -; CHECK-NEXT: addi -; CHECK-NEXT: bc +; CHECK: # %bb.0: +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: beqlr 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld 6, 8(5) +; CHECK-NEXT: not 3, 3 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_2: # +; CHECK-NEXT: sldi 6, 6, 4 +; CHECK-NEXT: cmplwi 4, 14 +; CHECK-NEXT: addi 7, 4, 1 +; CHECK-NEXT: bc 12, 1, .LBB0_4 +; CHECK-NEXT: # %bb.3: # +; CHECK-NEXT: cmpd 3, 4 +; CHECK-NEXT: mr 4, 7 +; CHECK-NEXT: bc 4, 2, .LBB0_2 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: std 6, 8(5) +; CHECK-NEXT: blr %4 = icmp eq i8* %0, %1 br i1 %4, label %9, label %5 diff --git a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll index d9f85f17703..79f57eb05d1 100644 --- a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll +++ b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll @@ -16,16 +16,17 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: mr r29, r5 -; CHECK-NEXT: cmpwi cr1, r4, 11 ; CHECK-NEXT: mr 
r30, r3 ; CHECK-NEXT: extsw r28, r4 ; CHECK-NEXT: std r2, 24(r1) ; CHECK-NEXT: cmpwi r29, 1 -; CHECK-NEXT: cror 4*cr5+lt, lt, 4*cr1+lt -; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_2 +; CHECK-NEXT: bc 12, lt, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: cmpwi cr0, r4, 11 +; CHECK-NEXT: bc 12, lt, .LBB0_3 ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_1: # %for.body.us -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .LBB0_2: # %for.body.us +; CHECK-NEXT: # ; CHECK-NEXT: mtctr r30 ; CHECK-NEXT: mr r3, r28 ; CHECK-NEXT: mr r12, r30 @@ -33,8 +34,8 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig ; CHECK-NEXT: ld 2, 24(r1) ; CHECK-NEXT: addi r29, r29, -1 ; CHECK-NEXT: cmplwi r29, 0 -; CHECK-NEXT: bne cr0, .LBB0_1 -; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup +; CHECK-NEXT: bne cr0, .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup ; CHECK-NEXT: mtctr r30 ; CHECK-NEXT: mr r3, r28 ; CHECK-NEXT: mr r12, r30 diff --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll index 8269e1c3c7b..e29ef336879 100644 --- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll +++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll @@ -240,22 +240,23 @@ entry: define i128 @invalidv1i128(<2 x i128> %v1, <2 x i128> %v2) { ; CHECK-LABEL: invalidv1i128: ; CHECK: # %bb.0: +; CHECK-NEXT: mfvsrd 3, 36 ; CHECK-NEXT: xxswapd 0, 36 -; CHECK-NEXT: mfvsrd 4, 36 -; CHECK-NEXT: mfvsrd 5, 34 +; CHECK-NEXT: mfvsrd 4, 34 +; CHECK-NEXT: xxswapd 1, 34 +; CHECK-NEXT: cmpld 4, 3 +; CHECK-NEXT: cmpd 1, 4, 3 ; CHECK-NEXT: mfvsrd 3, 0 -; CHECK-NEXT: xxswapd 0, 34 -; CHECK-NEXT: cmpld 5, 4 -; CHECK-NEXT: cmpd 1, 5, 4 ; CHECK-NEXT: crandc 20, 4, 2 -; CHECK-NEXT: mfvsrd 6, 0 -; CHECK-NEXT: cmpld 1, 6, 3 -; CHECK-NEXT: crand 21, 2, 4 -; CHECK-NEXT: cror 20, 21, 20 -; CHECK-NEXT: bc 12, 20, .LBB12_2 +; CHECK-NEXT: mfvsrd 4, 1 +; CHECK-NEXT: cmpld 1, 4, 3 +; CHECK-NEXT: bc 12, 20, .LBB12_3 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: crand 20, 2, 4 +; 
CHECK-NEXT: bc 12, 20, .LBB12_3 +; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: vmr 2, 4 -; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: .LBB12_3: ; CHECK-NEXT: xxswapd 0, 34 ; CHECK-NEXT: mfvsrd 4, 34 ; CHECK-NEXT: mfvsrd 3, 0 |