summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNemanja Ivanovic <nemanja.i.ibm@gmail.com>2019-10-17 18:24:28 +0000
committerNemanja Ivanovic <nemanja.i.ibm@gmail.com>2019-10-17 18:24:28 +0000
commit8a3d7c9cbd305764d1f6c2d42cdd82cf2876b99f (patch)
tree178578f734b0f1ce42449e7379d63691c7e0f662
parent6237c9fe6ce966ce366005a55df8c267ea32a064 (diff)
downloadbcm5719-llvm-8a3d7c9cbd305764d1f6c2d42cdd82cf2876b99f.tar.gz
bcm5719-llvm-8a3d7c9cbd305764d1f6c2d42cdd82cf2876b99f.zip
[PowerPC] Turn on CR-Logical reducer pass
Quite a while ago, we implemented a pass that reduces the number of CR-logical operations we emit. It does so by converting a CR-logical operation into a branch. We have kept this pass off by default because it seemed to cause a significant regression with one benchmark. However, that regression turned out to have a completely unrelated cause: AADB (the aggressive anti-dependency breaker) was introducing a self-copy that is a priority-setting nop, and this pass merely exacerbated the problem. We have long since fixed the cause of that degradation. Now that we understand the reason for the only degradation, we can turn this pass on by default. Differential revision: https://reviews.llvm.org/D52431 llvm-svn: 375152
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp2
-rw-r--r--llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll39
-rw-r--r--llvm/test/CodeGen/PowerPC/brcond.ll6
-rw-r--r--llvm/test/CodeGen/PowerPC/pr42492.ll28
-rw-r--r--llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll15
-rw-r--r--llvm/test/CodeGen/PowerPC/vec-min-max.ll23
6 files changed, 66 insertions, 47 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 01579065984..abefee8b339 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner",
static cl::opt<bool>
ReduceCRLogical("ppc-reduce-cr-logicals",
cl::desc("Expand eligible cr-logical binary ops to branches"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index cc495b321fc..c851e73b1f1 100644
--- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -36,7 +36,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: # %bb.1: # %bb5
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: li 4, 0
-; CHECK-NEXT: b .LBB0_16
+; CHECK-NEXT: b .LBB0_17
; CHECK-NEXT: .LBB0_2: # %bb1
; CHECK-NEXT: lfd 0, 400(1)
; CHECK-NEXT: lis 3, 15856
@@ -166,13 +166,11 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: bl __gcc_qsub@PLT
; CHECK-NEXT: stfd 2, 176(1)
; CHECK-NEXT: stfd 1, 168(1)
-; CHECK-NEXT: fcmpu 0, 2, 27
+; CHECK-NEXT: fcmpu 1, 2, 27
; CHECK-NEXT: lwz 3, 180(1)
-; CHECK-NEXT: fcmpu 1, 1, 27
-; CHECK-NEXT: crandc 20, 6, 0
-; CHECK-NEXT: cror 21, 5, 7
+; CHECK-NEXT: fcmpu 0, 1, 27
+; CHECK-NEXT: crandc 20, 2, 4
; CHECK-NEXT: stw 3, 268(1)
-; CHECK-NEXT: cror 20, 21, 20
; CHECK-NEXT: lwz 3, 176(1)
; CHECK-NEXT: stw 3, 264(1)
; CHECK-NEXT: lwz 3, 172(1)
@@ -181,8 +179,11 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: lwz 3, 168(1)
; CHECK-NEXT: stw 3, 272(1)
; CHECK-NEXT: lfd 31, 272(1)
-; CHECK-NEXT: bc 12, 20, .LBB0_13
-; CHECK-NEXT: # %bb.10: # %bb2
+; CHECK-NEXT: bc 12, 20, .LBB0_14
+; CHECK-NEXT: # %bb.10: # %bb1
+; CHECK-NEXT: cror 20, 1, 3
+; CHECK-NEXT: bc 12, 20, .LBB0_14
+; CHECK-NEXT: # %bb.11: # %bb2
; CHECK-NEXT: fneg 28, 31
; CHECK-NEXT: stfd 28, 48(1)
; CHECK-NEXT: lis 3, 16864
@@ -231,15 +232,15 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: crandc 20, 6, 1
; CHECK-NEXT: cror 20, 4, 20
; CHECK-NEXT: addis 3, 3, -32768
-; CHECK-NEXT: bc 12, 20, .LBB0_12
-; CHECK-NEXT: # %bb.11: # %bb2
+; CHECK-NEXT: bc 12, 20, .LBB0_13
+; CHECK-NEXT: # %bb.12: # %bb2
; CHECK-NEXT: ori 3, 4, 0
-; CHECK-NEXT: b .LBB0_12
-; CHECK-NEXT: .LBB0_12: # %bb2
+; CHECK-NEXT: b .LBB0_13
+; CHECK-NEXT: .LBB0_13: # %bb2
; CHECK-NEXT: subfic 4, 3, 0
; CHECK-NEXT: subfe 3, 29, 30
-; CHECK-NEXT: b .LBB0_16
-; CHECK-NEXT: .LBB0_13: # %bb3
+; CHECK-NEXT: b .LBB0_17
+; CHECK-NEXT: .LBB0_14: # %bb3
; CHECK-NEXT: stfd 31, 112(1)
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: stw 3, 148(1)
@@ -286,13 +287,13 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: crandc 20, 6, 0
; CHECK-NEXT: cror 20, 5, 20
; CHECK-NEXT: addis 3, 3, -32768
-; CHECK-NEXT: bc 12, 20, .LBB0_14
-; CHECK-NEXT: b .LBB0_15
-; CHECK-NEXT: .LBB0_14: # %bb3
-; CHECK-NEXT: addi 4, 3, 0
+; CHECK-NEXT: bc 12, 20, .LBB0_15
+; CHECK-NEXT: b .LBB0_16
; CHECK-NEXT: .LBB0_15: # %bb3
+; CHECK-NEXT: addi 4, 3, 0
+; CHECK-NEXT: .LBB0_16: # %bb3
; CHECK-NEXT: mr 3, 30
-; CHECK-NEXT: .LBB0_16: # %bb5
+; CHECK-NEXT: .LBB0_17: # %bb5
; CHECK-NEXT: lfd 31, 456(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 30, 448(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 29, 440(1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/brcond.ll b/llvm/test/CodeGen/PowerPC/brcond.ll
index 5bf8c6c1f35..b8c98427f10 100644
--- a/llvm/test/CodeGen/PowerPC/brcond.ll
+++ b/llvm/test/CodeGen/PowerPC/brcond.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN: -ppc-reduce-cr-logicals=false < %s | FileCheck %s
define signext i32 @testi32slt(i32 signext %c1, i32 signext %c2, i32 signext %c3, i32 signext %c4, i32 signext %a1, i32 signext %a2) #0 {
; CHECK-LABEL: testi32slt
diff --git a/llvm/test/CodeGen/PowerPC/pr42492.ll b/llvm/test/CodeGen/PowerPC/pr42492.ll
index 8dface68b6a..e404d5d72cd 100644
--- a/llvm/test/CodeGen/PowerPC/pr42492.ll
+++ b/llvm/test/CodeGen/PowerPC/pr42492.ll
@@ -4,13 +4,27 @@
define void @f(i8*, i8*, i64*) {
; Check we don't assert and this is not a Hardware Loop
; CHECK-LABEL: f:
-; CHECK: .LBB0_2: #
-; CHECK-NEXT: cmplwi
-; CHECK-NEXT: cmpd
-; CHECK-NEXT: sldi
-; CHECK-NEXT: cror
-; CHECK-NEXT: addi
-; CHECK-NEXT: bc
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpld 3, 4
+; CHECK-NEXT: beqlr 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld 6, 8(5)
+; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: add 3, 3, 4
+; CHECK-NEXT: li 4, 0
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB0_2: #
+; CHECK-NEXT: sldi 6, 6, 4
+; CHECK-NEXT: cmplwi 4, 14
+; CHECK-NEXT: addi 7, 4, 1
+; CHECK-NEXT: bc 12, 1, .LBB0_4
+; CHECK-NEXT: # %bb.3: #
+; CHECK-NEXT: cmpd 3, 4
+; CHECK-NEXT: mr 4, 7
+; CHECK-NEXT: bc 4, 2, .LBB0_2
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: std 6, 8(5)
+; CHECK-NEXT: blr
%4 = icmp eq i8* %0, %1
br i1 %4, label %9, label %5
diff --git a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
index d9f85f17703..79f57eb05d1 100644
--- a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
+++ b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
@@ -16,16 +16,17 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: mr r29, r5
-; CHECK-NEXT: cmpwi cr1, r4, 11
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: extsw r28, r4
; CHECK-NEXT: std r2, 24(r1)
; CHECK-NEXT: cmpwi r29, 1
-; CHECK-NEXT: cror 4*cr5+lt, lt, 4*cr1+lt
-; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_2
+; CHECK-NEXT: bc 12, lt, .LBB0_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: cmpwi cr0, r4, 11
+; CHECK-NEXT: bc 12, lt, .LBB0_3
; CHECK-NEXT: .p2align 5
-; CHECK-NEXT: .LBB0_1: # %for.body.us
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: .LBB0_2: # %for.body.us
+; CHECK-NEXT: #
; CHECK-NEXT: mtctr r30
; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: mr r12, r30
@@ -33,8 +34,8 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig
; CHECK-NEXT: ld 2, 24(r1)
; CHECK-NEXT: addi r29, r29, -1
; CHECK-NEXT: cmplwi r29, 0
-; CHECK-NEXT: bne cr0, .LBB0_1
-; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup
+; CHECK-NEXT: bne cr0, .LBB0_2
+; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
; CHECK-NEXT: mtctr r30
; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: mr r12, r30
diff --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
index 8269e1c3c7b..e29ef336879 100644
--- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll
@@ -240,22 +240,23 @@ entry:
define i128 @invalidv1i128(<2 x i128> %v1, <2 x i128> %v2) {
; CHECK-LABEL: invalidv1i128:
; CHECK: # %bb.0:
+; CHECK-NEXT: mfvsrd 3, 36
; CHECK-NEXT: xxswapd 0, 36
-; CHECK-NEXT: mfvsrd 4, 36
-; CHECK-NEXT: mfvsrd 5, 34
+; CHECK-NEXT: mfvsrd 4, 34
+; CHECK-NEXT: xxswapd 1, 34
+; CHECK-NEXT: cmpld 4, 3
+; CHECK-NEXT: cmpd 1, 4, 3
; CHECK-NEXT: mfvsrd 3, 0
-; CHECK-NEXT: xxswapd 0, 34
-; CHECK-NEXT: cmpld 5, 4
-; CHECK-NEXT: cmpd 1, 5, 4
; CHECK-NEXT: crandc 20, 4, 2
-; CHECK-NEXT: mfvsrd 6, 0
-; CHECK-NEXT: cmpld 1, 6, 3
-; CHECK-NEXT: crand 21, 2, 4
-; CHECK-NEXT: cror 20, 21, 20
-; CHECK-NEXT: bc 12, 20, .LBB12_2
+; CHECK-NEXT: mfvsrd 4, 1
+; CHECK-NEXT: cmpld 1, 4, 3
+; CHECK-NEXT: bc 12, 20, .LBB12_3
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: crand 20, 2, 4
+; CHECK-NEXT: bc 12, 20, .LBB12_3
+; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: vmr 2, 4
-; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: .LBB12_3:
; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: mfvsrd 4, 34
; CHECK-NEXT: mfvsrd 3, 0
OpenPOWER on IntegriCloud