diff options
| author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2018-03-05 19:27:16 +0000 | 
|---|---|---|
| committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2018-03-05 19:27:16 +0000 | 
| commit | 6cc31ca81446b3fc61e02e2c231436978746decd (patch) | |
| tree | 0b0f5df8546c7ed7475b83d4b1e28ade79af0cb5 /llvm | |
| parent | 77ae82b84a451f1a3af878da1a4efb5fb0a043b6 (diff) | |
| download | bcm5719-llvm-6cc31ca81446b3fc61e02e2c231436978746decd.tar.gz bcm5719-llvm-6cc31ca81446b3fc61e02e2c231436978746decd.zip  | |
[PowerPC] Do not emit record-form rotates when record-form andi suffices
Up until Power9, the performance profiles of rlwinm., rldicl. and andi. were
more or less equivalent. However, on Power9, the record-form rotates are still
2-way cracked whereas the record-form and-immediate (andi.) is not.
This patch just ensures that we don't emit record-form rotates when an andi.
is adequate.
This issue was first pointed out by Carrot in
https://bugs.llvm.org/show_bug.cgi?id=30833 (this patch is a fix for that PR).
Differential Revision: https://reviews.llvm.org/D43977
llvm-svn: 326736
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 27 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/noPermuteFormasking.ll | 42 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll | 2 | 
3 files changed, 70 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index ec74d309f68..4e0dd9d5e5e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -55,6 +55,8 @@ STATISTIC(CmpIselsConverted,            "Number of ISELs that depend on comparison of constants converted");  STATISTIC(MissedConvertibleImmediateInstrs,            "Number of compare-immediate instructions fed by constants"); +STATISTIC(NumRcRotatesConvertedToRcAnd, +          "Number of record-form rotates converted to record-form andi");  static cl::  opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, @@ -1897,6 +1899,31 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,      // specifically the case if this is the instruction directly after the      // compare). +    // Rotates are expensive instructions. If we're emitting a record-form +    // rotate that can just be an andi, we should just emit the andi. +    if ((MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) && +        MI->getOperand(2).getImm() == 0) { +      int64_t MB = MI->getOperand(3).getImm(); +      int64_t ME = MI->getOperand(4).getImm(); +      if (MB < ME && MB >= 16) { +        uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1); +        NewOpC = MIOpC == PPC::RLWINM ? 
PPC::ANDIo : PPC::ANDIo8; +        MI->RemoveOperand(4); +        MI->RemoveOperand(3); +        MI->getOperand(2).setImm(Mask); +        NumRcRotatesConvertedToRcAnd++; +      } +    } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) { +      int64_t MB = MI->getOperand(3).getImm(); +      if (MB >= 48) { +        uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; +        NewOpC = PPC::ANDIo8; +        MI->RemoveOperand(3); +        MI->getOperand(2).setImm(Mask); +        NumRcRotatesConvertedToRcAnd++; +      } +    } +      const MCInstrDesc &NewDesc = get(NewOpC);      MI->setDesc(NewDesc); diff --git a/llvm/test/CodeGen/PowerPC/noPermuteFormasking.ll b/llvm/test/CodeGen/PowerPC/noPermuteFormasking.ll new file mode 100644 index 00000000000..d4ce1a7ec66 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/noPermuteFormasking.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unkknown-unknown \ +; RUN:   -verify-machineinstrs -O2 < %s | FileCheck %s +$test = comdat any + +; Function Attrs: noinline nounwind +define void @test() local_unnamed_addr #0 comdat align 2 { +; CHECK-LABEL: test: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    ld 3, 0(3) +; CHECK-NEXT:    cmpdi 1, 3, 0 +; CHECK-NEXT:    andi. 
4, 3, 3 +; CHECK-NEXT:    crand 20, 2, 5 +; CHECK-NEXT:    isel 3, 0, 3, 20 +; CHECK-NEXT:    addi 3, 3, -1 +; CHECK-NEXT:    cmpldi 3, 3 +; CHECK-NEXT:    bltlr+ 0 +; CHECK-NEXT:  # %bb.1: # %for.body.i.i.i.i.i.i.i +entry: +  %0 = load float*, float** undef, align 8 +  %1 = load i64, i64* undef, align 8 +  %add.ptr.i.i.i.i = getelementptr inbounds float, float* %0, i64 undef +  %2 = ptrtoint float* %add.ptr.i.i.i.i to i64 +  %and.i.i.i.i.i.i.i = and i64 %2, 3 +  %tobool.i.i.i.i.i.i.i = icmp eq i64 %and.i.i.i.i.i.i.i, 0 +  %cmp.i.i.i.i.i.i.i = icmp slt i64 0, %1 +  %3 = and i1 %tobool.i.i.i.i.i.i.i, %cmp.i.i.i.i.i.i.i +  %spec.select.i.i.i.i.i.i.i = select i1 %3, i64 0, i64 %1 +  %4 = add i64 %spec.select.i.i.i.i.i.i.i, -1 +  %5 = sub i64 %4, 0 +  br label %for.body.i.i.i.i.i.i.i.prol.loopexit + +for.body.i.i.i.i.i.i.i.prol.loopexit:             ; preds = %entry +  %6 = icmp ult i64 %5, 3 +  br i1 %6, label %exitBB, label %for.body.i.i.i.i.i.i.i + +for.body.i.i.i.i.i.i.i:                           ; preds = %for.body.i.i.i.i.i.i.i.prol.loopexit +  unreachable + +exitBB: ; preds = %for.body.i.i.i.i.i.i.i.prol.loopexit +  ret void +} diff --git a/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll b/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll index 282a31b1e27..ccc76e232b6 100644 --- a/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll +++ b/llvm/test/CodeGen/PowerPC/rlwinm-zero-ext.ll @@ -6,7 +6,7 @@ target triple = "powerpc64le-unknown-linux-gnu"  define i8 @test1(i32 %a) {  entry:  ; CHECK-NOT: rlwinm {{{[0-9]+}}}, {{[0-9]+}}, 0, 24, 27 -; CHECK: rlwinm. [[REG:[0-9]+]], {{[0-9]+}}, 0, 24, 27 +; CHECK: andi. [[REG:[0-9]+]], {{[0-9]+}}, 240  ; CHECK-NOT: cmplwi [[REG]], 0  ; CHECK: beq 0    %0 = and i32 %a, 240  | 

