summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
author: Anton Korobeynikov <asl@math.spbu.ru> 2011-10-07 16:15:08 +0000
committer: Anton Korobeynikov <asl@math.spbu.ru> 2011-10-07 16:15:08 +0000
commit: 318d6bae80b72b4572281d4c066ca7caa86b1eb5 (patch)
tree: edfb4fceb3eb471e108279eeb279c0529a91f012 /llvm
parent: e19661e0ca5ac59545554d283bb62a75d6700ce4 (diff)
downloadbcm5719-llvm-318d6bae80b72b4572281d4c066ca7caa86b1eb5.tar.gz
bcm5719-llvm-318d6bae80b72b4572281d4c066ca7caa86b1eb5.zip
Peephole optimization for ABS on ARM.
Patch by Ana Pazos! llvm-svn: 141365
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 63
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 80
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrInfo.td | 11
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrThumb2.td | 8
-rw-r--r-- llvm/test/CodeGen/ARM/iabs.ll | 8
-rw-r--r-- llvm/test/CodeGen/Thumb/iabs.ll | 11
6 files changed, 174 insertions, 7 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index ce877b0d04e..8b34fa11f47 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
+static cl::opt<bool>
+DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
+ cl::desc("Enable / disable ARM integer abs transform"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -252,6 +257,9 @@ private:
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
+ // Select special operations if node forms integer ABS pattern
+ SDNode *SelectABSOp(SDNode *N);
+
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
@@ -2295,6 +2303,53 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+/// Target-specific instruction selection for ISD::XOR nodes that form an integer ABS.
+/// Target-independent combining lowers SELECT_CC nodes of the form
+/// select_cc setg[te] X, 0, X, -X
+/// select_cc setgt X, -1, X, -X
+/// select_cc setl[te] X, 0, -X, X
+/// select_cc setlt X, 1, -X, X
+/// which represent Integer ABS into:
+/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// ARM instruction selection detects the latter and matches it to an
+/// ARM::ABS or ARM::t2ABS machine node; returns NULL when N does not match.
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
+ SDValue XORSrc0 = N->getOperand(0);
+ SDValue XORSrc1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ if (DisableARMIntABS) // -disable-arm-int-abs switches this transform off
+ return NULL;
+
+ if (XORSrc0.getOpcode() != ISD::ADD ||
+ XORSrc1.getOpcode() != ISD::SRA) // only xor(add, sra) in this operand order is recognized
+ return NULL;
+
+ SDValue ADDSrc0 = XORSrc0.getOperand(0);
+ SDValue ADDSrc1 = XORSrc0.getOperand(1);
+ SDValue SRASrc0 = XORSrc1.getOperand(0);
+ SDValue SRASrc1 = XORSrc1.getOperand(1);
+ ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
+ EVT XType = SRASrc0.getValueType();
+ unsigned Size = XType.getSizeInBits() - 1; // expected shift amount: bit width of X minus one
+
+ if (ADDSrc1 == XORSrc1 && // the add's second operand is the very same sra value (Y)
+ ADDSrc0 == SRASrc0 && // the add and the sra both start from the same X
+ XType.isInteger() &&
+ SRAConstant != NULL && // the shift amount must be a compile-time constant
+ Size == SRAConstant->getZExtValue()) {
+
+ unsigned Opcode = ARM::ABS;
+ if (Subtarget->isThumb2())
+ Opcode = ARM::t2ABS;
+
+ return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); // morph N in place into the ABS pseudo taking X
+ }
+
+ return NULL; // not the ABS pattern
+}
+
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
@@ -2331,6 +2386,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::XOR: {
+ // Select special operations if XOR node forms integer ABS pattern
+ SDNode *ResNode = SelectABSOp(N);
+ if (ResNode)
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ }
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f69dab45300..f92ee379e13 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -6100,6 +6100,86 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
+
+ case ARM::ABS:
+ case ARM::t2ABS: {
+ // To expand an ABS pseudo instruction, we have to insert a small
+ // control-flow pattern: BB -> RSBBB -> SinkBB plus BB -> SinkBB.
+ // The incoming instruction carries only the source vreg to test
+ // against 0 (operand 1) and the destination vreg to set (operand 0);
+ // the opcodes used below are chosen from the subtarget (ARM vs. Thumb2).
+ // It transforms
+ // V1 = ABS V0
+ // into
+ // V2 = MOVS V0
+ // BCC (branch to SinkBB if V0 >= 0)
+ // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
+ // SinkBB: V1 = PHI(V2, V3)
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator BBI = BB;
+ ++BBI; // insertion point: right after BB in the function's block list
+ MachineFunction *Fn = BB->getParent();
+ MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+ Fn->insert(BBI, RSBBB);
+ Fn->insert(BBI, SinkBB);
+
+ unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+ unsigned int ABSDstReg = MI->getOperand(0).getReg();
+ bool isThumb2 = Subtarget->isThumb2();
+ MachineRegisterInfo &MRI = Fn->getRegInfo();
+ // In Thumb mode S must not be specified if source register is the SP or
+ // PC and if destination register is the SP, so restrict register class (rGPR for Thumb2)
+ unsigned NewMovDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+ unsigned NewRsbDstReg = MRI.createVirtualRegister(
+ isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+
+ // Transfer the remainder of BB (everything after MI) and its successor edges to SinkBB.
+ SinkBB->splice(SinkBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(RSBBB);
+ BB->addSuccessor(SinkBB);
+
+ // RSBBB falls through to SinkBB
+ RSBBB->addSuccessor(SinkBB);
+
+ // insert a movs (a MOVr that also defines CPSR) at the end of BB
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
+ NewMovDstReg)
+ .addReg(ABSSrcReg, RegState::Kill) // NOTE(review): unconditional Kill - confirm ABSSrcReg has no later uses
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(ARM::CPSR, RegState::Define);
+
+ // insert a bcc with opposite CC to ARMCC::MI at the end of BB (skips RSBBB when not negative)
+ BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+ .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+ // insert rsbri in RSBBB (computes 0 - NewMovDstReg)
+ // Note: BCC and rsbri will be converted into predicated rsbmi
+ // by if-conversion pass
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(NewMovDstReg, RegState::Kill)
+ .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+ // insert PHI in SinkBB,
+ // reuse ABSDstReg to not change uses of ABS instruction
+ BuildMI(*SinkBB, SinkBB->begin(), dl,
+ TII->get(ARM::PHI), ABSDstReg)
+ .addReg(NewRsbDstReg).addMBB(RSBBB)
+ .addReg(NewMovDstReg).addMBB(BB);
+
+ // remove the ABS pseudo instruction now that it has been expanded
+ MI->eraseFromParent();
+
+ // return last added BB, which now holds the remainder of the original block
+ return SinkBB;
+ }
}
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 4b58a150ad6..d81a699cc37 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -2848,6 +2848,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
+ (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
+
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@@ -4025,6 +4028,14 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
+// Pseudo instruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def ABS : ARMPseudoInst<
+ (outs GPR:$dst), (ins GPR:$src),
+ 8, NoItinerary, []>;
+}
+
let usesCustomInserter = 1 in {
let Defs = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 9be4d4689e7..7f24f81c57e 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -3433,6 +3433,14 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
[(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
+
+// Pseudo instruction that combines movs + predicated rsbmi
+// to implement integer ABS
+let usesCustomInserter = 1, Defs = [CPSR] in {
+def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
+ NoItinerary, []>, Requires<[IsThumb2]>;
+}
+
//===----------------------------------------------------------------------===//
// Coprocessor load/store -- for disassembly only
//
diff --git a/llvm/test/CodeGen/ARM/iabs.ll b/llvm/test/CodeGen/ARM/iabs.ll
index c01c041cfe8..89e309d1606 100644
--- a/llvm/test/CodeGen/ARM/iabs.ll
+++ b/llvm/test/CodeGen/ARM/iabs.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
;; Integer absolute value, should produce something as good as: ARM:
-;; add r3, r0, r0, asr #31
-;; eor r0, r3, r0, asr #31
+;; movs r0, r0
+;; rsbmi r0, r0, #0
;; bx lr
define i32 @test(i32 %a) {
@@ -10,7 +10,7 @@ define i32 @test(i32 %a) {
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
ret i32 %abs
-; CHECK: add r1, r0, r0, asr #31
-; CHECK: eor r0, r1, r0, asr #31
+; CHECK: movs r0, r0
+; CHECK: rsbmi r0, r0, #0
; CHECK: bx lr
}
diff --git a/llvm/test/CodeGen/Thumb/iabs.ll b/llvm/test/CodeGen/Thumb/iabs.ll
index d7cdcd8149a..d03b5b2e3be 100644
--- a/llvm/test/CodeGen/Thumb/iabs.ll
+++ b/llvm/test/CodeGen/Thumb/iabs.ll
@@ -3,9 +3,9 @@
;; Integer absolute value, should produce something as good as:
;; Thumb:
-;; asr r2, r0, #31
-;; add r0, r0, r2
-;; eor r0, r2
+;; movs r0, r0
+;; bpl
+;; rsb r0, r0, #0 (with optimization, bpl + rsb is if-converted into rsbmi)
;; bx lr
define i32 @test(i32 %a) {
@@ -13,5 +13,10 @@ define i32 @test(i32 %a) {
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
ret i32 %abs
+; CHECK: movs r0, r0
+; CHECK: bpl
+; CHECK: rsb r0, r0, #0
+; CHECK: bx lr
}
+
OpenPOWER on IntegriCloud