diff options
-rw-r--r-- | llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 47 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 14 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb2/csel.ll | 8 |
5 files changed, 79 insertions, 34 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 54ddb946a2d..4a7f26d4fb2 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5354,3 +5354,50 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, return &*CmpMI; } + +unsigned llvm::ConstantMaterializationCost(unsigned Val, + const ARMSubtarget *Subtarget, + bool ForCodesize) { + if (Subtarget->isThumb()) { + if (Val <= 255) // MOV + return ForCodesize ? 2 : 1; + if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV + ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW + ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN + return ForCodesize ? 4 : 1; + if (Val <= 510) // MOV + ADDi8 + return ForCodesize ? 4 : 2; + if (~Val <= 255) // MOV + MVN + return ForCodesize ? 4 : 2; + if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL + return ForCodesize ? 4 : 2; + } else { + if (ARM_AM::getSOImmVal(Val) != -1) // MOV + return ForCodesize ? 4 : 1; + if (ARM_AM::getSOImmVal(~Val) != -1) // MVN + return ForCodesize ? 4 : 1; + if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW + return ForCodesize ? 4 : 1; + if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs + return ForCodesize ? 8 : 2; + } + if (Subtarget->useMovt()) // MOVW + MOVT + return ForCodesize ? 8 : 2; + return ForCodesize ? 8 : 3; // Literal pool load +} + +bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, + const ARMSubtarget *Subtarget, + bool ForCodesize) { + // Check with ForCodesize + unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize); + unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize); + if (Cost1 < Cost2) + return true; + if (Cost1 > Cost2) + return false; + + // If they are equal, try with !ForCodesize + return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < + ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); +} diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 6e9385e8f42..e70695a4d97 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -621,6 +621,20 @@ void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond); void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive); +/// Returns the number of instructions required to materialize the given +/// constant in a register, or 3 if a literal pool load is needed. +/// If ForCodesize is specified, an approximate cost in bytes is returned. +unsigned ConstantMaterializationCost(unsigned Val, + const ARMSubtarget *Subtarget, + bool ForCodesize = false); + +/// Returns true if Val1 has a lower Constant Materialization Cost than Val2. +/// Uses the cost from ConstantMaterializationCost, first with ForCodesize as +/// specified. If the scores are equal, return the comparison for !ForCodesize. +bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, + const ARMSubtarget *Subtarget, + bool ForCodesize = false); + } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9cdf2eb9c32..a59a57327d1 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -250,10 +250,6 @@ private: SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, bool is64BitVector); - /// Returns the number of instructions required to materialize the given - /// constant in a register, or 3 if a literal pool load is needed. - unsigned ConstantMaterializationCost(unsigned Val) const; - /// Checks if N is a multiplication by a constant where we can extract out a /// power of two from the constant so that it can be used in a shift, but only /// if it simplifies the materialization of the constant. Returns true if it @@ -454,27 +450,6 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); } -unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { - if (Subtarget->isThumb()) { - if (Val <= 255) return 1; // MOV - if (Subtarget->hasV6T2Ops() && - (Val <= 0xffff || // MOV - ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW - ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN - return 1; - if (Val <= 510) return 2; // MOV + ADDi8 - if (~Val <= 255) return 2; // MOV + MVN - if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL - } else { - if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV - if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN - if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW - if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs - } - if (Subtarget->useMovt()) return 2; // MOVW + MOVT - return 3; // Literal pool load -} - bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, unsigned &PowerOfTwo, @@ -504,8 +479,8 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, // Only optimise if the new cost is better unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); - unsigned OldCost = ConstantMaterializationCost(MulConstVal); - unsigned NewCost = ConstantMaterializationCost(NewMulConstVal); + unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); + unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); return NewCost < OldCost; } @@ -2791,7 +2766,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: { unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); // If we can't materialize the constant we need to use a literal pool - if (ConstantMaterializationCost(Val) > 2) { + if (ConstantMaterializationCost(Val, Subtarget) > 2) { SDValue CPIdx = CurDAG->getTargetConstantPool( ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), TLI->getPointerTy(CurDAG->getDataLayout())); @@ -2932,8 +2907,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { bool PreferImmediateEncoding = Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); if (!PreferImmediateEncoding && - ConstantMaterializationCost(Imm) > - ConstantMaterializationCost(~Imm)) { + ConstantMaterializationCost(Imm, Subtarget) > + ConstantMaterializationCost(~Imm, Subtarget)) { // The current immediate costs more to materialize than a negated // immediate, so negate the immediate and use a BIC. SDValue NewImm = diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0239d6af8ad..907517461e7 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -4841,6 +4841,15 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } if (Opcode) { + // If one of the constants is cheaper than another, materialise the + // cheaper one and let the csel generate the other. + if (Opcode != ARMISD::CSINC && + HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) { + std::swap(TrueVal, FalseVal); + std::swap(TVal, FVal); + CC = ISD::getSetCCInverse(CC, true); + } + // Attempt to use ZR checking TVal is 0, possibly inverting the condition // to get there. CSINC not is invertable like the other two (~(~a) == a, // -(-a) == a, but (a+1)+1 != a). diff --git a/llvm/test/CodeGen/Thumb2/csel.ll b/llvm/test/CodeGen/Thumb2/csel.ll index 17a111278b6..1632b8b78f2 100644 --- a/llvm/test/CodeGen/Thumb2/csel.ll +++ b/llvm/test/CodeGen/Thumb2/csel.ll @@ -42,9 +42,9 @@ entry: define i32 @csinv_const_56(i32 %a) { ; CHECK-LABEL: csinv_const_56: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mvn r1, #5 +; CHECK-NEXT: movs r1, #5 ; CHECK-NEXT: cmp r0, #45 -; CHECK-NEXT: csinv r0, r1, r1, gt +; CHECK-NEXT: csinv r0, r1, r1, le ; CHECK-NEXT: bx lr entry: %cmp = icmp sgt i32 %a, 45 @@ -93,9 +93,9 @@ entry: define i32 @csneg_const_r(i32 %a) { ; CHECK-LABEL: csneg_const_r: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: movs r1, #1 ; CHECK-NEXT: cmp r0, #45 -; CHECK-NEXT: csneg r0, r1, r1, gt +; CHECK-NEXT: csneg r0, r1, r1, le ; CHECK-NEXT: bx lr entry: %cmp = icmp sgt i32 %a, 45 |