summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 100
1 files changed, 68 insertions, 32 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index bddfb190a3b..6146c49a0f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -417,8 +417,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
if (Subtarget->hasFFBL())
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i64, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
@@ -1113,9 +1115,11 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- return LowerCTLZ(Op, DAG);
+ return LowerCTLZ_CTTZ(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return Op;
@@ -2154,13 +2158,33 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
-SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
+static bool isCtlzOpc(unsigned Opc) { // true for CTLZ and CTLZ_ZERO_UNDEF
+ return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
+}
+
+static bool isCttzOpc(unsigned Opc) { // true for CTTZ and CTTZ_ZERO_UNDEF
+ return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF;
+}
+
+SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
+ bool ZeroUndef = Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
+ Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
+
+ unsigned ISDOpc, NewOpc;
+ if (isCtlzOpc(Op.getOpcode())) {
+ ISDOpc = ISD::CTLZ_ZERO_UNDEF;
+ NewOpc = AMDGPUISD::FFBH_U32;
+ } else if (isCttzOpc(Op.getOpcode())) {
+ ISDOpc = ISD::CTTZ_ZERO_UNDEF;
+ NewOpc = AMDGPUISD::FFBL_B32;
+ } else
+ llvm_unreachable("Unexpected OPCode!!!");
+
if (ZeroUndef && Src.getValueType() == MVT::i32)
- return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src);
+ return DAG.getNode(NewOpc, SL, MVT::i32, Src);
SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
@@ -2173,24 +2197,33 @@ SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), MVT::i32);
- SDValue Hi0 = DAG.getSetCC(SL, SetCCVT, Hi, Zero, ISD::SETEQ);
+ // Test the half that is counted first (hi for ctlz, lo for cttz) against 0.
+ SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo;
+ SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ);
- SDValue CtlzLo = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Lo);
- SDValue CtlzHi = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Hi);
+ SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo);
+ SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi);
const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32);
- SDValue Add = DAG.getNode(ISD::ADD, SL, MVT::i32, CtlzLo, Bits32);
-
- // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
- SDValue NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0, Add, CtlzHi);
+ SDValue Add, NewOpr;
+ if (isCtlzOpc(Op.getOpcode())) {
+ Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprLo, Bits32);
+ // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
+ NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprHi);
+ } else {
+ Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprHi, Bits32);
+ // cttz(x) = lo_32(x) == 0 ? cttz(hi_32(x)) + 32 : cttz(lo_32(x))
+ NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprLo);
+ }
if (!ZeroUndef) {
// Test if the full 64-bit input is zero.
// FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
// which we probably don't want.
- SDValue Lo0 = DAG.getSetCC(SL, SetCCVT, Lo, Zero, ISD::SETEQ);
- SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0, Hi0);
+ SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi;
+ SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ);
+ SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0);
// TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
// with the same cycles, otherwise it is slower.
@@ -2201,11 +2234,11 @@ SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
// The instruction returns -1 for 0 input, but the defined intrinsic
// behavior is to return the number of bits.
- NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32,
- SrcIsZero, Bits32, NewCtlz);
+ NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32,
+ SrcIsZero, Bits32, NewOpr);
}
- return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewCtlz);
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
}
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
@@ -3117,13 +3150,10 @@ static bool isNegativeOne(SDValue Val) {
return false;
}
-static bool isCtlzOpc(unsigned Opc) {
- return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
-}
-
-SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG,
+SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
SDValue Op,
- const SDLoc &DL) const {
+ const SDLoc &DL,
+ unsigned Opc) const {
EVT VT = Op.getValueType();
EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
@@ -3133,11 +3163,11 @@ SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG,
if (VT != MVT::i32)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
- SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, DL, MVT::i32, Op);
+ SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op);
if (VT != MVT::i32)
- FFBH = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBH);
+ FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX);
- return FFBH;
+ return FFBX;
}
// The native instructions return -1 on 0 input. Optimize out a select that
@@ -3147,7 +3177,7 @@ SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG,
// against the bitwidth.
//
// TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
-SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond,
+SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const {
ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
@@ -3158,20 +3188,25 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond,
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
SDValue CmpLHS = Cond.getOperand(0);
+ // Classify the count node: it is RHS in the SETEQ form, LHS in the SETNE form.
+ bool IsCttz = isCttzOpc((CCOpcode == ISD::SETNE ? LHS : RHS).getOpcode());
+ unsigned Opc = IsCttz ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
// select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
+ // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_b32 x
if (CCOpcode == ISD::SETEQ &&
- isCtlzOpc(RHS.getOpcode()) &&
+ (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
RHS.getOperand(0) == CmpLHS &&
isNegativeOne(LHS)) {
- return getFFBH_U32(DAG, CmpLHS, SL);
+ return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
+ // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_b32 x
if (CCOpcode == ISD::SETNE &&
- isCtlzOpc(LHS.getOpcode()) &&
+ (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
LHS.getOperand(0) == CmpLHS &&
isNegativeOne(RHS)) {
- return getFFBH_U32(DAG, CmpLHS, SL);
+ return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}
return SDValue();
@@ -3304,7 +3339,7 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
}
// There's no reason to not do this if the condition has other uses.
- return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
+ return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
}
static bool isConstantFPZero(SDValue N) {
@@ -3892,6 +3927,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BFM)
NODE_NAME_CASE(FFBH_U32)
NODE_NAME_CASE(FFBH_I32)
+ NODE_NAME_CASE(FFBL_B32)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MULHI_U24)
OpenPOWER on IntegriCloud