diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 93 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPCInstructions.td | 26 |
2 files changed, 93 insertions, 26 deletions
// Review notes for the patch below. It touches two files so that the
// amdgcn_icmp / amdgcn_fcmp intrinsics can be lowered when their compared
// operands are 16-bit.
//
// llvm/lib/Target/AMDGPU/SIISelLowering.cpp
//   * SITargetLowering constructor: ISD::INTRINSIC_WO_CHAIN is additionally
//     marked Custom for MVT::i16 and MVT::f16.
//   * lowerICMPIntrinsic (new file-static helper): takes the intrinsic node N,
//     reads operand 3 as the predicate and operands 1 and 2 as LHS/RHS.
//     Returns UNDEF of N's result type when operand 3 is not a ConstantSDNode
//     or its value lies outside [FIRST_ICMP_PREDICATE, LAST_ICMP_PREDICATE].
//     When the operand type is i16 and i16 is not a legal type, both operands
//     are widened to i32: SIGN_EXTEND if ICmpInst::isSigned(predicate),
//     ZERO_EXTEND otherwise. The result is an AMDGPUISD::SETCC node built from
//     the (possibly widened) operands and the translated ISD condition code.
//   * lowerFCMPIntrinsic (new file-static helper): same operand layout and the
//     same UNDEF fallback, checked against
//     [FIRST_FCMP_PREDICATE, LAST_FCMP_PREDICATE]. When the operand type is
//     f16 and f16 is not a legal type, both operands are FP_EXTENDed to f32
//     before the AMDGPUISD::SETCC node is built.
//   * SITargetLowering::LowerINTRINSIC_WO_CHAIN: the inline bodies of the
//     Intrinsic::amdgcn_icmp and Intrinsic::amdgcn_fcmp cases are removed and
//     replaced by calls to the two helpers. The removed code forwarded
//     operands 1 and 2 to SETCC unchanged, i.e. without any widening.
//
// llvm/lib/Target/AMDGPU/VOPCInstructions.td
//   * New ICMP_Pattern entries for i16: EQ/NE/UGT/UGE/ULT/ULE select the
//     V_CMP_*_U16_e64 instructions, SGT/SGE/SLT/SLE select V_CMP_*_I16_e64.
//   * New FCMP_Pattern entries for f16, for both the ordered (COND_O*) and the
//     unordered (COND_U*) condition codes, selecting V_CMP_*_F16_e64.
//   * NOTE(review): COND_ONE and COND_UNE both select V_CMP_NEQ_F16_e64 in the
//     added f16 patterns. Confirm against the ISA documentation whether the
//     ordered not-equal case should use a different opcode (presumably
//     V_CMP_LG_F16_e64 -- TODO verify).
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 16ebc8893ab..4e15fdb2f35 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -207,6 +207,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); @@ -3664,6 +3666,69 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode, return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL); } +static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, + SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3)); + if (!CD) + return DAG.getUNDEF(VT); + + int CondCode = CD->getSExtValue(); + if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE || + CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE) + return DAG.getUNDEF(VT); + + ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode); + + + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + + SDLoc DL(N); + + EVT CmpVT = LHS.getValueType(); + if (CmpVT == MVT::i16 && !TLI.isTypeLegal(MVT::i16)) { + unsigned PromoteOp = ICmpInst::isSigned(IcInput) ? 
+ ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + LHS = DAG.getNode(PromoteOp, DL, MVT::i32, LHS); + RHS = DAG.getNode(PromoteOp, DL, MVT::i32, RHS); + } + + ISD::CondCode CCOpcode = getICmpCondCode(IcInput); + + return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS, + DAG.getCondCode(CCOpcode)); +} + +static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, + SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3)); + if (!CD) + return DAG.getUNDEF(VT); + + int CondCode = CD->getSExtValue(); + if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE || + CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) { + return DAG.getUNDEF(VT); + } + + SDValue Src0 = N->getOperand(1); + SDValue Src1 = N->getOperand(2); + EVT CmpVT = Src0.getValueType(); + SDLoc SL(N); + + if (CmpVT == MVT::f16 && !TLI.isTypeLegal(CmpVT)) { + Src0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0); + Src1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1); + } + + FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode); + ISD::CondCode CCOpcode = getFCmpCondCode(IcInput); + return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0, + Src1, DAG.getCondCode(CCOpcode)); +} + void SITargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { @@ -4950,34 +5015,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Denominator, Numerator); } case Intrinsic::amdgcn_icmp: { - const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3)); - if (!CD) - return DAG.getUNDEF(VT); - - int CondCode = CD->getSExtValue(); - if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE || - CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE) - return DAG.getUNDEF(VT); - - ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode); - ISD::CondCode CCOpcode = getICmpCondCode(IcInput); - return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1), - Op.getOperand(2), 
DAG.getCondCode(CCOpcode)); + return lowerICMPIntrinsic(*this, Op.getNode(), DAG); } case Intrinsic::amdgcn_fcmp: { - const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3)); - if (!CD) - return DAG.getUNDEF(VT); - - int CondCode = CD->getSExtValue(); - if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE || - CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) - return DAG.getUNDEF(VT); - - FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode); - ISD::CondCode CCOpcode = getFCmpCondCode(IcInput); - return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1), - Op.getOperand(2), DAG.getCondCode(CCOpcode)); + return lowerFCMPIntrinsic(*this, Op.getNode(), DAG); } case Intrinsic::amdgcn_fmed3: return DAG.getNode(AMDGPUISD::FMED3, DL, VT, diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index cc6b8116afe..091cac8cd35 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -635,6 +635,17 @@ def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>; def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>; def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>; +def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>; +def : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>; +def : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>; +def : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>; +def : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>; +def : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>; +def : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>; +def : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>; +def : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>; +def : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>; + class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat < (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)), @@ -656,6 +667,14 @@ def : FCMP_Pattern <COND_OGE, 
V_CMP_GE_F64_e64, f64>; def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>; def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>; +def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>; +def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>; +def : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>; +def : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>; +def : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>; +def : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>; + + def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>; def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>; def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>; @@ -670,6 +689,13 @@ def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>; def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>; def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>; +def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>; +def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>; +def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>; +def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>; +def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>; +def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>; + //===----------------------------------------------------------------------===// // Target //===----------------------------------------------------------------------===// |