summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 93
-rw-r--r-- llvm/lib/Target/AMDGPU/VOPCInstructions.td 26
2 files changed, 93 insertions, 26 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 16ebc8893ab..4e15fdb2f35 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -207,6 +207,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
@@ -3664,6 +3666,69 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL);
}
+static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, // Builds an AMDGPUISD::SETCC comparing operands 1 and 2 of N,
+ SDNode *N, SelectionDAG &DAG) { // using the integer predicate carried in operand 3.
+ EVT VT = N->getValueType(0); // result type of N; also the type of the SETCC/UNDEF returned
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD) // predicate operand is not a ConstantSDNode
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE || // outside the ICmp predicate range:
+ CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE) // give up and return UNDEF
+ return DAG.getUNDEF(VT);
+
+ ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
+
+
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ SDLoc DL(N);
+
+ EVT CmpVT = LHS.getValueType();
+ if (CmpVT == MVT::i16 && !TLI.isTypeLegal(MVT::i16)) { // i16 operands but TLI reports i16 as not legal: widen to i32
+ unsigned PromoteOp = ICmpInst::isSigned(IcInput) ? // signed predicates sign-extend,
+ ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; // all other predicates zero-extend
+ LHS = DAG.getNode(PromoteOp, DL, MVT::i32, LHS);
+ RHS = DAG.getNode(PromoteOp, DL, MVT::i32, RHS);
+ }
+
+ ISD::CondCode CCOpcode = getICmpCondCode(IcInput); // IR predicate -> ISD::CondCode
+
+ return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
+ DAG.getCondCode(CCOpcode));
+}
+
+static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, // Builds an AMDGPUISD::SETCC comparing operands 1 and 2 of N,
+ SDNode *N, SelectionDAG &DAG) { // using the floating-point predicate carried in operand 3.
+ EVT VT = N->getValueType(0); // result type of N; also the type of the SETCC/UNDEF returned
+ const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
+ if (!CD) // predicate operand is not a ConstantSDNode
+ return DAG.getUNDEF(VT);
+
+ int CondCode = CD->getSExtValue();
+ if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE || // outside the FCmp predicate range:
+ CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE) { // give up and return UNDEF
+ return DAG.getUNDEF(VT);
+ }
+
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ EVT CmpVT = Src0.getValueType();
+ SDLoc SL(N);
+
+ if (CmpVT == MVT::f16 && !TLI.isTypeLegal(CmpVT)) { // f16 operands but TLI reports f16 as not legal: compare as f32
+ Src0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
+ Src1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
+ }
+
+ FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode); // despite the "Ic" name this holds an FCmp predicate
+ ISD::CondCode CCOpcode = getFCmpCondCode(IcInput); // IR predicate -> ISD::CondCode
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
+ Src1, DAG.getCondCode(CCOpcode));
+}
+
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -4950,34 +5015,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Denominator, Numerator);
}
case Intrinsic::amdgcn_icmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
- CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fcmp: {
- const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
- int CondCode = CD->getSExtValue();
- if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
- CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE)
- return DAG.getUNDEF(VT);
-
- FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
- ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
- Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ return lowerFCMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index cc6b8116afe..091cac8cd35 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -635,6 +635,17 @@ def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>; // i16 compares: EQ/NE and the unsigned conditions select the *_U16 opcodes,
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>; // while the signed conditions below select the *_I16 opcodes.
+def : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
+def : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
+def : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
+def : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
+def : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
+
class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
@@ -656,6 +667,14 @@ def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
+def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>; // f16 patterns for the ordered (COND_O*) conditions.
+def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>; // NOTE(review): same opcode as the COND_UNE f16 pattern further down; confirm NEQ (not a distinct ordered opcode) is intended.
+def : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
+def : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
+
+
def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
@@ -670,6 +689,13 @@ def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>; // f16 patterns for the unordered (COND_U*) conditions;
+def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>; // each selects an N-prefixed compare (NLG, NEQ, NLE, NLT, NGE, NGT).
+def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
+def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
+def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud