diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 24 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 |
8 files changed, 28 insertions, 56 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 187137be4b1..d0f12678204 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2898,28 +2898,6 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, SN->getBasePtr(), SN->getMemOperand()); } -SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); - if (!CSrc) - return SDValue(); - - const APFloat &F = CSrc->getValueAPF(); - APFloat Zero = APFloat::getZero(F.getSemantics()); - APFloat::cmpResult Cmp0 = F.compare(Zero); - if (Cmp0 == APFloat::cmpLessThan || - (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) { - return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0)); - } - - APFloat One(F.getSemantics(), "1.0"); - APFloat::cmpResult Cmp1 = F.compare(One); - if (Cmp1 == APFloat::cmpGreaterThan) - return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0)); - - return SDValue(CSrc, 0); -} - // FIXME: This should go in generic DAG combiner with an isTruncateFree check, // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU // issues. @@ -3960,8 +3938,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performLoadCombine(N, DCI); case ISD::STORE: return performStoreCombine(N, DCI); - case AMDGPUISD::CLAMP: - return performClampCombine(N, DCI); case AMDGPUISD::RCP: { if (const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) { // XXX - Should this flush denormals? diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 45c89235ac1..8d2183f411a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -78,7 +78,6 @@ protected: bool shouldCombineMemoryType(EVT VT) const; SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 65c483d85c5..8ac2758e4b5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -168,8 +168,6 @@ def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp, [SDNPCommutative, SDNPAssociative] >; -def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>; - // out = min(a, b) a and b are floats, where a nan comparison fails. def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, [] diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 502d754af48..75cefdc965c 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -287,13 +287,6 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } break; - case AMDGPU::CLAMP_R600: { - MachineInstr *NewMI = TII->buildDefaultInstruction( - *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), - MI.getOperand(1).getReg()); - TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP); - break; - } case AMDGPU::FABS_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction( @@ -2180,20 +2173,6 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } - } else if (Opcode == AMDGPU::CLAMP_R600) { - SDValue Src = Node->getOperand(0); - if (!Src.isMachineOpcode() || - !TII->hasInstrModifiers(Src.getMachineOpcode())) - return Node; - int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(), - AMDGPU::OpName::clamp); - if (ClampIdx < 0) - return Node; - SDLoc DL(Node); - std::vector<SDValue> Ops(Src->op_begin(), Src->op_end()); - Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32); - return DAG.getMachineNode(Src.getMachineOpcode(), DL, - Node->getVTList(), Ops); } else { if (!TII->hasInstrModifiers(Opcode)) return Node; diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index 2c628d0466d..7dc5d8c4d80 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -663,13 +663,6 @@ let isCodeGenOnly = 1, isPseudo = 1 in { let usesCustomInserter = 1 in { -class CLAMP <RegisterClass rc> : AMDGPUShaderInst < - (outs rc:$dst), - (ins rc:$src0), - "CLAMP $dst, $src0", - [(set f32:$dst, (AMDGPUclamp f32:$src0))] ->; - class FABS <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), @@ -1194,7 +1187,6 @@ class COS_Common <bits<11> inst> : R600_1OP < let Itinerary = TransALU; } -def CLAMP_R600 : CLAMP <R600_Reg32>; def FABS_R600 : FABS<R600_Reg32>; def FNEG_R600 : FNEG<R600_Reg32>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0a84f5371f5..13f98b7f87e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7087,6 +7087,29 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N, return SDValue(); } +SDValue SITargetLowering::performClampCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); + if (!CSrc) + return SDValue(); + + const APFloat &F = CSrc->getValueAPF(); + APFloat Zero = APFloat::getZero(F.getSemantics()); + APFloat::cmpResult Cmp0 = F.compare(Zero); + if (Cmp0 == APFloat::cmpLessThan || + (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) { + return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0)); + } + + APFloat One(F.getSemantics(), "1.0"); + APFloat::cmpResult Cmp1 = F.compare(One); + if (Cmp1 == APFloat::cmpGreaterThan) + return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0)); + + return SDValue(CSrc, 0); +} + + SDValue SITargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { @@ -7179,6 +7202,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return performFMed3Combine(N, DCI); case AMDGPUISD::CVT_PKRTZ_F16_F32: return performCvtPkRTZCombine(N, DCI); + case AMDGPUISD::CLAMP: + return performClampCombine(N, DCI); case ISD::SCALAR_TO_VECTOR: { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 3acc91db287..15785761aa9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -131,6 +131,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; bool isLegalFlatAddressingMode(const AddrMode &AM) const; bool isLegalGlobalAddressingMode(const AddrMode &AM) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index ef7fbfba416..fd3110bb9f7 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -33,6 +33,8 @@ def SIEncodingFamily { // SI DAG Nodes //===----------------------------------------------------------------------===// +def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>; + def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>, [SDNPMayLoad, SDNPMemOperand] |