diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 28 | ||||
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 6 |
4 files changed, 62 insertions, 4 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 85d0f9dd691..37071bc3f1c 100644 --- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -86,6 +86,7 @@ private: bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); SDNode *SelectADD_SUB_I64(SDNode *N); + SDNode *SelectDIV_SCALE(SDNode *N); // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" @@ -454,6 +455,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { PackedOffsetWidth); } + case AMDGPUISD::DIV_SCALE: { + return SelectDIV_SCALE(N); + } } return SelectCode(N); } @@ -695,6 +699,30 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } +SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { + SDLoc SL(N); + EVT VT = N->getValueType(0); + + assert(VT == MVT::f32 || VT == MVT::f64); + + unsigned Opc + = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; + + const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32); + + SDValue Ops[] = { + N->getOperand(0), + N->getOperand(1), + N->getOperand(2), + Zero, + Zero, + Zero, + Zero + }; + + return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 6ff1703c919..ca8d0a1626b 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -771,9 +771,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::AMDGPU_div_scale: + case Intrinsic::AMDGPU_div_scale: { + // 3rd parameter required to be a constant. + const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3)); + if (!Param) + return DAG.getUNDEF(VT); + + // Translate to the operands expected by the machine instruction. The + // first parameter must be the same as the first instruction. + SDValue Numerator = Op.getOperand(1); + SDValue Denominator = Op.getOperand(2); + SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator; + return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT, - Op.getOperand(1), Op.getOperand(2)); + Src0, Denominator, Numerator); + } case Intrinsic::AMDGPU_div_fmas: return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td index c4994a250a8..dd9a6dc527f 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.td +++ b/llvm/lib/Target/R600/SIInstrInfo.td @@ -433,6 +433,22 @@ class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 < opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern >, VOP <opName>; + +class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> : VOP3 < + op, (outs vrc:$dst0, SReg_64:$dst1), + (ins arc:$src0, arc:$src1, arc:$src2, + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), + opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern +>, VOP <opName>; + + +class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>; + +class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>; + //===----------------------------------------------------------------------===// // Vector I/O classes //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index f888b8e6e05..6a29dbe77dd 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1455,8 +1455,10 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; } // isCommutable = 1 -defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; +def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>; + +// Double precision division pre-scale. +def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>; defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))] |