diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 42 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 26 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 |
8 files changed, 84 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 70eb3e1a82c..3350654daad 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -479,6 +479,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FNEG); + setTargetDAGCombine(ISD::FABS); } //===----------------------------------------------------------------------===// @@ -2968,6 +2969,45 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1)); } + case ISD::FP16_TO_FP: { + // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal + // f16, but legalization of f16 fneg ends up pulling it out of the source. + // Put the fneg back as a legal source operation that can be matched later. + SDLoc SL(N); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000) + SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src, + DAG.getConstant(0x8000, SL, SrcVT)); + return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg); + } + default: + return SDValue(); + } +} + +SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + + if (!N0.hasOneUse()) + return SDValue(); + + switch (N0.getOpcode()) { + case ISD::FP16_TO_FP: { + assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal"); + SDLoc SL(N); + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff) + SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src, + DAG.getConstant(0x7fff, SL, SrcVT)); + return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs); + } default: return SDValue(); } @@ -3080,6 +3120,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performSelectCombine(N, DCI); case ISD::FNEG: return performFNegCombine(N, DCI); + case ISD::FABS: + return performFAbsCombine(N, DCI); case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 880d571078b..58ac09f2698 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -85,6 +85,7 @@ protected: SDValue RHS, DAGCombinerInfo &DCI) const; SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const; static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 004b10f5d20..7d2a52bccc8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -637,6 +637,9 @@ def umax_oneuse : HasOneUseBinOp<umax>; def umin_oneuse : HasOneUseBinOp<umin>; def fminnum_oneuse : HasOneUseBinOp<fminnum>; def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>; +def and_oneuse : HasOneUseBinOp<and>; +def or_oneuse : HasOneUseBinOp<or>; +def xor_oneuse : HasOneUseBinOp<xor>; } // Properties = [SDNPCommutative, SDNPAssociative] def sub_oneuse : HasOneUseBinOp<sub>; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 0c5bb0648a1..bdaa5aef83d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -226,6 +226,8 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, case 8: return getLit64Encoding(static_cast<uint64_t>(Imm), STI); case 2: + // FIXME Is this correct? What do inline immediates do on SI for f16 src + // which does not have f16 support? return getLit16Encoding(static_cast<uint16_t>(Imm), STI); default: llvm_unreachable("invalid operand size"); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e718b459ec3..51006589b5c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1824,7 +1824,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); case 16: - return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), + return ST.has16BitInsts() && + AMDGPU::isInlinableLiteral16(Imm.getSExtValue(), ST.hasInv2PiInlineImm()); default: llvm_unreachable("invalid bitwidth"); @@ -1854,8 +1855,13 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, } case 16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { + // A few special case instructions have 16-bit operands on subtargets + // where 16-bit instructions are not legal. + // TODO: Do the 32-bit immediates work? We shouldn't really need to handle + // constants in these cases int16_t Trunc = static_cast<int16_t>(Imm); - return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); + return ST.has16BitInsts() && + AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); } return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index cf9b9c5a145..a691ef11a86 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -619,6 +619,8 @@ def SIOperand { def SRCMODS { int NONE = 0; int NEG = 1; + int ABS = 2; + int NEG_ABS = 3; } def DSTCLAMP { @@ -1132,6 +1134,8 @@ def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>; def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>; def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>; def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>; +def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>; +def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>; def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>; def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 5cdd139a83f..ed0609da9be 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -430,9 +430,26 @@ def : Pat < } // End Predicates = [UnsafeFPMath] + +// f16_to_fp patterns +def : Pat < + (f32 (f16_to_fp i32:$src0)), + (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +def : Pat < + (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))), + (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +def : Pat < + (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))), + (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + def : Pat < - (f32 (fpextend f16:$src)), - (V_CVT_F32_F16_e32 $src) + (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))), + (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; def : Pat < @@ -440,9 +457,10 @@ def : Pat < (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src)) >; +// fp_to_fp16 patterns def : Pat < - (f16 (fpround f32:$src)), - (V_CVT_F16_F32_e32 $src) + (i32 (fp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))), + (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod) >; def : Pat < diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index a15b9ceff2f..15abed4a76b 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -147,8 +147,8 @@ defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>; defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>; defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>; -defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>; -defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>; +defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; +defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>; |