diff options
author | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-03-27 15:57:17 +0000 |
---|---|---|
committer | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-03-27 15:57:17 +0000 |
commit | c512d44845a52b4ade20e42365ffd83889e62f85 (patch) | |
tree | 01ac0763044d9f03324b0d7faf2646e38302ded3 /llvm/lib | |
parent | 862a41270fbfdb7ee94f639ab1eaa3da8e4cdf99 (diff) | |
download | bcm5719-llvm-c512d44845a52b4ade20e42365ffd83889e62f85.tar.gz bcm5719-llvm-c512d44845a52b4ade20e42365ffd83889e62f85.zip |
[AMDGPU][MC] Fix for Bug 28207 + LIT tests
Enabled the clamp and omod (output modifier) operands for v_cvt_* opcodes whose src0 has an integer type.
Reviewers: vpykhtin, arsenm
Differential Revision: https://reviews.llvm.org/D31327
llvm-svn: 298852
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 33 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 54 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPInstructions.td | 8 |
5 files changed, 95 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index ca695c1d53c..fc3ea67fad0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -162,6 +162,9 @@ private: SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3OMods(SDValue In, SDValue &Src, + SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp) const; @@ -1669,6 +1672,18 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, + SDValue &Clamp, SDValue &Omod) const { + Src = In; + + SDLoc DL(In); + // FIXME: Handle Clamp and Omod + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); + + return true; +} + bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const { unsigned Mods = 0; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 0d4f379c6b9..a352c6f0a5e 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1018,11 +1018,13 @@ public: void cvtId(MCInst &Inst, const OperandVector &Operands); void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands); void cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); void cvtMIMG(MCInst &Inst, const OperandVector &Operands); @@ -3678,6 +3680,15 @@ void 
AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands) } } +void AMDGPUAsmParser::cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands) { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if (TSFlags & SIInstrFlags::VOP3) { + cvtVOP3OMod(Inst, Operands); + } else { + cvtId(Inst, Operands); + } +} + static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { // 1. This operand is input modifiers return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS @@ -3737,6 +3748,28 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { } } +void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + + unsigned I = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { + ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); + } + + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isMod()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + Op.addRegOrImmOperands(Inst, 1); + } + } + + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); +} + void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptIdx; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 561feb98d59..c1eab1fa494 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -659,6 +659,8 @@ def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">; // VOP3Mods, but the input source is known to never be NaN. 
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">; +def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">; + def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">; def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index b4524a6946e..d833e74b754 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -85,10 +85,17 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : } class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { - list<dag> ret = !if(P.HasModifiers, - [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, - i32:$src0_modifiers, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]); + list<dag> ret = + !if(P.HasModifiers, + [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, + i32:$src0_modifiers, + i1:$clamp, i32:$omod))))], + !if(P.HasOMod, + [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0, + i1:$clamp, i32:$omod))))], + [(set P.DstVT:$vdst, (node P.Src0VT:$src0))] + ) + ); } multiclass VOP1Inst <string opName, VOPProfile P, @@ -98,6 +105,23 @@ multiclass VOP1Inst <string opName, VOPProfile P, def _sdwa : VOP1_SDWA_Pseudo <opName, P>; } +// Special profile for instructions which have clamp +// and output modifiers (but have no input modifiers) +class VOPProfileI2F<ValueType dstVt, ValueType srcVt> : + VOPProfile<[dstVt, srcVt, untyped, untyped]> { + + let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); + let Asm64 = "$vdst, $src0$clamp$omod"; + + let HasModifiers = 0; + let HasClamp = 1; + let HasOMod = 1; +} + +def VOP1_F64_I32 : VOPProfileI2F <f64, i32>; +def VOP1_F32_I32 : VOPProfileI2F <f32, i32>; +def VOP1_F16_I16 : VOPProfileI2F <f16, i16>; + //===----------------------------------------------------------------------===// // VOP1 
Instructions //===----------------------------------------------------------------------===// @@ -144,24 +168,24 @@ def V_READFIRSTLANE_B32 : let SchedRW = [WriteQuarterRate32] in { defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; -defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>; -defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>; -defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>; +defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; +defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; +defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>; defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; -defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>; +defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; -defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>; -defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>; -defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>; -defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>; +defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; +defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, 
AMDGPUcvt_f32_ubyte1>; +defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>; +defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>; defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; -defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>; +defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; } // End SchedRW = [WriteQuarterRate32] defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; @@ -299,8 +323,8 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; let SubtargetPredicate = isVI in { -defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>; -defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>; +defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; +defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>; defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>; defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index ceb78b63d68..69906c419db 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -107,8 +107,12 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [], let AsmVariantName = AMDGPUAsmVariants.VOP3; let AsmMatchConverter = !if(!eq(VOP3Only,1), - !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"), - !if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", "")); + !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"), + !if(!eq(P.HasModifiers, 1), + "cvtVOP3_2_mod", + !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "") + ) + ); VOPProfile Pfl = P; } |