diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP3Instructions.td')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 118 |
1 files changed, 98 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 92ed0706dc0..8c3c8dffcc3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -53,6 +53,46 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
                                   ret1));
 }
 
+class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
+  list<dag> ret3 = [(set P.DstVT:$vdst,
+        (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+                                        (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
+              (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
+              (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+  list<dag> ret2 = [(set P.DstVT:$vdst,
+        (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+                              (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
+              (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+  list<dag> ret1 = [(set P.DstVT:$vdst,
+        (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+  list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+                  !if(!eq(P.NumSrcArgs, 2), ret2,
+                  ret1));
+}
+
+class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
+  list<dag> ret3 = [(set P.DstVT:$vdst,
+        (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+                                        (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+              (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
+              (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+  list<dag> ret2 = [(set P.DstVT:$vdst,
+        (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+                              (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+              (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+  list<dag> ret1 = [(set P.DstVT:$vdst,
+        (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+  list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+                  !if(!eq(P.NumSrcArgs, 2), ret2,
+                  ret1));
+}
+
 class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
   list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@@ -67,6 +107,16 @@ class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
     !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret),
     VOP3Only>;
 
+class VOP3OpSelInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+  VOP3_Pseudo<OpName, P,
+    !if(isFloatType<P.Src0VT>.ret,
+        getVOP3OpSelModPat<P, node>.ret,
+        getVOP3OpSelPat<P, node>.ret),
+    1, 0, 1> {
+
+  let AsmMatchConverter = "cvtVOP3OpSel";
+}
+
 // Special case for v_div_fmas_{f32|f64}, since it seems to be the
 // only VOP instruction that implicitly reads VCC.
 let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
@@ -93,6 +143,11 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
   let Asm64 = " " # P.Asm64;
 }
 
+class VOP3OpSel_Profile<VOPProfile P> : VOP3_Profile<P> {
+  let HasClamp = 1;
+  let HasOpSel = 1;
+}
+
 class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
   // v_div_scale_{f32|f64} do not support input modifiers.
   let HasModifiers = 0;
@@ -303,7 +358,7 @@ defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
 } // End Predicates = [Has16BitInsts]
 
 let SubtargetPredicate = isGFX9 in {
-def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
+def V_PACK_B32_F16 : VOP3OpSelInst <"v_pack_b32_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
 def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -313,17 +368,26 @@ def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 
 def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
 
-def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
-def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
-def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
+def V_MED3_F16 : VOP3OpSelInst <"v_med3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
+def V_MED3_I16 : VOP3OpSelInst <"v_med3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
+def V_MED3_U16 : VOP3OpSelInst <"v_med3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
 
-def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
-def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
-def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
+def V_MIN3_F16 : VOP3OpSelInst <"v_min3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmin3>;
+def V_MIN3_I16 : VOP3OpSelInst <"v_min3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmin3>;
+def V_MIN3_U16 : VOP3OpSelInst <"v_min3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumin3>;
 
-def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
-def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
-def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+def V_MAX3_F16 : VOP3OpSelInst <"v_max3_f16", VOP3OpSel_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmax3>;
+def V_MAX3_I16 : VOP3OpSelInst <"v_max3_i16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmax3>;
+def V_MAX3_U16 : VOP3OpSelInst <"v_max3_u16", VOP3OpSel_Profile<VOP_I16_I16_I16_I16>, AMDGPUumax3>;
+
+def V_ADD_I16 : VOP3OpSelInst <"v_add_i16", VOP3OpSel_Profile<VOP_I16_I16_I16>>;
+def V_SUB_I16 : VOP3OpSelInst <"v_sub_i16", VOP3OpSel_Profile<VOP_I16_I16_I16>>;
+
+def V_MAD_U32_U16 : VOP3OpSelInst <"v_mad_u32_u16", VOP3OpSel_Profile<VOP_I32_I16_I16_I32>>;
+def V_MAD_I32_I16 : VOP3OpSelInst <"v_mad_i32_i16", VOP3OpSel_Profile<VOP_I32_I16_I16_I32>>;
+
+def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst <"v_cvt_pknorm_i16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
+def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>;
 } // End SubtargetPredicate = isGFX9
 
 
@@ -443,6 +507,11 @@ multiclass VOP3be_Real_vi<bits<10> op> {
             VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
 }
 
+multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
+  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+            VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+}
+
 } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
 
 defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
@@ -527,18 +596,27 @@ defm V_ADD3_U32 : VOP3_Real_vi <0x1ff>;
 defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>;
 defm V_AND_OR_B32 : VOP3_Real_vi <0x201>;
 defm V_OR3_B32 : VOP3_Real_vi <0x202>;
-defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
+defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx9 <0x2a0>;
 
 defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
 
-defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>;
-defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>;
-defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>;
+defm V_MIN3_F16 : VOP3OpSel_Real_gfx9 <0x1f4>;
+defm V_MIN3_I16 : VOP3OpSel_Real_gfx9 <0x1f5>;
+defm V_MIN3_U16 : VOP3OpSel_Real_gfx9 <0x1f6>;
+
+defm V_MAX3_F16 : VOP3OpSel_Real_gfx9 <0x1f7>;
+defm V_MAX3_I16 : VOP3OpSel_Real_gfx9 <0x1f8>;
+defm V_MAX3_U16 : VOP3OpSel_Real_gfx9 <0x1f9>;
+
+defm V_MED3_F16 : VOP3OpSel_Real_gfx9 <0x1fa>;
+defm V_MED3_I16 : VOP3OpSel_Real_gfx9 <0x1fb>;
+defm V_MED3_U16 : VOP3OpSel_Real_gfx9 <0x1fc>;
+
+defm V_ADD_I16 : VOP3OpSel_Real_gfx9 <0x29e>;
+defm V_SUB_I16 : VOP3OpSel_Real_gfx9 <0x29f>;
 
-defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>;
-defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>;
-defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>;
+defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx9 <0x1f1>;
+defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>;
 
-defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
-defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
-defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
+defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
+defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;

