diff options
author | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-08-16 13:51:56 +0000 |
---|---|---|
committer | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-08-16 13:51:56 +0000 |
commit | ff64aa514b707cb42013f9389bac7535c7d2cef4 (patch) | |
tree | 56f9747c5149ac3acf3c242a141bd91625743b4b /llvm/lib/Target/AMDGPU/VOP3Instructions.td | |
parent | c63f93a197a14d263c49ea65e2c3df7af4ea3efc (diff) | |
download | bcm5719-llvm-ff64aa514b707cb42013f9389bac7535c7d2cef4.tar.gz bcm5719-llvm-ff64aa514b707cb42013f9389bac7535c7d2cef4.zip |
[AMDGPU][MC][GFX9] Added integer clamping support for VOP3 opcodes
See Bug 34152: https://bugs.llvm.org/show_bug.cgi?id=34152
Reviewers: SamWot, artem.tamazov, arsenm
Differential Revision: https://reviews.llvm.org/D36674
llvm-svn: 311006
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP3Instructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 104 |
1 file changed, 84 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index e81e656da6c..cd516b771f8 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -102,10 +102,25 @@ class getVOP3Pat<VOPProfile P, SDPatternOperator node> { ret1)); } +class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> { + list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))]; + list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))]; + list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))]; + list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> : VOP3_Pseudo<OpName, P, - !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret), - VOP3Only>; + !if(P.HasModifiers, + getVOP3ModPat<P, node>.ret, + !if(P.HasIntClamp, + getVOP3ClampPat<P, node>.ret, + getVOP3Pat<P, node>.ret)), + VOP3Only> { + let IntClamp = P.HasIntClamp; +} class VOP3OpSelInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> : VOP3_Pseudo<OpName, P, @@ -143,6 +158,14 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> { let Asm64 = " " # P.Asm64; } +class VOP3Clamp_Profile<VOPProfile P> : VOPProfile<P.ArgVT> { + let HasClamp = 1; + + // FIXME: Hack to stop printing _e64 + let Outs64 = (outs DstRC.RegClass:$vdst); + let Asm64 = " " # getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret; +} + class VOP3OpSel_Profile<VOPProfile P> : VOP3_Profile<P> { let HasClamp = 1; let HasOpSel = 1; @@ -167,11 +190,13 @@ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> { } def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> { + let HasClamp = 1; + // FIXME: Hack to stop printing _e64 let DstRC = RegisterOperand<VReg_64>; let Outs64 = (outs 
DstRC:$vdst, SReg_64:$sdst); - let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; + let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp"; } //===----------------------------------------------------------------------===// @@ -244,8 +269,8 @@ let isCommutable = 1 in { def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>; -def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUmad_i24>; -def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUmad_u24>; +def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>; +def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>; def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>; def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>; def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>; @@ -305,10 +330,10 @@ def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDG def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>; def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>; def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>; -def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_u8>; -def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_hi_u8>; -def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_u16>; -def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; +def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>; +def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>; +def V_SAD_U16 : VOP3Inst 
<"v_sad_u16", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>; +def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>; def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>; def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>; @@ -330,10 +355,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, let AsmMatchConverter = ""; } -def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>; +def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3Clamp_Profile<VOP_I32_I32_I32_I32>>; let Constraints = "@earlyclobber $vdst" in { -def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>; +def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3Clamp_Profile<VOP_I64_I64_I32_I64>>; } // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> { @@ -358,8 +383,8 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>; let SubtargetPredicate = isCIVI in { let Constraints = "@earlyclobber $vdst" in { -def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>; -def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>; +def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3Clamp_Profile<VOP_I64_I64_I32_I64>>; +def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3Clamp_Profile<VOP_V4I32_I64_I32_V4I32>>; } // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { @@ -383,15 +408,15 @@ let isCommutable = 1 in { let F16_ZFILL = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>; -def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>; -def V_MAD_I16 : VOP3Inst 
<"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>; +def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>; +def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>; def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>; } let SubtargetPredicate = isGFX9 in { def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16>>; -def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16>>; -def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16>>; +def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>; +def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3Clamp_Profile<VOP_I16_I16_I16_I16>>; def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16>>; } // End SubtargetPredicate = isGFX9 @@ -416,18 +441,18 @@ multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst, SDPatternOperator op3> { def : Pat< (op2 (op1 i16:$src0, i16:$src1), i16:$src2), - (inst i16:$src0, i16:$src1, i16:$src2) + (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; def : Pat< (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), - (inst i16:$src0, i16:$src1, i16:$src2) + (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; def : Pat< (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), (REG_SEQUENCE VReg_64, - (inst i16:$src0, i16:$src1, i16:$src2), sub0, + (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; } @@ -470,6 +495,45 @@ def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst <"v_cvt_pknorm_i16_f16", VOP3OpSel_Prof def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile<VOP_B32_F16_F16>>; } // End SubtargetPredicate = isGFX9 +//===----------------------------------------------------------------------===// +// Integer Clamp Patterns 
+//===----------------------------------------------------------------------===// + +class getClampPat<VOPProfile P, SDPatternOperator node> { + dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2)); + dag ret2 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1)); + dag ret1 = (P.DstVT (node P.Src0VT:$src0)); + dag ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getClampRes<VOPProfile P, Instruction inst> { + dag ret3 = (inst P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, (i1 0)); + dag ret2 = (inst P.Src0VT:$src0, P.Src1VT:$src1, (i1 0)); + dag ret1 = (inst P.Src0VT:$src0, (i1 0)); + dag ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class IntClampPat<VOP3Inst inst, SDPatternOperator node> : Pat< + getClampPat<inst.Pfl, node>.ret, + getClampRes<inst.Pfl, inst>.ret +>; + +def : IntClampPat<V_MAD_I32_I24, AMDGPUmad_i24>; +def : IntClampPat<V_MAD_U32_U24, AMDGPUmad_u24>; + +def : IntClampPat<V_SAD_U8, int_amdgcn_sad_u8>; +def : IntClampPat<V_SAD_HI_U8, int_amdgcn_sad_hi_u8>; +def : IntClampPat<V_SAD_U16, int_amdgcn_sad_u16>; + +def : IntClampPat<V_MSAD_U8, int_amdgcn_msad_u8>; +def : IntClampPat<V_MQSAD_PK_U16_U8, int_amdgcn_mqsad_pk_u16_u8>; + +def : IntClampPat<V_QSAD_PK_U16_U8, int_amdgcn_qsad_pk_u16_u8>; +def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>; //===----------------------------------------------------------------------===// // Target |