diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 57 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOPInstructions.td | 5 |
3 files changed, 45 insertions, 23 deletions
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 5e462206bc2..5841dcb2b9c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -442,19 +442,19 @@ def S_LSHL_B32 : SOP2_32 <"s_lshl_b32", [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))] >; def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64", - [(set i64:$sdst, (shl i64:$src0, i32:$src1))] + [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))] >; def S_LSHR_B32 : SOP2_32 <"s_lshr_b32", [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))] >; def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64", - [(set i64:$sdst, (srl i64:$src0, i32:$src1))] + [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))] >; def S_ASHR_I32 : SOP2_32 <"s_ashr_i32", [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))] >; def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64", - [(set i64:$sdst, (sra i64:$src0, i32:$src1))] + [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))] >; } // End Defs = [SCC] diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 2d558a34be3..96b233b5a38 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -17,16 +17,16 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> { (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)); list<dag> ret3 = [(set P.DstVT:$vdst, - (node (P.Src0VT src0), + (DivergentFragOrOp<node, P>.ret (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; list<dag> ret2 = [(set P.DstVT:$vdst, - (node (P.Src0VT src0), + (DivergentFragOrOp<node, P>.ret (P.Src0VT src0), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; list<dag> ret1 = [(set P.DstVT:$vdst, - (node (P.Src0VT src0)))]; + (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))]; list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, @@ -35,18 +35,18 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> { class getVOP3PModPat<VOPProfile P, SDPatternOperator node> { list<dag> ret3 = [(set P.DstVT:$vdst, - (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), + (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))]; list<dag> ret2 = [(set P.DstVT:$vdst, - (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), + (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))]; list<dag> ret1 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; + (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, @@ -55,18 +55,18 @@ class getVOP3PModPat<VOPProfile P, SDPatternOperator node> { class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> { list<dag> ret3 = [(set P.DstVT:$vdst, - (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), + (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))]; list<dag> ret2 = [(set P.DstVT:$vdst, - (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), + (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))]; list<dag> ret1 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; + (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, @@ -75,18 +75,18 @@ class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> { class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> { list<dag> ret3 = [(set P.DstVT:$vdst, - (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), + (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))]; list<dag> ret2 = [(set P.DstVT:$vdst, - (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), + (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))), (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))]; list<dag> ret1 = [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; + (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, @@ -94,9 +94,9 @@ class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> { } class getVOP3Pat<VOPProfile P, SDPatternOperator node> { - list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))]; - list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]; - list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]; + list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))]; + list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1))]; + list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0))]; list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, ret1)); @@ -185,6 +185,7 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret, P.Asm64)); + let NeedPatGen = P.NeedPatGen; } class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> { @@ -381,12 +382,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3 let SchedRW = [Write64Bit] in { // These instructions only exist on SI and CI -let SubtargetPredicate = isSICI in { -def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>; -def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>>; -def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>>; +let SubtargetPredicate = isSICI, Predicates = [isSICI] in { +def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>; +def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>; +def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>; def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; -} // End SubtargetPredicate = isSICI +} // End SubtargetPredicate = isSICI, Predicates = [isSICI] let SubtargetPredicate = isVI in { def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>; @@ -395,6 +396,22 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>; } // End SubtargetPredicate = isVI } // End SchedRW = [Write64Bit] +let Predicates = [isVI] in { +def : GCNPat < + (getDivergentFrag<shl>.ret i64:$x, i32:$y), + (V_LSHLREV_B64 $y, $x) +>; +def : AMDGPUPat < + (getDivergentFrag<srl>.ret i64:$x, i32:$y), + (V_LSHRREV_B64 $y, $x) +>; +def : AMDGPUPat < + (getDivergentFrag<sra>.ret i64:$x, i32:$y), + (V_ASHRREV_I64 $y, $x) +>; +} + + let SubtargetPredicate = isCIVI in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 755e030bfc5..e177b2fd081 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -572,6 +572,11 @@ class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> { list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []); } +class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> { + SDPatternOperator ret = !if(!eq(P.NeedPatGen,PatGenMode.Pattern), + !if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op), Op); +} + include "VOPCInstructions.td" include "VOP1Instructions.td" include "VOP2Instructions.td" |