-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td                | 72
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td             | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td             | 10
-rw-r--r--  llvm/test/MC/AMDGPU/vop2-err.s                       | 24
-rw-r--r--  llvm/test/MC/AMDGPU/vop2.s                           | 61
6 files changed, 124 insertions, 59 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index df6f396a403..9ce6874cad3 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -215,6 +215,10 @@ public:
            (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
   }
 
+  bool isSCSrc64() const {
+    return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
+  }
+
   bool isVCSrc32() const {
     return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1e5e04938be..8664c050e26 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -990,7 +990,14 @@ class getVOPSrc1ForVT<ValueType VT> {
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
 class getVOP3SrcForVT<ValueType VT> {
-  RegisterOperand ret = !if(!eq(VT.Size, 64), VCSrc_64, VCSrc_32);
+  RegisterOperand ret =
+    !if(!eq(VT.Size, 64),
+       VCSrc_64,
+       !if(!eq(VT.Value, i1.Value),
+          SCSrc_64,
+          VCSrc_32
+        )
+      );
 }
 
 // Returns 1 if the source arguments have modifiers, 0 if they do not.
@@ -1070,7 +1077,6 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
               "$dst, "#src0#src1#src2#"$clamp"#"$omod");
 }
 
-
 class VOPProfile <list<ValueType> _ArgVT> {
 
   field list<ValueType> ArgVT = _ArgVT;
@@ -1132,17 +1138,26 @@ def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
 
-class VOP2b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, untyped]> {
+// Write out to vcc or arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
   let Asm32 = "$dst, vcc, $src0, $src1";
   let Asm64 = "$dst, $sdst, $src0, $src1";
   let Outs32 = (outs DstRC:$dst);
   let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
 }
 
-def VOP2b_I32_I1_I32_I32 : VOP2b_Profile<i32>;
-
-def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile<i32> {
+// Write out to vcc or arbitrary SGPR and read in from vcc or
+// arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
   let Src0RC32 = VCSrc_32;
+  let Asm32 = "$dst, vcc, $src0, $src1, vcc";
+  let Asm64 = "$dst, $sdst, $src0, $src1, $src2";
+  let Outs32 = (outs DstRC:$dst);
+  let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+
+  // Suppress src2 implied by type since the 32-bit encoding uses an
+  // implicit VCC use.
+  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
 }
 
 // VOPC instructions are a special case because for the 32-bit
@@ -1429,32 +1444,19 @@ multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
   // No VI instruction. This class is for SI only.
 }
 
-// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
-// option of implicit vcc use?
-multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
-                      list<dag> pattern, string opName, string revOp,
-                      bit HasMods = 1, bit UseFullOp = 0> {
-  def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
-           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
-  def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 0, HasMods>;
-
-  def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 0, HasMods>;
-}
-
-multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
-                      list<dag> pattern, string opName, string revOp,
-                      bit HasMods = 1, bit UseFullOp = 0> {
+// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
+// instead of an implicit VCC as in the VOP2b format.
+multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
+                        list<dag> pattern, string opName, string revOp,
+                        bit HasMods = 1, bit useSGPRInput = 0,
+                        bit UseFullOp = 0> {
   def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
 
   def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 1, HasMods>;
+            VOP3DisableFields<1, useSGPRInput, HasMods>;
 
   def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
-            VOP3DisableFields<1, 1, HasMods>;
+            VOP3DisableFields<1, useSGPRInput, HasMods>;
 }
 
 multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
@@ -1575,12 +1577,14 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
 multiclass VOP2b_Helper <vop2 op, string opName, dag outs32, dag outs64,
                          dag ins32, string asm32, list<dag> pat32,
                          dag ins64, string asm64, list<dag> pat64,
-                         string revOp, bit HasMods> {
+                         string revOp, bit HasMods, bit useSGPRInput> {
 
-  defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+  let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+    defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+  }
 
-  defm _e64 : VOP3b_2_m <op,
-    outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods
+  defm _e64 : VOP3b_2_3_m <op,
+    outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods, useSGPRInput
   >;
 }
 
@@ -1596,7 +1600,7 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
                                        i1:$clamp, i32:$omod)),
                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
       [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
-    revOp, P.HasModifiers
+    revOp, P.HasModifiers, !eq(P.NumSrcArgs, 3)
   >;
 
 // A VOP2 instruction that is VOP3-only on VI.
@@ -1847,14 +1851,14 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
 
 multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
                          string opName, list<dag> pattern> :
-  VOP3b_3_m <
+  VOP3b_2_3_m <
   op, (outs vrc:$vdst, SReg_64:$sdst),
   (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
        InputModsNoDefault:$src1_modifiers, arc:$src1,
        InputModsNoDefault:$src2_modifiers, arc:$src2,
       ClampMod:$clamp, omod:$omod),
  opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
-  opName, opName, 1, 1
+  opName, opName, 1, 0, 1
 >;
 
 multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index c3835411d38..bd22e886920 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1507,7 +1507,7 @@ let isCommutable = 1 in {
 defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
 } // End isCommutable = 1
 
-let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+let isCommutable = 1 in {
 
 // No patterns so that the scalar instructions are always selected.
 // The scalar versions will be replaced with vector when needed later.
@@ -1522,19 +1522,17 @@ defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
   VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
 >;
 
-let Uses = [VCC] in { // Carry-in comes from VCC
 defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
-  VOP2b_I32_I1_I32_I32_VCC
+  VOP2b_I32_I1_I32_I32_I1
 >;
 defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
-  VOP2b_I32_I1_I32_I32_VCC
+  VOP2b_I32_I1_I32_I32_I1
 >;
 defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
-  VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32"
+  VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
 >;
-} // End Uses = [VCC]
-} // End isCommutable = 1, Defs = [VCC]
+} // End isCommutable = 1
 
 defm V_READLANE_B32 : VOP2SI_3VI_m <
   vop3 <0x001, 0x289>,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 1ab9bc4569f..608fe44f485 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -281,3 +281,13 @@ def VCSrc_64 : RegisterOperand<VS_64> {
   let OperandType = "OPERAND_REG_INLINE_C";
   let ParserMatchClass = RegImmMatcher<"VCSrc64">;
 }
+
+//===----------------------------------------------------------------------===//
+//  SCSrc_* Operands with an SGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+def SCSrc_64 : RegisterOperand<SReg_64> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_REG_INLINE_C";
+  let ParserMatchClass = RegImmMatcher<"SCSrc64">;
+}
diff --git a/llvm/test/MC/AMDGPU/vop2-err.s b/llvm/test/MC/AMDGPU/vop2-err.s
index 47d7d5bbecb..8d282f9bf7e 100644
--- a/llvm/test/MC/AMDGPU/vop2-err.s
+++ b/llvm/test/MC/AMDGPU/vop2-err.s
@@ -35,4 +35,28 @@ v_mul_i32_i24_e64 v1, v2, 100
 v_add_i32_e32 v1, s[0:1], v2, v3
 // CHECK: error: invalid operand for instruction
 
+v_addc_u32_e32 v1, vcc, v2, v3, s[2:3]
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, s[0:1], v2, v3, s[2:3]
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, -1
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, s0
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, -1, v2, v3, s0
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e64 v1, s[0:1], v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32 v1, s[0:1], v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
 // TODO: Constant bus restrictions
diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s
index 0a875608464..2b8249152b7 100644
--- a/llvm/test/MC/AMDGPU/vop2.s
+++ b/llvm/test/MC/AMDGPU/vop2.s
@@ -307,29 +307,54 @@ v_subrev_u32 v1, vcc, v2, v3
 // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
 v_subrev_u32 v1, s[0:1], v2, v3
 
-// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
-// VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
-v_addc_u32 v1, vcc, v2, v3
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32 v1, vcc, v2, v3, vcc
 
-// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
-v_addc_u32 v1, s[0:1], v2, v3
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32_e32 v1, vcc, v2, v3, vcc
 
-// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
-// VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
-v_subb_u32 v1, vcc, v2, v3
-
-// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
-v_subb_u32 v1, s[0:1], v2, v3
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32 v1, s[0:1], v2, v3, vcc
 
-// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
-// VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
-v_subbrev_u32 v1, vcc, v2, v3
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32 v1, s[0:1], v2, v3, s[2:3]
 
-// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
-v_subbrev_u32 v1, s[0:1], v2, v3
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3]
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, s[0:1], v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, vcc, v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32_e64 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52]
+// VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a]
+v_subb_u32 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01]
+v_subb_u32 v1, s[0:1], v2, v3, vcc
+
+// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x54]
+// VI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3c]
+v_subbrev_u32 v1, vcc, v2, v3, vcc
+
+// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01]
+v_subbrev_u32 v1, s[0:1], v2, v3, vcc
 
 // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
 // VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
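
For checking the new vop2.s encodings by hand: the explicit carry-in is the third 9-bit source field of the second VOP3 encoding dword, and the operand codes used by the tests are 2 for s[2:3] (SGPR pairs encode as the index of their first register), 106 for vcc (VCC_LO), and 193 for the inline constant -1. The following is a minimal standalone C++ sketch of that decoding, an illustration only and not part of the patch; the field layout and operand codes are assumed from the public SI/VI ISA operand tables.

#include <cstdint>
#include <cstdio>

// Decode the three 9-bit VOP3 source fields from the second encoding dword
// (the last four bytes shown in the checks, little-endian):
//   src0 = bits [8:0], src1 = bits [17:9], src2 = bits [26:18].
static void decodeVOP3Srcs(const uint8_t b[4]) {
  uint32_t dword = b[0] | (b[1] << 8) | (b[2] << 16) | ((uint32_t)b[3] << 24);
  printf("src0=%u src1=%u src2=%u\n",
         (unsigned)(dword & 0x1ff),
         (unsigned)((dword >> 9) & 0x1ff),
         (unsigned)((dword >> 18) & 0x1ff));
}

int main() {
  const uint8_t carryInVcc[4]  = {0x02, 0x07, 0xaa, 0x01}; // ..., v2, v3, vcc
  const uint8_t carryInSgpr[4] = {0x02, 0x07, 0x0a, 0x00}; // ..., v2, v3, s[2:3]
  const uint8_t carryInImm[4]  = {0x02, 0x07, 0x06, 0x03}; // ..., v2, v3, -1
  decodeVOP3Srcs(carryInVcc);   // src0=258 (v2) src1=259 (v3) src2=106 (vcc)
  decodeVOP3Srcs(carryInSgpr);  // src2=2   (s[2:3])
  decodeVOP3Srcs(carryInImm);   // src2=193 (inline constant -1)
  return 0;
}

Those src2 bytes (0xaa,0x01 / 0x0a,0x00 / 0x06,0x03) are exactly what distinguish the new e64 checks from the old ones, which left the src2 field at 0.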

