diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/vop3.s | 19 |
4 files changed, 29 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index aef4716b0b2..76ac02d4019 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1855,7 +1855,7 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) { unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - if (Desc.getNumDefs() > 0) { + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } for (unsigned E = Operands.size(); I != E; ++I) @@ -1885,7 +1885,7 @@ void AMDGPUAsmParser::cvtVOP3_only(MCInst &Inst, const OperandVector &Operands) void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - if (Desc.getNumDefs() > 0) { + for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 3811d9033d5..0b17aa95459 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1588,13 +1588,13 @@ multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm, // instead of an implicit VCC as in the VOP2b format. multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, string revOp, - bit HasMods = 1, bit useSrc2Input = 0> { - def "" : VOP3_Pseudo <outs, ins, pattern, opName>; + bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> { + def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>; - def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>, + def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>, VOP3DisableFields<1, useSrc2Input, HasMods>; - def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>, + def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>, VOP3DisableFields<1, useSrc2Input, HasMods>; } @@ -1977,11 +1977,11 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName, 3, 1 >; -multiclass VOP3bInst <vop op, string opName, VOPProfile P, list<dag> pattern = []> : +multiclass VOP3bInst <vop op, string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> : VOP3b_2_3_m < op, P.Outs64, P.Ins64, opName#" "#P.Asm64, pattern, - opName, "", 1, 1 + opName, "", 1, 1, VOP3Only >; class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat< diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index c97be93e2d7..0b793166c8c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1768,14 +1768,14 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32", let SchedRW = [WriteFloatFMA, WriteSALU] in { defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32", - VOP3b_F32_I1_F32_F32_F32 + VOP3b_F32_I1_F32_F32_F32, [], 1 >; } let SchedRW = [WriteDouble, WriteSALU] in { // Double precision division pre-scale. defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64", - VOP3b_F64_I1_F64_F64_F64 + VOP3b_F64_I1_F64_F64_F64, [], 1 >; } // End SchedRW = [WriteDouble] diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s index 0161e33a12c..6e8c01dc460 100644 --- a/llvm/test/MC/AMDGPU/vop3.s +++ b/llvm/test/MC/AMDGPU/vop3.s @@ -270,3 +270,22 @@ v_add_f64_e64 v[0:1], -v[2:3], |v[5:6]| clamp mul:4 // SICI: v_add_f64 v[0:1], -v[2:3], |v[5:6]| clamp mul:4 ; encoding: [0x00,0x0a,0xc8,0xd2,0x02,0x0b,0x02,0x30] // VI: v_add_f64 v[0:1], -v[2:3], |v[5:6]| clamp mul:4 ; encoding: [0x00,0x82,0x80,0xd2,0x02,0x0b,0x02,0x30] +v_div_scale_f64 v[24:25], vcc, v[22:23], v[22:23], v[20:21] +// SICI: v_div_scale_f64 v[24:25], vcc, v[22:23], v[22:23], v[20:21] ; encoding: [0x18,0x6a,0xdc,0xd2,0x16,0x2d,0x52,0x04] +// VI: v_div_scale_f64 v[24:25], vcc, v[22:23], v[22:23], v[20:21] ; encoding: [0x18,0x6a,0xe1,0xd1,0x16,0x2d,0x52,0x04] + +v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21] +// SICI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21] ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x04] +// VI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21] ; encoding: [0x18,0x0a,0xe1,0xd1,0x16,0x29,0x52,0x04] + +v_div_scale_f32 v24, vcc, v22, v22, v20 +// SICI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x04] +// VI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x04] + +v_div_scale_f32 v24, vcc, s[10:11], v22, v20 +// SICI: v_div_scale_f32 v24, vcc, s[10:11], v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x0a,0x2c,0x52,0x04] +// VI: v_div_scale_f32 v24, vcc, s[10:11], v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x0a,0x2c,0x52,0x04] + +v_div_scale_f32 v24, s[10:11], v22, v22, v20 +// SICI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xda,0xd2,0x16,0x2d,0x52,0x04] +// VI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04] |

