diff options
| author | Nikolay Haustov <Nikolay.Haustov@amd.com> | 2016-03-11 09:27:25 +0000 |
|---|---|---|
| committer | Nikolay Haustov <Nikolay.Haustov@amd.com> | 2016-03-11 09:27:25 +0000 |
| commit | 6560781c4f43f4c2d311db06070163a0ae1c0cd3 (patch) | |
| tree | ea5772c3c58c256125a6cc59bc85dfd8cf747fe2 /llvm | |
| parent | 45a9c203a01b46afe43885843134926449cc1a5f (diff) | |
| download | bcm5719-llvm-6560781c4f43f4c2d311db06070163a0ae1c0cd3.tar.gz bcm5719-llvm-6560781c4f43f4c2d311db06070163a0ae1c0cd3.zip | |
[AMDGPU] Assembler: change v_madmk operands to have the same order as mad.
The constant is now source operand 1 (previously it was source operand 2).
This matches the operand order used by the legacy AMD sp3 assembler.
Update tests.
Differential Revision: http://reviews.llvm.org/D17984
llvm-svn: 263212
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 17 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VIInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/madmk.ll | 4 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/vop2.s | 14 |
6 files changed, 28 insertions, 37 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index f3dfde7e104..70d131889a7 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1056,14 +1056,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) return false; - // We need to do some weird looking operand shuffling since the madmk - // operands are out of the normal expected order with the multiplied - // constant as the last operand. - // - // v_mad_f32 src0, src1, src2 -> v_madmk_f32 src0 * src2K + src1 - // src0 -> src2 K - // src1 -> src0 - // src2 -> src1 + // We need to swap operands 0 and 1 since madmk constant is at operand 1. const int64_t Imm = DefMI->getOperand(1).getImm(); @@ -1078,22 +1071,16 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, unsigned Src1Reg = Src1->getReg(); unsigned Src1SubReg = Src1->getSubReg(); - unsigned Src2Reg = Src2->getReg(); - unsigned Src2SubReg = Src2->getSubReg(); Src0->setReg(Src1Reg); Src0->setSubReg(Src1SubReg); Src0->setIsKill(Src1->isKill()); - Src1->setReg(Src2Reg); - Src1->setSubReg(Src2SubReg); - Src1->setIsKill(Src2->isKill()); - if (Opc == AMDGPU::V_MAC_F32_e64) { UseMI->untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); } - Src2->ChangeToImmediate(Imm); + Src1->ChangeToImmediate(Imm); removeModOperands(*UseMI); UseMI->setDesc(get(AMDGPU::V_MADMK_F32)); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 49fbee561a4..ac3192d5512 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1560,9 +1560,13 @@ def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> { } def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; -def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> { - field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, 
u32imm:$src2); - field string Asm = "$vdst, $src0, $vsrc1, $src2"; +def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> { + field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$k); + field string Asm32 = "$vdst, $src0, $vsrc1, $k"; +} +def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> { + field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$k, VGPR_32:$vsrc1); + field string Asm32 = "$vdst, $src0, $k, $vsrc1"; } def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); @@ -2098,13 +2102,13 @@ multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P, revOp >; -multiclass VOP2MADK <vop2 op, string opName, list<dag> pattern = []> { +multiclass VOP2MADK <vop2 op, string opName, VOPProfile P, list<dag> pattern = []> { - def "" : VOP2_Pseudo <VOP_MADK.Outs, VOP_MADK.Ins, pattern, opName>; + def "" : VOP2_Pseudo <P.Outs, P.Ins32, pattern, opName>; let isCodeGenOnly = 0 in { - def _si : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins, - !strconcat(opName, VOP_MADK.Asm), []>, + def _si : VOP2Common <P.Outs, P.Ins32, + !strconcat(opName, P.Asm32), []>, SIMCInstr <opName#"_e32", SISubtarget.SI>, VOP2_MADKe <op.SI> { let AssemblerPredicates = [isSICI]; @@ -2112,8 +2116,8 @@ let isCodeGenOnly = 0 in { let DisableDecoder = DisableSIDecoder; } - def _vi : VOP2Common <VOP_MADK.Outs, VOP_MADK.Ins, - !strconcat(opName, VOP_MADK.Asm), []>, + def _vi : VOP2Common <P.Outs, P.Ins32, + !strconcat(opName, P.Asm32), []>, SIMCInstr <opName#"_e32", SISubtarget.VI>, VOP2_MADKe <op.VI> { let AssemblerPredicates = [isVI]; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 04d735fe39f..e598c146280 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1538,10 +1538,10 @@ defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>; } } // End isCommutable = 1 -defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">; +defm 
V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32", VOP_MADMK>; let isCommutable = 1 in { -defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">; +defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32", VOP_MADAK>; } // End isCommutable = 1 let isCommutable = 1 in { diff --git a/llvm/lib/Target/AMDGPU/VIInstructions.td b/llvm/lib/Target/AMDGPU/VIInstructions.td index ddd164b9112..dd85fb194f7 100644 --- a/llvm/lib/Target/AMDGPU/VIInstructions.td +++ b/llvm/lib/Target/AMDGPU/VIInstructions.td @@ -54,9 +54,9 @@ defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16, defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>; defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>; } // End isCommutable = 1 -defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16">; +defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16", VOP_MADMK>; let isCommutable = 1 in { -defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16">; +defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16", VOP_MADAK>; defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>; defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16" , VOP_I16_I16_I16>; defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>; diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll index a9b95f9b2a9..be5fff7ab45 100644 --- a/llvm/test/CodeGen/AMDGPU/madmk.ll +++ b/llvm/test/CodeGen/AMDGPU/madmk.ll @@ -7,7 +7,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madmk_f32: ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 +; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]] define void @madmk_f32(float addrspace(1)* noalias %out, 
float addrspace(1)* noalias %in) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -182,7 +182,7 @@ define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float ad ; SI-LABEL: {{^}}kill_madmk_verifier_error: ; SI: s_xor_b64 -; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c +; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}} ; SI: s_or_b64 define void @kill_madmk_verifier_error() nounwind { bb: diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s index 2b8249152b7..f9d4ab3710f 100644 --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -231,12 +231,12 @@ v_bfm_b32 v1, v2, v3 // VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] v_mac_f32 v1, v2, v3 -// SICI: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42] -// VI: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42] -v_madmk_f32 v1, v2, v3, 64.0 +// SICI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42] +// VI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42] +v_madmk_f32 v1, v2, 64.0, v3 // SICI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42] -// VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42] +// VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42] v_madak_f32 v1, v2, v3, 64.0 // SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] @@ -410,9 +410,9 @@ v_mul_f16 v1, v2, v3 v_mac_f16 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU -// NOSICI: v_madmk_f16 v1, v2, v3, 64.0 -// VI: v_madmk_f16_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x00,0x80,0x42] 
-v_madmk_f16 v1, v2, v3, 64.0 +// NOSICI: v_madmk_f16 v1, v2, 64.0, v3 +// VI: v_madmk_f16_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x00,0x80,0x42] +v_madmk_f16 v1, v2, 64.0, v3 // NOSICI: error: instruction not supported on this GPU // NOSICI: v_madak_f16 v1, v2, v3, 64.0 |

