From 6c7d7eebda38340d8bbaf99c6ceedb55e32c3a72 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Fri, 18 Oct 2019 14:49:53 +0000 Subject: [AMDGPU][MC][GFX10] Added sdwa/dpp versions of v_cndmask_b32 See https://bugs.llvm.org/show_bug.cgi?id=43608 Reviewers: arsenm, rampitec Differential Revision: https://reviews.llvm.org/D69096 llvm-svn: 375241 --- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 3 + llvm/lib/Target/AMDGPU/VOP2Instructions.td | 129 ++++++++++++--------- 2 files changed, 80 insertions(+), 52 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index d2ea94548df..a9888e6ed92 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -599,9 +599,11 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp8_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10: @@ -665,6 +667,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI, switch (MI->getOpcode()) { default: break; + case AMDGPU::V_CNDMASK_B32_sdwa_gfx10: case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10: diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index f91096b0f72..1ab0fc1ab58 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -956,13 +956,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } // End DecoderNamespace = "SDWA10" //===------------------------------ VOP2be ------------------------------===// - multiclass VOP2be_Real_gfx10 op, string opName, string asmName> { + multiclass VOP2be_Real_e32_gfx10 op, string opName, string asmName> { def _e32_gfx10 : VOP2_Real(opName#"_e32"), SIEncodingFamily.GFX10>, VOP2e(opName#"_e32").Pfl> { VOP2_Pseudo Ps = !cast(opName#"_e32"); let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); } + } + multiclass VOP2be_Real_e64_gfx10 op, string opName, string asmName> { def _e64_gfx10 : VOP3_Real(opName#"_e64"), SIEncodingFamily.GFX10>, VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, @@ -970,6 +972,8 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3_Pseudo Ps = !cast(opName#"_e64"); let AsmString = asmName # Ps.AsmOperands; } + } + multiclass VOP2be_Real_sdwa_gfx10 op, string opName, string asmName> { foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(opName#"_sdwa")>, @@ -978,6 +982,28 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); let DecoderNamespace = "SDWA10"; } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w32_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w64_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # Ps.AsmOperands; + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp_gfx10 op, string opName, string asmName> { foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx10 : VOP2_DPP16(opName#"_dpp"), asmName> { @@ -986,60 +1012,46 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let DecoderNamespace = "SDWA10"; } foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w32_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w64_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp8_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32"), asmName> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; let AsmString = asmName # !subst(", vcc", "", AsmDPP8); let DecoderNamespace = "DPP8"; } - - let WaveSizePredicate = isWave32 in { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in - def _sdwa_w32_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w32_gfx10 : - Base_VOP2_DPP16(opName#"_dpp"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); - let isAsmParserOnly = 1; - } - def _dpp8_w32_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave32 - - let WaveSizePredicate = isWave64 in { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in - def _sdwa_w64_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # Ps.AsmOperands; - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w64_gfx10 : - Base_VOP2_DPP16(opName#"_dpp"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # AsmDPP; - let isAsmParserOnly = 1; - } - def _dpp8_w64_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # AsmDPP8; - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave64 + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w32_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w64_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } } //===----------------------------- VOP3Only -----------------------------===// @@ -1060,8 +1072,19 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -multiclass Base_VOP2_Real_gfx10 op> : - VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10; +multiclass VOP2be_Real_gfx10 op, string opName, string asmName> : + VOP2be_Real_e32_gfx10, + VOP2be_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; + +multiclass VOP2e_Real_gfx10 op, string opName, string asmName> : + VOP2_Real_e32_gfx10, + VOP2_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; multiclass VOP2_Real_gfx10 op> : VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10, @@ -1075,7 +1098,6 @@ multiclass VOP2_Real_gfx10_with_name op, string opName, VOP2_Real_dpp_gfx10_with_name, VOP2_Real_dpp8_gfx10_with_name; -defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>; defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; @@ -1108,6 +1130,9 @@ defm V_SUB_CO_CI_U32 : defm V_SUBREV_CO_CI_U32 : VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; +defm V_CNDMASK_B32 : + VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; + // VOP3 only. defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; -- cgit v1.2.3