diff options
| field | value | date |
|---|---|---|
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-08 18:11:51 -0400 |
| committer | Matt Arsenault <arsenm2@gmail.com> | 2019-12-30 14:24:25 -0500 |
| commit | 7fa0bfe7d580e2b96b8d7f5bd0470287857e84cc (patch) | |
| tree | a8c579bc07bda84e7971460bc35be4ca7ecc3ffa | |
| parent | 94d08feaeff3591a36ed548ba7c732ddedd6f983 (diff) | |
| download | bcm5719-llvm-7fa0bfe7d580e2b96b8d7f5bd0470287857e84cc.tar.gz bcm5719-llvm-7fa0bfe7d580e2b96b8d7f5bd0470287857e84cc.zip | |
AMDGPU/GlobalISel: Select mul24 intrinsics
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 12 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP2Instructions.td | 4 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir | 65 |
3 files changed, 77 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 13afcb4cdb9..50c451be4b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -290,10 +290,10 @@ def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
 
 // Signed and unsigned 24-bit multiply. The highest 8-bits are ignore
 // when performing the mulitply. The result is a 32-bit value.
-def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
+def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
   [SDNPCommutative, SDNPAssociative]
 >;
-def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
+def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
   [SDNPCommutative, SDNPAssociative]
 >;
 
@@ -465,3 +465,11 @@ def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1),
 def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2),
   [(int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
    (AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)]>;
+
+def AMDGPUmul_u24 : PatFrags<(ops node:$src0, node:$src1),
+  [(int_amdgcn_mul_u24 node:$src0, node:$src1),
+   (AMDGPUmul_u24_impl node:$src0, node:$src1)]>;
+
+def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
+  [(int_amdgcn_mul_i24 node:$src0, node:$src1),
+   (AMDGPUmul_i24_impl node:$src0, node:$src1)]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 45928a54fd9..94e737d7641 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -467,9 +467,9 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
 defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
 defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
 defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
-defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
 defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
-defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
 defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
 defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
 defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir
new file mode 100644
index 00000000000..b5b9368ed4e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mul.u24.mir
@@ -0,0 +1,65 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: mul_u24_vsv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GCN-LABEL: name: mul_u24_vsv
+    ; GCN: liveins: $sgpr0, $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: mul_u24_vvs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GCN-LABEL: name: mul_u24_vvs
+    ; GCN: liveins: $sgpr0, $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: mul_u24_vvv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GCN-LABEL: name: mul_u24_vvv
+    ; GCN: liveins: $vgpr0, $vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mul.u24), %0, %1
+    S_ENDPGM 0, implicit %2
+...
```

