diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-10 17:17:05 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-10 17:17:05 +0000 | 
| commit | 7df5b3fd26243a80d97382fdc09ce0374ab98d87 (patch) | |
| tree | 5808271281dd53f87ee90033e5cc7e6b7913c2cf | |
| parent | 37d1bda4f6b59c38c35334a86fc8430343db7925 (diff) | |
| download | bcm5719-llvm-7df5b3fd26243a80d97382fdc09ce0374ab98d87.tar.gz bcm5719-llvm-7df5b3fd26243a80d97382fdc09ce0374ab98d87.zip  | |
AMDGPU/GlobalISel: Select cvt pk intrinsics
llvm-svn: 371539
7 files changed, 347 insertions, 40 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index a1079096928..944f782db24 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -195,15 +195,6 @@ multiclass GISelVop2IntrPat <  def : GISelSop2Pat <or, S_OR_B32, i32>;  def : GISelVop2Pat <or, V_OR_B32_e32, i32>; -// FIXME: We can't re-use SelectionDAG patterns here because they match -// against a custom SDNode and we would need to create a generic machine -// instruction that is equivalent to the custom SDNode.  This would also require -// us to custom legalize the intrinsic to the new generic machine instruction, -// but I can't get custom legalizing of intrinsic to work and I'm not sure if -// this is even supported yet. -def : GISelVop3Pat2ModsPat < -  int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>; -  // Since GlobalISel is more flexible then SelectionDAG, I think we can get  // away with adding patterns for integer types and not legalizing all  // loads and stores to vector types.  This should help simplify the load/store diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 9b5976318e4..2a73738c141 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -132,11 +132,11 @@ def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;  def AMDGPUldexp_impl : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; -def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; -def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; -def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; -def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>; -def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; +def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; +def AMDGPUpk_i16_i32_impl : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>; +def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;  def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;  def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>; @@ -449,3 +449,23 @@ def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2),  def AMDGPUffbh_i32 : PatFrags<(ops node:$src),    [(int_amdgcn_sffbh node:$src),     (AMDGPUffbh_i32_impl node:$src)]>; + +def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1), +  [(int_amdgcn_cvt_pkrtz node:$src0, node:$src1), +  (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpknorm_i16_f32 : PatFrags<(ops node:$src0, node:$src1), +  [(int_amdgcn_cvt_pknorm_i16 node:$src0, node:$src1), +  (AMDGPUpknorm_i16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpknorm_u16_f32 : PatFrags<(ops node:$src0, node:$src1), +  [(int_amdgcn_cvt_pknorm_u16 node:$src0, node:$src1), +  (AMDGPUpknorm_u16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1), +  [(int_amdgcn_cvt_pk_i16 node:$src0, node:$src1), +  (AMDGPUpk_i16_i32_impl node:$src0, node:$src1)]>; + +def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1), +  [(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1), +  (AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir new file mode 100644 index 00000000000..01aded24340 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.i16.mir @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s + +--- +name: cvt_pk_i16_vsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; GCN-LABEL: name: cvt_pk_i16_vsv +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] +    %0:sgpr(s32) = COPY $sgpr0 +    %1:vgpr(s32) = COPY $vgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pk_i16_vvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 + +    ; GCN-LABEL: name: cvt_pk_i16_vvs +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:sgpr(s32) = COPY $sgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pk_i16_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; GCN-LABEL: name: cvt_pk_i16_vvv +    ; GCN: liveins: $vgpr0, $vgpr1 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GCN: [[V_CVT_PK_I16_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e64 [[COPY]], [[COPY1]], implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PK_I16_I32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:vgpr(s32) = COPY $vgpr1 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.i16), %0, %1 +    S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir new file mode 100644 index 00000000000..49dcb86bab8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pk.u16.mir @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s + +--- +name: cvt_pk_u16_vsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; GCN-LABEL: name: cvt_pk_u16_vsv +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] +    %0:sgpr(s32) = COPY $sgpr0 +    %1:vgpr(s32) = COPY $vgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pk_u16_vvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 + +    ; GCN-LABEL: name: cvt_pk_u16_vvs +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:sgpr(s32) = COPY $sgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pk_u16_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; GCN-LABEL: name: cvt_pk_u16_vvv +    ; GCN: liveins: $vgpr0, $vgpr1 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GCN: [[V_CVT_PK_U16_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e64 [[COPY]], [[COPY1]], implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PK_U16_U32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:vgpr(s32) = COPY $vgpr1 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pk.u16), %0, %1 +    S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir new file mode 100644 index 00000000000..18266bbc886 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.i16.mir @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s + +--- +name: cvt_pknorm_i16_vsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; GCN-LABEL: name: cvt_pknorm_i16_vsv +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] +    %0:sgpr(s32) = COPY $sgpr0 +    %1:vgpr(s32) = COPY $vgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pknorm_i16_vvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 + +    ; GCN-LABEL: name: cvt_pknorm_i16_vvs +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:sgpr(s32) = COPY $sgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pknorm_i16_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; GCN-LABEL: name: cvt_pknorm_i16_vvv +    ; GCN: liveins: $vgpr0, $vgpr1 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GCN: [[V_CVT_PKNORM_I16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKNORM_I16_F32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:vgpr(s32) = COPY $vgpr1 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.i16), %0, %1 +    S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir new file mode 100644 index 00000000000..e105776f7ef --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pknorm.u16.mir @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s + +--- +name: cvt_pknorm_u16_vsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; GCN-LABEL: name: cvt_pknorm_u16_vsv +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] +    %0:sgpr(s32) = COPY $sgpr0 +    %1:vgpr(s32) = COPY $vgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pknorm_u16_vvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 + +    ; GCN-LABEL: name: cvt_pknorm_u16_vvs +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:sgpr(s32) = COPY $sgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pknorm_u16_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; GCN-LABEL: name: cvt_pknorm_u16_vvv +    ; GCN: liveins: $vgpr0, $vgpr1 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GCN: [[V_CVT_PKNORM_U16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKNORM_U16_F32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:vgpr(s32) = COPY $vgpr1 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pknorm.u16), %0, %1 +    S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir index d0e2ea2ba08..a2a6c1623d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir @@ -1,39 +1,67 @@ -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o -  | FileCheck -check-prefix=GCN %s  --- - -name:            cvt_pkrtz -legalized:       true +name: cvt_pkrtz_vsv +legalized: true  regBankSelected: true +tracksRegLiveness: true -# GCN-LABEL: name: cvt_pkrtz  body: |    bb.0: -    liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4 - -    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    liveins: $sgpr0, $vgpr0 +    ; GCN-LABEL: name: cvt_pkrtz_vsv +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]]      %0:sgpr(s32) = COPY $sgpr0 -    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0      %1:vgpr(s32) = COPY $vgpr0 -    ; GCN: [[VGPR1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 -    %2:vgpr(s32) = COPY $vgpr1 -    %3:vgpr(p1) = COPY $vgpr3_vgpr4 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 +    S_ENDPGM 0, implicit %2 +... -    ; cvt_pkrtz vs -    ; GCN: V_CVT_PKRTZ_F16_F32_e64 0, [[VGPR0]], 0, [[SGPR0]] -    %4:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %0 +--- +name: cvt_pkrtz_vvs +legalized: true +regBankSelected: true +tracksRegLiveness: true -    ; cvt_pkrtz sv -    ; GCN: V_CVT_PKRTZ_F16_F32_e64 0, [[SGPR0]], 0, [[VGPR0]] -    %5:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 -    ; cvt_pkrtz vv -    ; GCN: V_CVT_PKRTZ_F16_F32_e64 0, [[VGPR0]], 0, [[VGPR1]] -    %6:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %1, %2 +    ; GCN-LABEL: name: cvt_pkrtz_vvs +    ; GCN: liveins: $sgpr0, $vgpr0 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 +    ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:sgpr(s32) = COPY $sgpr0 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 +    S_ENDPGM 0, implicit %2 +... + +--- +name: cvt_pkrtz_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true -    %7:vgpr(s32) = G_BITCAST %4 -    %8:vgpr(s32) = G_BITCAST %5 -    %9:vgpr(s32) = G_BITCAST %6 -    S_ENDPGM 0, implicit %7, implicit %8, implicit %9 +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; GCN-LABEL: name: cvt_pkrtz_vvv +    ; GCN: liveins: $vgpr0, $vgpr1 +    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec +    ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] +    %0:vgpr(s32) = COPY $vgpr0 +    %1:vgpr(s32) = COPY $vgpr1 +    %2:vgpr(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.cvt.pkrtz), %0, %1 +    S_ENDPGM 0, implicit %2  ...  | 

