diff options
4 files changed, 62 insertions, 60 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 0dda9687a15..e5f215fbcd7 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -777,6 +777,23 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat < $src) >; +foreach vt = [i16, v2i16] in { +def : GCNPat < + (and vt:$src0, vt:$src1), + (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) +>; + +def : GCNPat < + (or vt:$src0, vt:$src1), + (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) +>; + +def : GCNPat < + (xor vt:$src0, vt:$src1), + (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) +>; +} + let Predicates = [Has16BitInsts] in { let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { @@ -799,21 +816,6 @@ defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64, 1>; defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64, 1>; } -def : GCNPat < - (and i16:$src0, i16:$src1), - (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) ->; - -def : GCNPat < - (or i16:$src0, i16:$src1), - (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) ->; - -def : GCNPat < - (xor i16:$src0, i16:$src1), - (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) ->; - let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64>; defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir index 361486c1051..e449830e8fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- @@ -148,12 +149,10 @@ body: | liveins: $vgpr0, $vgpr1 ; WAVE64-LABEL: name: and_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]] - ; WAVE64: S_ENDPGM 0, implicit [[AND]](s16) + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32: $vcc_hi = IMPLICIT_DEF @@ -355,16 +354,17 @@ body: | liveins: $vgpr0, $vgpr1 ; WAVE64-LABEL: name: and_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; WAVE64: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - ; WAVE64: S_ENDPGM 0, implicit [[AND]](<2 x s16>) + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] ; WAVE32-LABEL: name: and_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; WAVE32: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - ; WAVE32: S_ENDPGM 0, implicit [[AND]](<2 x s16>) + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir index d4df9f9a403..1423b0a3471 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- @@ -148,12 +149,10 @@ body: | liveins: $vgpr0, $vgpr1 ; WAVE64-LABEL: name: or_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64: [[OR:%[0-9]+]]:vgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; WAVE64: S_ENDPGM 0, implicit [[OR]](s16) + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; WAVE32-LABEL: name: or_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32: $vcc_hi = IMPLICIT_DEF @@ -355,16 +354,17 @@ body: | liveins: $vgpr0, $vgpr1 ; WAVE64-LABEL: name: or_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; WAVE64: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY1]] - ; WAVE64: S_ENDPGM 0, implicit [[OR]](<2 x s16>) + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] ; WAVE32-LABEL: name: or_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; WAVE32: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY1]] - ; WAVE32: S_ENDPGM 0, implicit [[OR]](<2 x s16>) + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir index d6f38009f57..f915b3b0a61 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=WAVE64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s --- @@ -148,12 +149,10 @@ body: | liveins: $vgpr0, $vgpr1 ; WAVE64-LABEL: name: xor_s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64: [[XOR:%[0-9]+]]:vgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]] - ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s16) + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32: $vcc_hi = IMPLICIT_DEF @@ -355,16 +354,17 @@ body: | liveins: $vgpr0, $vgpr1 ; WAVE64-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr ; WAVE64: liveins: $vgpr0, $vgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; WAVE64: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; WAVE64: S_ENDPGM 0, implicit [[XOR]](<2 x s16>) + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] ; WAVE32-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 - ; WAVE32: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]] - ; WAVE32: S_ENDPGM 0, implicit [[XOR]](<2 x s16>) + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 %2:vgpr(<2 x s16>) = G_XOR %0, %1 |