diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir | 247 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir | 513 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir | 158 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir | 158 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir | 146 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg.ll | 17 |
8 files changed, 945 insertions, 318 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir new file mode 100644 index 00000000000..e1dd3293f4c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -0,0 +1,247 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0-verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: fabs_s32_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fabs_s32_ss + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_FABS %0 + $sgpr0 = COPY %1 +... + +--- +name: fabs_s32_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fabs_s32_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 + ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_FABS %0 + $vgpr0 = COPY %1 +... + +--- +name: fabs_s32_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fabs_s32_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GCN: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; GCN: $vgpr0 = COPY [[FABS]](s32) + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_FABS %0 + $vgpr0 = COPY %1 +... + +--- +name: fabs_v2s16_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fabs_v2s16_ss + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = G_FABS %0 + $sgpr0 = COPY %1 +... + +--- +name: fabs_s16_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fabs_s16_ss + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s16) = G_TRUNC %0 + %2:sgpr(s16) = G_FABS %1 + %3:sgpr(s32) = G_ANYEXT %2 + $sgpr0 = COPY %3 +... + +--- +name: fabs_s16_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fabs_s16_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 + ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0 + %2:vgpr(s16) = G_FABS %1 + %3:vgpr(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: fabs_s16_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: fabs_s16_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) + ; GCN: $vgpr0 = COPY [[COPY1]](s32) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s16) = G_TRUNC %0 + %2:vgpr(s16) = G_FABS %1 + %3:vgpr(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: fabs_v2s16_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fabs_v2s16_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 + ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_FABS %0 + $vgpr0 = COPY %1 +... + +--- +name: fabs_v2s16_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fabs_v2s16_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GCN: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; GCN: $vgpr0 = COPY [[FABS]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:vgpr(<2 x s16>) = G_FABS %0 + $vgpr0 = COPY %1 +... + +--- +name: fabs_s64_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fabs_s64_ss + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GCN: [[FABS:%[0-9]+]]:sreg_64_xexec(s64) = G_FABS [[COPY]] + ; GCN: $sgpr0_sgpr1 = COPY [[FABS]](s64) + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_FABS %0 + $sgpr0_sgpr1 = COPY %1 +... + +--- +name: fabs_s64_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: fabs_s64_vv + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] + ; GCN: $vgpr0_vgpr1 = COPY [[FABS]](s64) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_FABS %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: fabs_s64_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fabs_s64_vs + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] + ; GCN: $vgpr0_vgpr1 = COPY [[FABS]](s64) + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s64) = G_FABS %0 + $vgpr0_vgpr1 = COPY %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir index 058e5401d35..30b6089e458 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -147,15 +147,17 @@ body: | bb.0: liveins: $vgpr0 ; GFX9-DENORM-LABEL: name: fcanonicalize_fneg_fabs_f32 - ; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-DENORM: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MAX_F32_e64 2, [[FNEG]](s32), 2, [[FNEG]](s32), 0, 0, implicit $exec - ; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]](s32) + ; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-DENORM: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-DENORM: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec + ; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX9-FLUSH-LABEL: name: fcanonicalize_fneg_fabs_f32 - ; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-FLUSH: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MUL_F32_e64 0, 1065353216, 2, [[FNEG]](s32), 0, 0, implicit $exec - ; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]](s32) + ; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-FLUSH: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX9-FLUSH: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec + ; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir new file mode 100644 index 00000000000..29e3c2ea87e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -0,0 +1,513 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=0-verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: fneg_s32_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fneg_s32_ss + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_XOR_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_FNEG %0 + $sgpr0 = COPY %1 +... + +--- +name: fneg_s32_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fneg_s32_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_FNEG %0 + $vgpr0 = COPY %1 +... + +--- +name: fneg_s32_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fneg_s32_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] + ; GCN: $vgpr0 = COPY [[FNEG]](s32) + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_FNEG %0 + $vgpr0 = COPY %1 +... + +--- +name: fneg_s16_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fneg_s16_ss + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_XOR_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s16) = G_TRUNC %0 + %2:sgpr(s16) = G_FNEG %1 + %3:sgpr(s32) = G_ANYEXT %2 + $sgpr0 = COPY %3 +... + +--- +name: fneg_s16_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fneg_s16_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) + ; GCN: $vgpr0 = COPY [[COPY1]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0 + %2:vgpr(s16) = G_FNEG %1 + %3:vgpr(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: fneg_s16_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: fneg_s16_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) + ; GCN: $vgpr0 = COPY [[COPY1]](s32) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s16) = G_TRUNC %0 + %2:vgpr(s16) = G_FNEG %1 + %3:vgpr(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: fneg_v2s16_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fneg_v2s16_ss + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_XOR_B32_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = G_FNEG %0 + $sgpr0 = COPY %1 +... + +--- +name: fneg_v2s16_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fneg_v2s16_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_FNEG %0 + $vgpr0 = COPY %1 +... + +--- +name: fneg_v2s16_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fneg_v2s16_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] + ; GCN: $vgpr0 = COPY [[FNEG]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:vgpr(<2 x s16>) = G_FNEG %0 + $vgpr0 = COPY %1 +... + +--- +name: fneg_s64_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fneg_s64_ss + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GCN: [[FNEG:%[0-9]+]]:sreg_64_xexec(s64) = G_FNEG [[COPY]] + ; GCN: $sgpr0_sgpr1 = COPY [[FNEG]](s64) + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_FNEG %0 + $sgpr0_sgpr1 = COPY %1 +... + +--- +name: fneg_s64_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: fneg_s64_vv + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[COPY]] + ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_FNEG %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: fneg_s64_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fneg_s64_vs + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[COPY]] + ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s64) = G_FNEG %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: fneg_fabs_s32_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fneg_fabs_s32_ss + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_OR_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_FABS %0 + %2:sgpr(s32) = G_FNEG %1 + $sgpr0 = COPY %2 +... + +--- +name: fneg_fabs_s32_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fneg_fabs_s32_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_FABS %0 + %2:vgpr(s32) = G_FNEG %0 + $vgpr0 = COPY %2 +... + +--- +name: fneg_fabs_s32_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fneg_fabs_s32_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GCN: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec + ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]](s32) + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_FABS %0 + %2:vgpr(s32) = G_FNEG %1 + $vgpr0 = COPY %2 +... + +--- +name: fneg_fabs_s16_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fneg_fabs_s16_ss + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 + ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_OR_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s16) = G_TRUNC %0 + %2:sgpr(s16) = G_FABS %1 + %3:sgpr(s16) = G_FNEG %2 + %4:sgpr(s32) = G_ANYEXT %3 + $sgpr0 = COPY %4 +... + +--- +name: fneg_fabs_s16_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fneg_fabs_s16_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[FABS:%[0-9]+]]:vgpr(s16) = G_FABS [[TRUNC]] + ; GCN: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FABS]] + ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0(s32) = COPY [[FNEG]](s16) + ; GCN: $vgpr0 = COPY [[COPY1]](s32) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0 + %2:vgpr(s16) = G_FABS %1 + %3:vgpr(s16) = G_FNEG %2 + %4:sgpr(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + +--- +name: fneg_fabs_s16_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: fneg_fabs_s16_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GCN: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GCN: [[FNEG:%[0-9]+]]:sgpr(s16) = G_FNEG [[TRUNC]] + ; GCN: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] + ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0(s32) = COPY [[FNEG1]](s16) + ; GCN: $vgpr0 = COPY [[COPY1]](s32) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s16) = G_TRUNC %0 + %2:sgpr(s16) = G_FNEG %1 + %3:vgpr(s16) = G_FNEG %2 + %4:sgpr(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + +--- +name: fneg_fabs_v2s16_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fneg_fabs_v2s16_ss + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GCN: $sgpr0 = COPY [[S_OR_B32_]] + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = G_FABS %0 + %2:sgpr(<2 x s16>) = G_FNEG %1 + $sgpr0 = COPY %2 +... + +--- +name: fneg_fabs_v2s16_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + ; GCN-LABEL: name: fneg_fabs_v2s16_vv + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec + ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]] + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_FABS %0 + %2:vgpr(<2 x s16>) = G_FNEG %0 + $vgpr0 = COPY %2 +... + +--- +name: fneg_fabs_v2s16_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + ; GCN-LABEL: name: fneg_fabs_v2s16_vs + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GCN: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec + ; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:vgpr(<2 x s16>) = G_FABS %0 + %2:vgpr(<2 x s16>) = G_FNEG %1 + $vgpr0 = COPY %2 +... + +--- +name: fneg_fabs_s64_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fneg_fabs_s64_ss + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GCN: [[FABS:%[0-9]+]]:sgpr(s64) = G_FABS [[COPY]] + ; GCN: [[FNEG:%[0-9]+]]:sreg_64_xexec(s64) = G_FNEG [[FABS]] + ; GCN: $sgpr0_sgpr1 = COPY [[FNEG]](s64) + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s64) = G_FABS %0 + %2:sgpr(s64) = G_FNEG %1 + $sgpr0_sgpr1 = COPY %2 +... + +--- +name: fneg_fabs_s64_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; GCN-LABEL: name: fneg_fabs_s64_vv + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] + ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[FABS]] + ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_FABS %0 + %2:vgpr(s64) = G_FNEG %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: fneg_fabs_s64_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; GCN-LABEL: name: fneg_fabs_s64_vs + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] + ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[FABS]] + ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s64) = G_FABS %0 + %2:vgpr(s64) = G_FNEG %1 + $vgpr0_vgpr1 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir index ec6702f0672..297806e3050 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -58,10 +58,8 @@ body: | ; SI-LABEL: name: test_fabs_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fabs_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -189,30 +187,12 @@ body: | ; SI-LABEL: name: test_fabs_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] + ; SI: $vgpr0 = COPY [[FABS]](<2 x s16>) ; VI-LABEL: name: test_fabs_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] - ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[TRUNC1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FABS]](s16), [[FABS1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] + ; VI: $vgpr0 = COPY [[FABS]](<2 x s16>) ; GFX9-LABEL: name: test_fabs_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[COPY]] @@ -231,54 +211,46 @@ body: | ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT2]] + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0 + ; SI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; SI: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; SI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT4]] + ; SI: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT3]](<3 x s16>), 0 + ; SI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32 + ; SI: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT5]](<3 x s16>) ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] - ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[TRUNC1]] - ; VI: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FABS2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT2]] + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0 + ; VI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; VI: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT4]] + ; VI: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT3]](<3 x s16>), 0 + ; VI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32 + ; VI: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT5]](<3 x s16>) ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -315,47 +287,17 @@ body: | ; SI-LABEL: name: test_fabs_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FABS2:%[0-9]+]]:_(s32) = G_FABS [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FABS3:%[0-9]+]]:_(s32) = G_FABS [[FPEXT3]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FABS3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] + ; SI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fabs_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[TRUNC]] - ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[TRUNC1]] - ; VI: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[TRUNC2]] - ; VI: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FABS]](s16), [[FABS1]](s16), [[FABS2]](s16), [[FABS3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]] + ; VI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV1]] + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fabs_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir index c39153384f6..468817987ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -56,10 +56,8 @@ body: | ; SI-LABEL: name: test_fneg_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fneg_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -187,30 +185,12 @@ body: | ; SI-LABEL: name: test_fneg_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) - ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] + ; SI: $vgpr0 = COPY [[FNEG]](<2 x s16>) ; VI-LABEL: name: test_fneg_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FNEG]](s16), [[FNEG1]](s16) - ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] + ; VI: $vgpr0 = COPY [[FNEG]](<2 x s16>) ; GFX9-LABEL: name: test_fneg_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY]] @@ -229,54 +209,46 @@ body: | ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT2]] + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0 + ; SI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; SI: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; SI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT4]] + ; SI: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT3]](<3 x s16>), 0 + ; SI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32 + ; SI: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT5]](<3 x s16>) ; VI-LABEL: name: test_fneg_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] - ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT2]] + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0 + ; VI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; VI: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT4]] + ; VI: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT3]](<3 x s16>), 0 + ; VI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32 + ; VI: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT5]](<3 x s16>) ; GFX9-LABEL: name: test_fneg_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -313,47 +285,17 @@ body: | ; SI-LABEL: name: test_fneg_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] + ; SI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fneg_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] - ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC2]] - ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FNEG]](s16), [[FNEG1]](s16), [[FNEG2]](s16), [[FNEG3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]] + ; VI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV1]] + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fneg_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir index 53f3315ced7..7beb00ad9e7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -100,14 +100,12 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fsub_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -311,47 +309,41 @@ body: | ; SI-LABEL: name: test_fsub_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY1]] + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC1]](s16) - ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; VI-LABEL: name: test_fsub_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY1]] + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC1]] - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG]] - ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG1]] + ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC2]] + ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC3]] ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_fsub_v2s16 @@ -398,30 +390,24 @@ body: | ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT6]] - ; SI: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG2]](s32) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) - ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT7]], [[FPEXT8]] - ; SI: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC3]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC5]](s16) + ; SI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]] + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; SI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]] + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; SI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) + ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; SI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) @@ -535,35 +521,27 @@ body: | ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC2]](s16) - ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT6]] - ; SI: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG2]](s32) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC4]](s16) - ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT7]], [[FPEXT8]] - ; SI: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT9]] - ; SI: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEG3]](s32) - ; SI: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT10]], [[FPEXT11]] - ; SI: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16), [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; SI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC4]] + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; SI: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; SI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; SI: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT2]], [[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; SI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC6]] + ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) + ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; SI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] + ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) + ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] + ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; VI-LABEL: name: test_fsub_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 458018fbf4f..5dda92dbd5e 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -157,13 +157,13 @@ define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, fl } ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32: -; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}} +; GCN-SAFE: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1{{$}} ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]] +; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[SIGNBIT]], [[A]] ; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] -; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]] +; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[SIGNBIT]], [[ADD]] ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]] diff --git a/llvm/test/CodeGen/AMDGPU/fneg.ll b/llvm/test/CodeGen/AMDGPU/fneg.ll index 94ec61622bd..708acbb78f7 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg.ll @@ -1,11 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_fneg_f32: ; R600: -PV -; GCN: v_xor_b32 +; GCN: s_load_dword [[VAL:s[0-9]+]] +; GCN: s_xor_b32 [[NEG_VAL:s[0-9]+]], [[VAL]], 0x80000000 +; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[NEG_VAL]] define amdgpu_kernel void @s_fneg_f32(float addrspace(1)* %out, float %in) { %fneg = fsub float -0.000000e+00, %in store float %fneg, float addrspace(1)* %out @@ -16,6 +18,7 @@ define amdgpu_kernel void @s_fneg_f32(float addrspace(1)* %out, float %in) { ; R600: -PV ; R600: -PV +; GCN: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1 ; GCN: v_xor_b32 ; GCN: v_xor_b32 define amdgpu_kernel void @s_fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) { @@ -60,9 +63,9 @@ define amdgpu_kernel void @fsub0_f32(float addrspace(1)* %out, i32 %in) { ; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb ; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c -; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}} -; GCN: v_xor_b32_e32 [[RES:v[0-9]+]], [[NEG_VALUE]], [[SIGNBIT]] -; GCN: buffer_store_dword [[RES]] +; GCN: s_xor_b32 [[RES:s[0-9]+]], [[NEG_VALUE]], 0x80000000 +; GCN: v_mov_b32_e32 [[V_RES:v[0-9]+]], [[RES]] +; GCN: buffer_store_dword [[V_RES]] ; R600-NOT: XOR ; R600: -PV.W |