diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir | 39 |
2 files changed, 46 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index accb8eac9f0..ac2951ca532 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -70,6 +70,17 @@ class GISelVop2Pat < (inst src0_vt:$src0, src1_vt:$src1) >; +// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken. +class GISelVop2ClampingPat < + SDPatternOperator node, + Instruction inst, + ValueType dst_vt, + ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat < + + (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))), + (inst src0_vt:$src0, src1_vt:$src1, 0) +>; + class GISelVop2CommutePat < SDPatternOperator node, Instruction inst, @@ -129,7 +140,16 @@ def : GISelSop2Pat <or, S_OR_B32, i32>; def : GISelVop2Pat <or, V_OR_B32_e32, i32>; def : GISelSop2Pat <add, S_ADD_I32, i32>; + +let SubtargetPredicate = NotHasAddNoCarryInsts in { +// FIXME: This should use the VOP3 form +//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>; def : GISelVop2Pat <add, V_ADD_I32_e32, i32>; +} + +let SubtargetPredicate = HasAddNoCarryInsts in { +def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>; +} def : GISelSop2Pat <sra, S_ASHR_I32, i32>; let AddedComplexity = 100 in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index f2db55685b5..8f7973d4eca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -1,24 +1,37 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- -name: add_i32 +name: add_s32 legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 - ; GCN-LABEL: name: add_i32 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec - ; GCN: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec - ; GCN: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + ; GFX6-LABEL: name: add_s32 + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec + ; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec + ; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec + ; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: add_s32 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec + ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec + ; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:vgpr(s32) = COPY $vgpr0 |

