diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir | 280 |
2 files changed, 287 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 5b6c8a7ed96..62a19d848af 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1568,17 +1568,18 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>; multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen, MUBUF_Pseudo InstrOffset, - ValueType vt, PatFrag st> { + ValueType vt, PatFrag st, + RegisterClass rc = VGPR_32> { def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; } @@ -1587,9 +1588,9 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>; defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>; defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>; -defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>; -defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private>; -defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>; +defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>; +defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>; let OtherPredicates = [D16PreservesUnusedBits] in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir new file mode 100644 index 00000000000..822a1412d16 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -0,0 +1,280 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: store_private_s32_to_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_4 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_4 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_s32_to_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_2 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_2 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 2, align 2, addrspace 5) + +... + +--- + +name: store_private_s32_to_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_1 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 1, align 1, addrspace 5) + +... + +--- + +name: store_private_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_v2s16 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_v2s16 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_p3 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_p3 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_p5 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_p5 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_p5 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_s32_to_1_fi_offset_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_fi_offset_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_fi_offset_4095 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_FRAME_INDEX %stack.0 + %1:vgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %3, %2 :: (store 1, align 1, addrspace 5) + +... + +--- + +name: store_private_s32_to_1_constant_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_constant_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_constant_4095 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_CONSTANT i32 4095 + %1:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + +... + +--- + +name: store_private_s32_to_1_constant_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_constant_4096 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_constant_4096 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_CONSTANT i32 4096 + %1:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + +... |