diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-05 22:20:47 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-05 22:20:47 +0000 |
commit | b812b7a45ed159fcc4b1b26f9200885d93b68fc5 (patch) | |
tree | 24ec293bca8be1a5b15f1e58c37f2615b2e667ab /llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir | |
parent | 8f500a6f9ca0e56e41513435d0257c84ddabb566 (diff) | |
download | bcm5719-llvm-b812b7a45ed159fcc4b1b26f9200885d93b68fc5.tar.gz bcm5719-llvm-b812b7a45ed159fcc4b1b26f9200885d93b68fc5.zip |
AMDGPU: Invert frame index offset interpretation
Since the beginning, the offset of a frame index has been consistently
interpreted backwards. It was treated as an offset from the scratch
wave offset register, which was used as a frame register. The correct
interpretation is the offset from the SP on entry to the function,
before the prolog. Frame index elimination then should select either
SP or another register as an FP.
Treat the scratch wave offset on kernel entry as the pre-incremented
SP. Rely more heavily on the standard hasFP and frame pointer
elimination logic, and clean up the private reservation code. This
saves a copy in most callee functions.
The kernel prolog emission code is still kind of a mess, relying on
checking the uses of physical registers, which I would prefer to
eliminate.
Currently, selection directly emits MUBUF instructions, which require
using a reference to some register. Use the register chosen for SP,
and then ignore that reference later. This should probably be cleaned up to use
pseudos that don't refer to any specific base register until frame
index elimination.
Add a workaround for shaders using large numbers of SGPRs. I'm not
sure these cases were ever working correctly, since, as far as I can
tell, the logic for figuring out which SGPR is the scratch wave offset
doesn't match up with the shader input initialization in the shader
programming guide.
llvm-svn: 362661
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir | 52 |
1 files changed, 26 insertions, 26 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir index 906e37e9926..f69c324f948 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir @@ -33,16 +33,16 @@ # SHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true, # SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -# SHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5) -# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) -# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5) -# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# SHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5) +# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) +# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.1, align 4, addrspace 5) +# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) # SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0 -# SHARE: $sgpr5 = 
SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5) -# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) -# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5) +# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) +# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) # SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0 -# SHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5) +# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5) # NOSHARE: stack: # NOSHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, @@ -58,17 +58,17 @@ # NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true, # NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5) -# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into 
%stack.0, addrspace 5) -# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5) -# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5) +# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) +# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.1, align 4, addrspace 5) +# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) # NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0 -# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5) -# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.3, addrspace 5) -# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) -# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
$sgpr32 :: (load 4 from %stack.2, addrspace 5) +# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 4 into %stack.3, addrspace 5) +# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) +# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) # NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0 -# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.3, addrspace 5) +# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 4 from %stack.3, addrspace 5) ... 
@@ -79,23 +79,23 @@ frameInfo: machineFunctionInfo: scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 scratchWaveOffsetReg: $sgpr4 - frameOffsetReg: $sgpr5 + frameOffsetReg: $sgpr32 stackPtrOffsetReg: $sgpr32 body: | bb.0: - %0:sreg_32_xm0 = COPY $sgpr5 + %0:sreg_32_xm0 = COPY $sgpr32 %1:vreg_64 = IMPLICIT_DEF %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc - ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 + ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32 dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0 - $sgpr5 = COPY %0 - %4:sreg_32_xm0 = COPY $sgpr5 - ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 - ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 + $sgpr32 = COPY %0 + %4:sreg_32_xm0 = COPY $sgpr32 + ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32 + ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32 $vgpr0 = COPY %2 dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0 - $sgpr5 = COPY %4 - ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 + $sgpr32 = COPY %4 + ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32 ... |