diff options
author | Tom Stellard <tstellar@redhat.com> | 2018-05-11 23:12:49 +0000 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2018-05-11 23:12:49 +0000 |
commit | 655fdd3f82fc2c25eccd921755708deed0a02405 (patch) | |
tree | 703377dbdcb174b640a905806c1a0b3da53d6e33 /llvm | |
parent | c3c14666b6262f6e0c0fc38d7f778cd879e26bbe (diff) | |
download | bcm5719-llvm-655fdd3f82fc2c25eccd921755708deed0a02405.tar.gz bcm5719-llvm-655fdd3f82fc2c25eccd921755708deed0a02405.zip |
AMDGPU/GlobalISel: Implement select() for >32-bit G_STORE
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D46153
llvm-svn: 332154
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir | 23 |
3 files changed, 47 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 52ecca76095..bac467928e4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -160,10 +160,31 @@ bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); DebugLoc DL = I.getDebugLoc(); + unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI); + unsigned Opcode; // FIXME: Select store instruction based on address space - MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD)) + switch (StoreSize) { + default: + return false; + case 32: + Opcode = AMDGPU::FLAT_STORE_DWORD; + break; + case 64: + Opcode = AMDGPU::FLAT_STORE_DWORDX2; + break; + case 96: + Opcode = AMDGPU::FLAT_STORE_DWORDX3; + break; + case 128: + Opcode = AMDGPU::FLAT_STORE_DWORDX4; + break; + } + + MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) .add(I.getOperand(1)) .add(I.getOperand(0)) .addImm(0) // offset diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index b30aa314299..879aa49b97b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1579,6 +1579,12 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, case 64: return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass : &AMDGPU::SReg_64_XEXECRegClass; + case 96: + return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass : + nullptr; + case 128: + return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass : + &AMDGPU::SReg_128RegClass; default: llvm_unreachable("not implemented"); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index c1347abf785..ba38b5bff9a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -4,7 +4,10 @@ # REQUIRES: global-isel --- | - define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void } + define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0, + i64 addrspace(1)* %global1, + i96 addrspace(1)* %global2, + i128 addrspace(1)* %global3) { ret void } ... --- @@ -14,16 +17,28 @@ regBankSelected: true # GCN: global_addrspace # GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 -# GCN: [[VAL:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0 +# GCN: [[VAL4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 +# GCN: [[VAL8:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 +# GCN: [[VAL12:%[0-9]+]]:vreg_96 = COPY $vgpr5_vgpr6_vgpr7 +# GCN: [[VAL16:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 +# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL4]], 0, 0, 0 +# GCN: FLAT_STORE_DWORDX2 [[PTR]], [[VAL8]], 0, 0, 0 +# GCN: FLAT_STORE_DWORDX3 [[PTR]], [[VAL12]], 0, 0, 0 +# GCN: FLAT_STORE_DWORDX4 [[PTR]], [[VAL16]], 0, 0, 0 body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = COPY $vgpr3_vgpr4 + %3:vgpr(s96) = COPY $vgpr5_vgpr6_vgpr7 + %4:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 G_STORE %1, %0 :: (store 4 into %ir.global0) + G_STORE %2, %0 :: (store 8 into %ir.global1) + G_STORE %3, %0 :: (store 12 into %ir.global2, align 16) + G_STORE %4, %0 :: (store 16 into %ir.global3) ... --- |