| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-01 03:52:40 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-01 03:52:40 +0000 |
| commit | 57495268acb2b304f7a89e321478c3a818d2c93f | |
| tree | d9cd4ff89a9f54a41324d733d9290322146198f2 | |
| parent | ae87b9f2c2eac8c11ffd1e606f3841c7083dbf52 | |
AMDGPU/GlobalISel: Remove manual store select code
This regresses the weird types that are newly treated as legal load
types, but fixes the incorrect use of flat instructions on SI (which
predates flat addressing and has no flat instructions).
llvm-svn: 367512
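For context on why deferring to `selectImpl()` fixes SI: `selectImpl()` is the
TableGen-generated GlobalISel matcher, and the patterns it imports are gated on
subtarget predicates, whereas the removed hand-written switch emitted
`FLAT_STORE_*` opcodes unconditionally. The following is only a rough sketch of
that gating, assuming the flat patterns in FLATInstructions.td sit under a
predicate such as `HasFlatAddressSpace`; the literal pattern classes and
operand lists in the file differ.

```tablegen
// Sketch, not the literal FLATInstructions.td text: patterns inside a
// predicated block are only imported into the generated matcher for
// subtargets where the predicate holds, so SI (gfx6), which has no
// flat instructions, can never match a FLAT_STORE_* opcode this way.
let OtherPredicates = [HasFlatAddressSpace] in {
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
}
```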
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 52 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 29 |
2 files changed, 23 insertions, 58 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9fe9b3782c8..b71bfce60b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -842,56 +842,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
 
 bool AMDGPUInstructionSelector::selectG_STORE(
   MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
-  MachineBasicBlock *BB = I.getParent();
-  MachineFunction *MF = BB->getParent();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  const DebugLoc &DL = I.getDebugLoc();
-
-  LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
-  if (PtrTy.getSizeInBits() != 64) {
-    initM0(I);
-    return selectImpl(I, CoverageInfo);
-  }
-
-  if (selectImpl(I, CoverageInfo))
-    return true;
-
-  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
-  unsigned Opcode;
-
-  // FIXME: Remove this when integers > s32 naturally selected.
-  switch (StoreSize) {
-  default:
-    return false;
-  case 32:
-    Opcode = AMDGPU::FLAT_STORE_DWORD;
-    break;
-  case 64:
-    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
-    break;
-  case 96:
-    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
-    break;
-  case 128:
-    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
-    break;
-  }
-
-  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
-          .add(I.getOperand(1))
-          .add(I.getOperand(0))
-          .addImm(0) // offset
-          .addImm(0) // glc
-          .addImm(0) // slc
-          .addImm(0); // dlc
-
-
-  // Now that we selected an opcode, we need to constrain the register
-  // operands to use appropriate classes.
-  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
-
-  I.eraseFromParent();
-  return Ret;
+  initM0(I);
+  return selectImpl(I, CoverageInfo);
 }
 
 static int sizeToSubRegIndex(unsigned Size) {
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index a00ff76be7a..701f50892c0 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -777,8 +777,6 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
 def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
 
@@ -787,8 +785,17 @@ def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
 
 def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
 def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
+
+foreach vt = [i32, f32, v2i16, v2f16] in {
+def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
+def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
+}
+
 def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
 def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
 
@@ -860,8 +867,16 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
 
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
+foreach vt = [i32, f32, v2i16, v2f16] in {
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
+}
+
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
 
@@ -872,8 +887,6 @@ def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
 def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
 def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
```
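For readers unfamiliar with TableGen's `foreach`, the loop bodies above are
unrolled at parse time into one `def` per list element. As a sketch, assuming
`VReg_64.RegTypes` lists the 64-bit value types registered for that class
(the exact contents of the list is an assumption here), the flat-store loop
over `VReg_64.RegTypes` is equivalent to writing out:

```tablegen
// Hand-unrolled equivalent of the VReg_64.RegTypes foreach above.
// The particular types shown are an assumed RegTypes list, not
// quoted from SIRegisterInfo.td.
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, i64, VReg_64>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, f64, VReg_64>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2f32, VReg_64>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v4i16, VReg_64>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v4f16, VReg_64>;
```

Iterating over `RegTypes` rather than a literal list is what newly legalizes
the "weird" 32- and 64-bit types (v2i16, v2f16, and friends) the commit
message mentions: any type the register class advertises gets a pattern.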

