summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-08-01 03:52:40 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-08-01 03:52:40 +0000
commit57495268acb2b304f7a89e321478c3a818d2c93f (patch)
treed9cd4ff89a9f54a41324d733d9290322146198f2 /llvm/lib
parentae87b9f2c2eac8c11ffd1e606f3841c7083dbf52 (diff)
downloadbcm5719-llvm-57495268acb2b304f7a89e321478c3a818d2c93f.tar.gz
bcm5719-llvm-57495268acb2b304f7a89e321478c3a818d2c93f.zip
AMDGPU/GlobalISel: Remove manual store select code
This regresses the weird types that are newly treated as legal load types, but fixes incorrectly using flat instructions on SI. llvm-svn: 367512
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp52
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td29
2 files changed, 23 insertions, 58 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9fe9b3782c8..b71bfce60b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -842,56 +842,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_STORE(
MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- const DebugLoc &DL = I.getDebugLoc();
-
- LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
- if (PtrTy.getSizeInBits() != 64) {
- initM0(I);
- return selectImpl(I, CoverageInfo);
- }
-
- if (selectImpl(I, CoverageInfo))
- return true;
-
- unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
- unsigned Opcode;
-
- // FIXME: Remove this when integers > s32 naturally selected.
- switch (StoreSize) {
- default:
- return false;
- case 32:
- Opcode = AMDGPU::FLAT_STORE_DWORD;
- break;
- case 64:
- Opcode = AMDGPU::FLAT_STORE_DWORDX2;
- break;
- case 96:
- Opcode = AMDGPU::FLAT_STORE_DWORDX3;
- break;
- case 128:
- Opcode = AMDGPU::FLAT_STORE_DWORDX4;
- break;
- }
-
- MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
- .add(I.getOperand(1))
- .add(I.getOperand(0))
- .addImm(0) // offset
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // dlc
-
-
- // Now that we selected an opcode, we need to constrain the register
- // operands to use appropriate classes.
- bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
-
- I.eraseFromParent();
- return Ret;
+ initM0(I);
+ return selectImpl(I, CoverageInfo);
}
static int sizeToSubRegIndex(unsigned Size) {
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index a00ff76be7a..701f50892c0 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -777,8 +777,6 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
@@ -787,8 +785,17 @@ def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
+
+foreach vt = [i32, f32, v2i16, v2f16] in {
+def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
+def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
+}
+
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
@@ -860,8 +867,16 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
+foreach vt = [i32, f32, v2i16, v2f16] in {
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
+}
+
+foreach vt = VReg_64.RegTypes in {
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
+}
+
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
@@ -872,8 +887,6 @@ def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
OpenPOWER on IntegriCloud