summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-08-01 00:53:38 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-08-01 00:53:38 +0000
commit3594011de0ef11895cb5123f328700463fee5f13 (patch)
tree4d16403b689d077aa2a42067f0fd734d381a1b13
parent87b668befe19d47dd14919b2f322a81f1478e49d (diff)
downloadbcm5719-llvm-3594011de0ef11895cb5123f328700463fee5f13.tar.gz
bcm5719-llvm-3594011de0ef11895cb5123f328700463fee5f13.zip
AMDGPU/GlobalISel: Select local loads
llvm-svn: 367498
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGISel.td12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp84
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td10
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir906
6 files changed, 1014 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index cad4c2ef404..2b493935cd0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -64,6 +64,18 @@ def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
+def gi_ds_1addr_1offset :
+ GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
+ GIComplexPatternEquiv<DS1Addr1Offset>;
+
+
+// Separate load nodes are defined to glue m0 initialization in
+// SelectionDAG. The GISel selector can just insert m0 initialization
+// directly before selecting a glue-less load, so hide this
+// distinction.
+def : GINodeEquiv<G_LOAD, AMDGPUld_glue>;
+
+
class GISelSop2Pat <
SDPatternOperator node,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5db2e6fb4e6..06ba7d53c76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1243,10 +1243,22 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
return false;
}
-bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
- // TODO: Can/should we insert m0 initialization here for DS instructions and
- // call the normal selector?
- return false;
+/// Select a G_LOAD.
+///
+/// When the pointer operand is in the LOCAL or REGION address space and the
+/// subtarget reports STI.ldsRequiresM0Init(), an "S_MOV_B32 m0, -1" is built
+/// in front of \p I first. Selection is then delegated to selectImpl().
+///
+/// NOTE(review): the M0 write is emitted before selection is attempted and is
+/// not removed here if selectImpl() returns false.
+///
+/// \returns the result of selectImpl() for \p I.
+bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I,
+ CodeGenCoverage &CoverageInfo) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Operand 1 of the load is the pointer; its LLT carries the address space.
+ const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
+ STI.ldsRequiresM0Init()) {
+ // If DS instructions require M0 initialization, insert it before selecting.
+ BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addImm(-1);
+ }
+
+ return selectImpl(I, CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
@@ -1364,7 +1376,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
return true;
return selectImpl(I, CoverageInfo);
case TargetOpcode::G_LOAD:
- return selectImpl(I, CoverageInfo);
+ return selectG_LOAD(I, CoverageInfo);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_STORE:
@@ -1698,6 +1710,22 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
}}};
}
+/// Return true if \p Offset may be folded into the immediate offset field of
+/// a DS instruction whose address operand is \p Base.
+///
+/// \param MRI        Register info used to query known bits of \p Base.
+/// \param Base       The address operand the offset would be added to.
+/// \param Offset     Candidate immediate offset.
+/// \param OffsetBits Width in bits of the unsigned immediate offset field.
+///
+/// The offset must fit in an unsigned field of \p OffsetBits bits. If the
+/// subtarget neither has a usable DS offset nor enables unsafe DS offset
+/// folding, \p Base must additionally be known to have a zero sign bit.
+bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
+ const MachineOperand &Base,
+ int64_t Offset,
+ unsigned OffsetBits) const {
+ // Range-check against the actual field width rather than special-casing 8
+ // and 16, so that any other width is checked too. For widths below 64 bits a
+ // negative Offset is rejected because it converts to a large unsigned value.
+ if (!isUIntN(OffsetBits, Offset))
+ return false;
+
+ if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
+ return true;
+
+ // On Southern Islands, instructions with a negative base value and an offset
+ // don't seem to work.
+ return signBitIsZero(Base, MRI);
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
@@ -1726,3 +1754,49 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
}};
}
+
+/// Complex operand matcher that splits the address \p Root into a pair of
+/// renderers: the address operand and an immediate offset.
+///
+/// If isBaseWithConstantOffset() accepts \p Root and the constant passes
+/// isDSOffsetLegal() for a 16-bit field, the renderers add the base operand
+/// and the constant. In every other case they add \p Root itself with an
+/// offset of 0.
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+ if (!RootDef) {
+ // No defining instruction to inspect: use the address unchanged.
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ }};
+ }
+
+ // Only written by the m_ICst match below; not consumed yet.
+ int64_t ConstAddr = 0;
+ if (isBaseWithConstantOffset(Root, MRI)) {
+ const MachineOperand &LHS = RootDef->getOperand(1);
+ const MachineOperand &RHS = RootDef->getOperand(2);
+ const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+ const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+ if (LHSDef && RHSDef) {
+ // NOTE(review): assumes isBaseWithConstantOffset() guarantees that RHSDef
+ // is a constant-defining instruction whose operand 1 is a CImm -- TODO
+ // confirm; getCImm() is not guarded here.
+ int64_t PossibleOffset =
+ RHSDef->getOperand(1).getCImm()->getSExtValue();
+ if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
+ // (add n0, c0)
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
+ }};
+ }
+ }
+ } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
+ // TODO: address defined by a G_SUB is recognized but not folded yet; it
+ // falls through to the default (Root, 0) renderers below.
+
+ } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
+ // TODO: constant address is matched into ConstAddr but not folded yet; it
+ // falls through to the default (Root, 0) renderers below.
+ }
+
+ // Default: no offset folded.
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ }};
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 4f489ddfb23..8ec241bac49 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -90,7 +90,7 @@ private:
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
- bool selectG_LOAD(MachineInstr &I) const;
+ bool selectG_LOAD(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
bool selectG_SELECT(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
bool selectG_BRCOND(MachineInstr &I) const;
@@ -133,6 +133,13 @@ private:
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffset(MachineOperand &Root) const;
+ bool isDSOffsetLegal(const MachineRegisterInfo &MRI,
+ const MachineOperand &Base,
+ int64_t Offset, unsigned OffsetBits) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectDS1Addr1Offset(MachineOperand &Root) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 0be9ec3eff1..07f5123ea51 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -480,11 +480,13 @@ def atomic_store_local : LocalStore <atomic_store>;
def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
let IsLoad = 1;
+ let IsNonExtLoad = 1;
let MinAlignment = 8;
}
def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
let IsLoad = 1;
+ let IsNonExtLoad = 1;
let MinAlignment = 16;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a035bb63e7c..92d6ab30bf9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -328,13 +328,13 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
>;
def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
- let IsUnindexed = 1;
let IsLoad = 1;
+ let IsUnindexed = 1;
}
def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
- let IsNonExtLoad = 1;
let IsLoad = 1;
+ let IsNonExtLoad = 1;
}
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
@@ -396,7 +396,9 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
-def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)>;
+def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+ let IsNonExtLoad = 1;
+}
let MemoryVT = i8 in {
def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
@@ -412,9 +414,11 @@ def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)
def load_align8_local_m0 : LoadFrag <load_glue>, LocalAddress {
let MinAlignment = 8;
+ let IsNonExtLoad = 1;
}
def load_align16_local_m0 : LoadFrag <load_glue>, LocalAddress {
let MinAlignment = 16;
+ let IsNonExtLoad = 1;
}
} // End IsLoad = 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
new file mode 100644
index 00000000000..602b1141ed7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
@@ -0,0 +1,906 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+
+---
+
+name: load_local_s32_from_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]]
+ ; GFX7-LABEL: name: load_local_s32_from_4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
+ ; GFX9-LABEL: name: load_local_s32_from_4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_s32_from_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_2
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]]
+ ; GFX7-LABEL: name: load_local_s32_from_2
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]]
+ ; GFX9-LABEL: name: load_local_s32_from_2
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_s32_from_1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_v2s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ; GFX6-LABEL: name: load_local_v2s32
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ; GFX7-LABEL: name: load_local_v2s32
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ; GFX9-LABEL: name: load_local_v2s32
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v2s32_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ; GFX6-LABEL: name: load_local_v2s32_align4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX7-LABEL: name: load_local_v2s32_align4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX9-LABEL: name: load_local_v2s32_align4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v3s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v3s32
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ; GFX7-LABEL: name: load_local_v3s32
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ; GFX9-LABEL: name: load_local_v3s32
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_local_v4s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v4s32
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ; GFX7-LABEL: name: load_local_v4s32
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ; GFX9-LABEL: name: load_local_v4s32
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s64
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX7-LABEL: name: load_local_s64
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX9-LABEL: name: load_local_s64
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_s64_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s64_align4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX7-LABEL: name: load_local_s64_align4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX9-LABEL: name: load_local_s64_align4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v2s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2s64
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ; GFX7-LABEL: name: load_local_v2s64
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ; GFX9-LABEL: name: load_local_v2s64
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_v2p1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2p1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+ ; GFX7-LABEL: name: load_local_v2p1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+ ; GFX9-LABEL: name: load_local_v2p1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_s96
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s96
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+ ; GFX7-LABEL: name: load_local_s96
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+ ; GFX9-LABEL: name: load_local_s96
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_local_s128
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s128
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+ ; GFX7-LABEL: name: load_local_s128
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+ ; GFX9-LABEL: name: load_local_s128
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_p3_from_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p3_from_4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX7-LABEL: name: load_local_p3_from_4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX9-LABEL: name: load_local_p3_from_4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+# NOTE(review): despite its name, this test loads a p3 value, not a p5 one:
+# the G_LOAD result below is typed p3, so the body is identical to
+# load_local_p3_from_4. Changing the result type to p5 would require
+# regenerating the CHECK lines.
+name: load_local_p5_from_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p5_from_4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX7-LABEL: name: load_local_p5_from_4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX9-LABEL: name: load_local_p5_from_4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_p1_align8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p1_align8
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX7-LABEL: name: load_local_p1_align8
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX9-LABEL: name: load_local_p1_align8
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_p1_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p1_align4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX7-LABEL: name: load_local_p1_align4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX9-LABEL: name: load_local_p1_align4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_p999_from_8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p999_from_8
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ; GFX7-LABEL: name: load_local_p999_from_8
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ; GFX9-LABEL: name: load_local_p999_from_8
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v2p3
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2p3
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX7-LABEL: name: load_local_v2p3
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-LABEL: name: load_local_v2p3
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+# Input: a 4-byte, 4-byte-aligned G_LOAD producing <2 x s16> from a local
+# (addrspace 3) pointer held in a VGPR.
+# The checks below expect the G_LOAD itself to still be present in the output
+# for GFX6, GFX7 and GFX9, with the result register given the vgpr_32 class.
+# GFX6/GFX7 additionally expect `$m0 = S_MOV_B32 -1` ahead of the load; the
+# GFX9 checks contain no m0 write.
+name: load_local_v2s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ; GFX7-LABEL: name: load_local_v2s16
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ; GFX9-LABEL: name: load_local_v2s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+# Input: an 8-byte, 8-byte-aligned G_LOAD producing <4 x s16> from a local
+# (addrspace 3) pointer held in a VGPR.
+# The checks below expect the G_LOAD itself to still be present in the output
+# for GFX6, GFX7 and GFX9, with the result register given the vreg_64 class.
+# GFX6/GFX7 additionally expect `$m0 = S_MOV_B32 -1` ahead of the load; the
+# GFX9 checks contain no m0 write.
+name: load_local_v4s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v4s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ; GFX7-LABEL: name: load_local_v4s16
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ; GFX9-LABEL: name: load_local_v4s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+# NOTE(review): the load_local_v6s16 test below (12-byte, 4-byte-aligned load
+# of <6 x s16> into $vgpr0_vgpr1_vgpr2) is entirely commented out and has no
+# CHECK lines. The reason is not recorded here -- presumably this 96-bit case
+# is not handled yet; TODO confirm and either re-enable or state why.
+# ---
+
+# name: load_local_v6s16
+# legalized: true
+# regBankSelected: true
+# tracksRegLiveness: true
+# machineFunctionInfo:
+# scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+# scratchWaveOffsetReg: $sgpr4
+# stackPtrOffsetReg: $sgpr32
+
+# body: |
+# bb.0:
+# liveins: $vgpr0
+
+# %0:vgpr(p3) = COPY $vgpr0
+# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
+# $vgpr0_vgpr1_vgpr2 = COPY %1
+
+# ...
+
+---
+
+# Input: a 16-byte G_LOAD with only 4-byte alignment producing <8 x s16> from
+# a local (addrspace 3) pointer held in a VGPR.
+# The checks below expect the G_LOAD itself to still be present in the output
+# for GFX6, GFX7 and GFX9, with the result register given the vreg_128 class
+# and the `align 4` kept on the memory operand.
+# GFX6/GFX7 additionally expect `$m0 = S_MOV_B32 -1` ahead of the load; the
+# GFX9 checks contain no m0 write.
+name: load_local_v8s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v8s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+ ; GFX7-LABEL: name: load_local_v8s16
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+ ; GFX9-LABEL: name: load_local_v8s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+################################################################################
+### Stress addressing modes
+################################################################################
+
+---
+
+# Input: a 1-byte local load into an s32 whose address is a G_GEP of the base
+# pointer by the constant 65535.
+# Expected per the checks below:
+#  - GFX7 and GFX9 fold 65535 into the DS_READ_U8 / DS_READ_U8_gfx9 offset
+#    operand and use the original pointer as the address.
+#  - GFX6 does not fold: it materialises 65535 with V_MOV_B32_e32, adds it
+#    with V_ADD_I32_e64 and reads with offset 0.
+#  - GFX6/GFX7 write $m0 = S_MOV_B32 -1 before the read; the GFX9 variant has
+#    no m0 write and no implicit $m0 use.
+name: load_local_s32_from_1_gep_65535
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 65535
+ %2:vgpr(p3) = G_GEP %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+# Input: a 1-byte local load into an s32 whose address is a G_GEP of the base
+# pointer by the constant 65536.
+# Expected per the checks below: no target folds the constant into the read's
+# offset operand (presumably because 65536 does not fit the DS offset field --
+# TODO confirm against the ISA docs). Each one materialises 65536 with
+# V_MOV_B32_e32, adds it to the base and reads with offset 0:
+#  - GFX6/GFX7 use V_ADD_I32_e64 (with a dead carry-out def) and write
+#    $m0 = S_MOV_B32 -1 before DS_READ_U8.
+#  - GFX9 uses V_ADD_U32_e64 and DS_READ_U8_gfx9 with no m0 write.
+name: load_local_s32_from_1_gep_65536
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+ ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 65536
+ %2:vgpr(p3) = G_GEP %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+# Input: a 1-byte local load into an s32 whose address is a G_GEP of the base
+# pointer by the constant -1.
+# Expected per the checks below: no target folds the negative constant into
+# the read's offset operand. Each one materialises it with V_MOV_B32_e32
+# (printed as the unsigned value 4294967295), adds it to the base and reads
+# with offset 0:
+#  - GFX6/GFX7 use V_ADD_I32_e64 (with a dead carry-out def) and write
+#    $m0 = S_MOV_B32 -1 before DS_READ_U8.
+#  - GFX9 uses V_ADD_U32_e64 and DS_READ_U8_gfx9 with no m0 write.
+name: load_local_s32_from_1_gep_m1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 -1
+ %2:vgpr(p3) = G_GEP %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
OpenPOWER on IntegriCloud