diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-01 16:34:48 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-01 16:34:48 +0000 | 
| commit | 0a52e9d026a86d078c5b7184b63e74b72f830665 (patch) | |
| tree | d7e16cfd0ad38faa2d758f07c5cac373e84319d8 /llvm | |
| parent | e1006259d84da5fe7d877978e9f41dd29ee5d4e9 (diff) | |
| download | bcm5719-llvm-0a52e9d026a86d078c5b7184b63e74b72f830665.tar.gz bcm5719-llvm-0a52e9d026a86d078c5b7184b63e74b72f830665.zip  | |
AMDGPU/GlobalISel: Complete implementation of G_GEP
Also works around a TableGen defect in selecting add with an unused carry.
Since G_GEP has to be selected manually anyway, we might as well handle
G_ADD manually too.
llvm-svn: 364806
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 105 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir | 354 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir | 44 | 
6 files changed, 463 insertions, 83 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index ac2951ca532..6f725d60907 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -70,17 +70,6 @@ class GISelVop2Pat <    (inst src0_vt:$src0, src1_vt:$src1)  >; -// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken. -class GISelVop2ClampingPat < -  SDPatternOperator node, -  Instruction inst, -  ValueType dst_vt, -  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat < - -  (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))), -  (inst src0_vt:$src0, src1_vt:$src1, 0) ->; -  class GISelVop2CommutePat <    SDPatternOperator node,    Instruction inst, @@ -139,18 +128,6 @@ multiclass GISelVop2IntrPat <  def : GISelSop2Pat <or, S_OR_B32, i32>;  def : GISelVop2Pat <or, V_OR_B32_e32, i32>; -def : GISelSop2Pat <add, S_ADD_I32, i32>; - -let SubtargetPredicate = NotHasAddNoCarryInsts in { -// FIXME: This should use the VOP3 form -//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>; -def : GISelVop2Pat <add, V_ADD_I32_e32, i32>; -} - -let SubtargetPredicate = HasAddNoCarryInsts in { -def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>; -} -  def : GISelSop2Pat <sra, S_ASHR_I32, i32>;  let AddedComplexity = 100 in {  let SubtargetPredicate = isGFX6GFX7 in { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 462158d9f97..f64da2d4884 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -173,13 +173,14 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {  MachineOperand  AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, +                                           const TargetRegisterClass &SubRC,                                             unsigned SubIdx) const {    MachineInstr *MI = 
MO.getParent();    MachineBasicBlock *BB = MO.getParent()->getParent();    MachineFunction *MF = BB->getParent();    MachineRegisterInfo &MRI = MF->getRegInfo(); -  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); +  Register DstReg = MRI.createVirtualRegister(&SubRC);    if (MO.isReg()) {      unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); @@ -215,40 +216,86 @@ bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {    MachineBasicBlock *BB = I.getParent();    MachineFunction *MF = BB->getParent();    MachineRegisterInfo &MRI = MF->getRegInfo(); -  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI); -  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); -  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - -  if (Size != 64) -    return false; - -  DebugLoc DL = I.getDebugLoc(); +  Register DstReg = I.getOperand(0).getReg(); +  const DebugLoc &DL = I.getDebugLoc(); +  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); +  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); +  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; -  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0)); -  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0)); +  if (Size == 32) { +    if (IsSALU) { +      MachineInstr *Add = +        BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstReg) +        .add(I.getOperand(1)) +        .add(I.getOperand(2)); +      I.eraseFromParent(); +      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); +    } -  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) -          .add(Lo1) -          .add(Lo2); +    if (STI.hasAddNoCarry()) { +      I.setDesc(TII.get(AMDGPU::V_ADD_U32_e64)); +      I.addOperand(*MF, MachineOperand::CreateImm(0)); +      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); +      return constrainSelectedInstRegOperands(I, 
TII, TRI, RBI); +    } -  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1)); -  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1)); +    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass()); +    MachineInstr *Add +      = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstReg) +      .addDef(UnusedCarry, RegState::Dead) +      .add(I.getOperand(1)) +      .add(I.getOperand(2)) +      .addImm(0); +    I.eraseFromParent(); +    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); +  } -  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) -          .add(Hi1) -          .add(Hi2); +  const TargetRegisterClass &RC +    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass; +  const TargetRegisterClass &HalfRC +    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass; + +  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0)); +  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0)); +  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); +  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); + +  Register DstLo = MRI.createVirtualRegister(&HalfRC); +  Register DstHi = MRI.createVirtualRegister(&HalfRC); + +  if (IsSALU) { +    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) +      .add(Lo1) +      .add(Lo2); +    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) +      .add(Hi1) +      .add(Hi2); +  } else { +    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); +    Register CarryReg = MRI.createVirtualRegister(CarryRC); +    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) +      .addDef(CarryReg) +      .add(Lo1) +      .add(Lo2) +      .addImm(0); +    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) +      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead) +      .add(Hi1) +      .add(Hi2) +      .addReg(CarryReg, 
RegState::Kill) +      .addImm(0); +  } -  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg()) -          .addReg(DstLo) -          .addImm(AMDGPU::sub0) -          .addReg(DstHi) -          .addImm(AMDGPU::sub1); +  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) +    .addReg(DstLo) +    .addImm(AMDGPU::sub0) +    .addReg(DstHi) +    .addImm(AMDGPU::sub1); -  for (MachineOperand &MO : I.explicit_operands()) { -    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg())) -      continue; -    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI); -  } +  if (!RBI.constrainGenericRegister(DstReg, RC, MRI) || +      !RBI.constrainGenericRegister(I.getOperand(1).getReg(), RC, MRI) || +      !RBI.constrainGenericRegister(I.getOperand(2).getReg(), RC, MRI)) +    return false;    I.eraseFromParent();    return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 7ce467c15c6..9e1485159cc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -65,7 +65,9 @@ private:    /// tblgen-erated 'select' implementation.    
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; -  MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const; +  MachineOperand getSubOperand64(MachineOperand &MO, +                                 const TargetRegisterClass &SubRC, +                                 unsigned SubIdx) const;    bool selectCOPY(MachineInstr &I) const;    bool selectPHI(MachineInstr &I) const;    bool selectG_TRUNC(MachineInstr &I) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index 8f7973d4eca..54374f01bf0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -16,19 +16,19 @@ body: |      ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1      ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0      ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 -    ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc -    ; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec -    ; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec -    ; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec -    ; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr +    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc +    ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec +    ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec +    ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec +    ; GFX6: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 
0, 0, implicit $exec, implicit $flat_scr      ; GFX9-LABEL: name: add_s32      ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0      ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1      ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0      ; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 -    ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc -    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec -    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec +    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc +    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec +    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec      ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec      ; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir new file mode 100644 index 00000000000..bc943384b67 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir @@ -0,0 +1,354 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -mattr=+wavefrontsize32,-wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32,-wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX9 
%s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX10-WAVE64 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX10-WAVE32 %s + +--- +name:  gep_p0_sgpr_sgpr +legalized:       true +regBankSelected: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 +    ; GFX6-LABEL: name: gep_p0_sgpr_sgpr +    ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 +    ; GFX6: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 +    ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 +    ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 +    ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 +    ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 +    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc +    ; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc +    ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 +    ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX8-LABEL: name: gep_p0_sgpr_sgpr +    ; GFX8: $vcc_hi = IMPLICIT_DEF +    ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 +    ; GFX8: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 +    ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 +    ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 +    ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 +    ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 +    ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc +    ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc +    ; GFX8: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 +    ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX9-LABEL: name: gep_p0_sgpr_sgpr +    ; GFX9: $vcc_hi = IMPLICIT_DEF +    ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 +    ; GFX9: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 +    ; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 +    ; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 +    ; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 +    ; GFX9: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 +    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc +    ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc +    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 +    ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_sgpr +    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 +    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 +    ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 +    ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 +    ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 +    ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 +    ; GFX10-WAVE64: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc +    ; GFX10-WAVE64: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc +    ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 +    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_sgpr +    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF +    ; 
GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 +    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 +    ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 +    ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 +    ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 +    ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 +    ; GFX10-WAVE32: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc +    ; GFX10-WAVE32: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc +    ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 +    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    %0:sgpr(p0) = COPY $sgpr0_sgpr1 +    %1:sgpr(s64) = COPY $sgpr2_sgpr3 +    %2:sgpr(p0) = G_GEP %0, %1 +    S_ENDPGM 0, implicit %2 + +... + +--- +name:  gep_p0_vgpr_vgpr +legalized:       true +regBankSelected: true + +body: | +  bb.0: +    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 +    ; GFX6-LABEL: name: gep_p0_vgpr_vgpr +    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 +    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX8-LABEL: name: gep_p0_vgpr_vgpr 
+    ; GFX8: $vcc_hi = IMPLICIT_DEF +    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 +    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX8: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX9-LABEL: name: gep_p0_vgpr_vgpr +    ; GFX9: $vcc_hi = IMPLICIT_DEF +    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 +    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX9: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX10-WAVE64-LABEL: name: gep_p0_vgpr_vgpr +    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 +    ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; 
GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX10-WAVE32-LABEL: name: gep_p0_vgpr_vgpr +    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF +    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 +    ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    %0:vgpr(p0) = COPY $vgpr0_vgpr1 +    %1:vgpr(s64) = COPY $vgpr2_vgpr3 +    %2:vgpr(p0) = G_GEP %0, %1 +    S_ENDPGM 0, implicit %2 + +... 
+ +--- +name:  gep_p0_sgpr_vgpr +legalized:       true +regBankSelected: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; GFX6-LABEL: name: gep_p0_sgpr_vgpr +    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1 +    ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX8-LABEL: name: gep_p0_sgpr_vgpr +    ; GFX8: $vcc_hi = IMPLICIT_DEF +    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1 +    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX8: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX9-LABEL: name: gep_p0_sgpr_vgpr +    ; GFX9: $vcc_hi = IMPLICIT_DEF +    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$sgpr0_sgpr1 +    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX9: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr +    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1 +    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr +    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF +    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1 +    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; 
GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 +    ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 +    ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec +    ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec +    ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1 +    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]] +    %0:vgpr(p0) = COPY $sgpr0_sgpr1 +    %1:vgpr(s64) = COPY $vgpr0_vgpr1 +    %2:vgpr(p0) = G_GEP %0, %1 +    S_ENDPGM 0, implicit %2 + +... + +--- +name:  gep_p3_sgpr_sgpr +legalized:       true +regBankSelected: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; GFX6-LABEL: name: gep_p3_sgpr_sgpr +    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc +    ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]] +    ; GFX8-LABEL: name: gep_p3_sgpr_sgpr +    ; GFX8: $vcc_hi = IMPLICIT_DEF +    ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +    ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc +    ; GFX8: S_ENDPGM 0, implicit [[S_ADD_U32_]] +    ; GFX9-LABEL: name: gep_p3_sgpr_sgpr +    ; GFX9: $vcc_hi = IMPLICIT_DEF +    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc +    ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]] +    ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_sgpr +    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY 
$sgpr0 +    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +    ; GFX10-WAVE64: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc +    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[S_ADD_U32_]] +    ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_sgpr +    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF +    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 +    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +    ; GFX10-WAVE32: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc +    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[S_ADD_U32_]] +    %0:sgpr(p3) = COPY $sgpr0 +    %1:sgpr(s32) = COPY $sgpr1 +    %2:sgpr(p3) = G_GEP %0, %1 +    S_ENDPGM 0, implicit %2 + +... + +--- +name:  gep_p3_vgpr_vgpr +legalized:       true +regBankSelected: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; GFX6-LABEL: name: gep_p3_vgpr_vgpr +    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX6: S_ENDPGM 0, implicit %2 +    ; GFX8-LABEL: name: gep_p3_vgpr_vgpr +    ; GFX8: $vcc_hi = IMPLICIT_DEF +    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX8: S_ENDPGM 0, implicit %2 +    ; GFX9-LABEL: name: gep_p3_vgpr_vgpr +    ; GFX9: $vcc_hi = IMPLICIT_DEF +    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] +    ; GFX10-WAVE64-LABEL: name: gep_p3_vgpr_vgpr +    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; 
GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] +    ; GFX10-WAVE32-LABEL: name: gep_p3_vgpr_vgpr +    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF +    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 +    ; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] +    %0:vgpr(p3) = COPY $vgpr0 +    %1:vgpr(s32) = COPY $vgpr1 +    %2:vgpr(p3) = G_GEP %0, %1 +    S_ENDPGM 0, implicit %2 + +... + +--- +name:  gep_p3_sgpr_vgpr +legalized:       true +regBankSelected: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; GFX6-LABEL: name: gep_p3_sgpr_vgpr +    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0 +    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX6: S_ENDPGM 0, implicit %2 +    ; GFX8-LABEL: name: gep_p3_sgpr_vgpr +    ; GFX8: $vcc_hi = IMPLICIT_DEF +    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0 +    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX8: S_ENDPGM 0, implicit %2 +    ; GFX9-LABEL: name: gep_p3_sgpr_vgpr +    ; GFX9: $vcc_hi = IMPLICIT_DEF +    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0 +    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] +    ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr +    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0 +    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = 
V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] +    ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr +    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF +    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0 +    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    ; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec +    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] +    %0:vgpr(p3) = COPY $sgpr0 +    %1:vgpr(s32) = COPY $vgpr0 +    %2:vgpr(p3) = G_GEP %0, %1 +    S_ENDPGM 0, implicit %2 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index 7fd276fd5b5..f82ad33a515 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -12,7 +12,7 @@ legalized:       true  regBankSelected: true  # GCN: body: -# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 +# GCN: [[PTR:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1  # Immediate offset:  # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0 @@ -42,28 +42,28 @@ regBankSelected: true  # Max immediate for CI  # SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292  # SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 3 -# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 -# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# SIVI: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# 
SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1  # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 +# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1  # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0  # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0  # Immediate overflow for CI  # GCN: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0  # GCN: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4 -# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 -# GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 -# GCN: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# GCN-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 -# GCN-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# GCN: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 +# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 +# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 +# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1  # GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 +# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1  # GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0  # Max 32-bit byte offset @@ -74,14 +74,14 @@ regBankSelected: true  # Overflow 32-bit byte offset  # SIVI: 
[[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0  # SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 -# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 -# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 -# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# SIVI: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0 +# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1  # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1 +# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1  # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0  # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0  | 

