Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp                     |  168
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h                       |    5
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll           | 1063
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll     |    2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll |  406
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll                        |    3
-rw-r--r--  llvm/test/CodeGen/AMDGPU/ret.ll                                   |   12
7 files changed, 1486 insertions(+), 173 deletions(-)
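
For orientation before the diff: the change below teaches the AMDGPU GlobalISel IRTranslator to lower non-void returns for ordinary (non-shader) calling conventions instead of falling back. A minimal sketch of the kind of input involved, written in the style of the new function-returns.ll test added by this patch (it mirrors the i32_func_void case there); after the change the return value is copied to $vgpr0 and the function ends in S_SETPC_B64_return consuming the return address held in $sgpr30_sgpr31:

; Minimal IR in the style of the new test below (see i32_func_void).
define i32 @i32_func_void() {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}
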
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 30d3903be9c..d0c99aeca0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -30,9 +30,9 @@ using namespace llvm;
 
 namespace {
 
-struct OutgoingArgHandler : public CallLowering::ValueHandler {
-  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
-                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+struct OutgoingValueHandler : public CallLowering::ValueHandler {
+  OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                       MachineInstrBuilder MIB, CCAssignFn *AssignFn)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
 
   MachineInstrBuilder MIB;
@@ -49,8 +49,16 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
   void assignValueToReg(Register ValVReg, Register PhysReg,
                         CCValAssign &VA) override {
-    MIB.addUse(PhysReg);
-    MIRBuilder.buildCopy(PhysReg, ValVReg);
+    Register ExtReg;
+    if (VA.getLocVT().getSizeInBits() < 32) {
+      // 16-bit types are reported as legal for 32-bit registers. We need to
+      // extend and do a 32-bit copy to avoid the verifier complaining about it.
+      ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
+    } else
+      ExtReg = extendRegister(ValVReg, VA);
+
+    MIRBuilder.buildCopy(PhysReg, ExtReg);
+    MIB.addUse(PhysReg, RegState::Implicit);
   }
 
   bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
@@ -193,6 +201,90 @@ void AMDGPUCallLowering::splitToValueTypes(
   }
 }
 
+// Get the appropriate type to make \p OrigTy \p Factor times bigger.
+static LLT getMultipleType(LLT OrigTy, int Factor) {
+  if (OrigTy.isVector()) {
+    return LLT::vector(OrigTy.getNumElements() * Factor,
+                       OrigTy.getElementType());
+  }
+
+  return LLT::scalar(OrigTy.getSizeInBits() * Factor);
+}
+
+// TODO: Move to generic code
+static void unpackRegsToOrigType(MachineIRBuilder &MIRBuilder,
+                                 ArrayRef<Register> DstRegs,
+                                 Register SrcReg,
+                                 LLT SrcTy,
+                                 LLT PartTy) {
+  assert(DstRegs.size() > 1 && "Nothing to unpack");
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  const unsigned SrcSize = SrcTy.getSizeInBits();
+  const unsigned PartSize = PartTy.getSizeInBits();
+
+  if (SrcTy.isVector() && !PartTy.isVector() &&
+      PartSize > SrcTy.getElementType().getSizeInBits()) {
+    // Vector was scalarized, and the elements extended.
+    auto UnmergeToEltTy = MIRBuilder.buildUnmerge(SrcTy.getElementType(),
+                                                  SrcReg);
+    for (int i = 0, e = DstRegs.size(); i != e; ++i)
+      MIRBuilder.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+    return;
+  }
+
+  if (SrcSize % PartSize == 0) {
+    MIRBuilder.buildUnmerge(DstRegs, SrcReg);
+    return;
+  }
+
+  const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize;
+
+  LLT BigTy = getMultipleType(PartTy, NumRoundedParts);
+  auto ImpDef = MIRBuilder.buildUndef(BigTy);
+
+  Register BigReg = MRI.createGenericVirtualRegister(BigTy);
+  MIRBuilder.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0).getReg(0);
+
+  int64_t Offset = 0;
+  for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize)
+    MIRBuilder.buildExtract(DstRegs[i], BigReg, Offset);
+}
+
+/// Lower the return value for the already existing \p Ret. This assumes that
+/// \p MIRBuilder's insertion point is correct.
+bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
+                                        const Value *Val, ArrayRef<Register> VRegs,
+                                        MachineInstrBuilder &Ret) const {
+  if (!Val)
+    return true;
+
+  auto &MF = MIRBuilder.getMF();
+  const auto &F = MF.getFunction();
+  const DataLayout &DL = MF.getDataLayout();
+
+  CallingConv::ID CC = F.getCallingConv();
+  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  ArgInfo OrigRetInfo(VRegs, Val->getType());
+  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+  SmallVector<ArgInfo, 4> SplitRetInfos;
+
+  splitToValueTypes(
+    OrigRetInfo, SplitRetInfos, DL, MRI, CC,
+    [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+      unpackRegsToOrigType(MIRBuilder, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
+    });
+
+  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
+
+  OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
+  return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
+}
+
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs) const {
@@ -202,38 +294,43 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   MFI->setIfReturnsVoid(!Val);
 
-  if (!Val) {
-    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+  assert(!Val == VRegs.empty() && "Return value without a vreg");
+
+  CallingConv::ID CC = MIRBuilder.getMF().getFunction().getCallingConv();
+  const bool IsShader = AMDGPU::isShader(CC);
+  const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) ||
+                         AMDGPU::isKernel(CC);
+  if (IsWaveEnd) {
+    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM)
+      .addImm(0);
     return true;
   }
 
-  Register VReg = VRegs[0];
-
-  const Function &F = MF.getFunction();
-  auto &DL = F.getParent()->getDataLayout();
-  if (!AMDGPU::isShader(F.getCallingConv()))
-    return false;
+  auto const &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
+  unsigned ReturnOpc = IsShader ?
+    AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
 
-  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
-  SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ArgInfo OrigArg{VReg, Val->getType()};
-  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
-
-  SmallVector<ArgInfo, 8> SplitArgs;
-  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
-  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
-    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
-    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+  auto Ret = MIRBuilder.buildInstrNoInsert(ReturnOpc);
+  Register ReturnAddrVReg;
+  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
+    Ret.addUse(ReturnAddrVReg);
   }
-  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
-  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
-  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+
+  if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
     return false;
 
-  MIRBuilder.insertInstr(RetInstr);
+  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
+                                         &AMDGPU::SGPR_64RegClass);
+    MIRBuilder.buildCopy(ReturnAddrVReg, LiveInReturn);
+  }
+
+  // TODO: Handle CalleeSavedRegsViaCopy.
+
+  MIRBuilder.insertInstr(Ret);
   return true;
 }
 
@@ -386,6 +483,7 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
   return true;
 }
 
+// TODO: Move this to generic code
 static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
                                     ArrayRef<Register> OrigRegs,
                                     ArrayRef<Register> Regs,
@@ -476,6 +574,14 @@ bool AMDGPUCallLowering::lowerFormalArguments(
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 
+  if (!IsEntryFunc) {
+    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
+    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
+                                         &AMDGPU::SGPR_64RegClass);
+    MBB.addLiveIn(ReturnAddrReg);
+    MIRBuilder.buildCopy(LiveInReturn, ReturnAddrReg);
+  }
+
   if (Info->hasImplicitBufferPtr()) {
     Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
@@ -497,9 +603,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     if (!IsShader && InReg)
       return false;
 
-    // TODO: Handle sret.
-    if (Arg.hasAttribute(Attribute::StructRet) || -        Arg.hasAttribute(Attribute::SwiftSelf) || +    if (Arg.hasAttribute(Attribute::SwiftSelf) ||          Arg.hasAttribute(Attribute::SwiftError) ||          Arg.hasAttribute(Attribute::Nest))        return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h index bc345b6c659..1898c7a4c88 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -20,6 +20,7 @@  namespace llvm {  class AMDGPUTargetLowering; +class MachineInstrBuilder;  class AMDGPUCallLowering: public CallLowering {    Register lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, @@ -38,6 +39,10 @@ class AMDGPUCallLowering: public CallLowering {                           CallingConv::ID CallConv,                           SplitArgTy SplitArg) const; +  bool lowerReturnVal(MachineIRBuilder &MIRBuilder, +                      const Value *Val, ArrayRef<Register> VRegs, +                      MachineInstrBuilder &Ret) const; +  public:    AMDGPUCallLowering(const AMDGPUTargetLowering &TLI); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll new file mode 100644 index 00000000000..fcf5deff710 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -0,0 +1,1063 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -global-isel-abort=0 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s + +define i1 @i1_func_void() #0 { +  ; CHECK-LABEL: name: i1_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load i1, i1 addrspace(1)* undef +  ret i1 %val +} + +define zeroext i1 @i1_zeroext_func_void() #0 { +  ; CHECK-LABEL: name: i1_zeroext_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) +  ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load i1, i1 addrspace(1)* undef +  ret i1 %val +} + +define signext i1 @i1_signext_func_void() #0 { +  ; CHECK-LABEL: name: i1_signext_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) +  ; CHECK:   $vgpr0 = COPY [[SEXT]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK: 
  S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load i1, i1 addrspace(1)* undef +  ret i1 %val +} + +define i8 @i8_func_void() #0 { +  ; CHECK-LABEL: name: i8_func_void +  ; CHECK: bb.0: +  ; CHECK:   successors: %bb.1(0x80000000) +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) +  %val = load i8, i8 addrspace(1)* undef +  ret i8 %val +} + +define zeroext i8 @i8_zeroext_func_void() #0 { +  ; CHECK-LABEL: name: i8_zeroext_func_void +  ; CHECK: bb.0: +  ; CHECK:   successors: %bb.1(0x80000000) +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) +  %val = load i8, i8 addrspace(1)* undef +  ret i8 %val +} + +define signext i8 @i8_signext_func_void() #0 { +  ; CHECK-LABEL: name: i8_signext_func_void +  ; CHECK: bb.0: +  ; CHECK:   successors: %bb.1(0x80000000) +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) +  %val = load i8, i8 addrspace(1)* undef +  ret i8 %val +} + +define i16 @i16_func_void() #0 { +  ; CHECK-LABEL: name: i16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load i16, i16 addrspace(1)* undef +  ret i16 %val +} + +define zeroext i16 @i16_zeroext_func_void() #0 { +  ; CHECK-LABEL: name: i16_zeroext_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) +  ; CHECK:   $vgpr0 = COPY [[ZEXT]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load i16, i16 addrspace(1)* undef +  ret i16 %val +} + +define signext i16 @i16_signext_func_void() #0 { +  ; CHECK-LABEL: name: i16_signext_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) +  ; CHECK:   $vgpr0 = COPY [[SEXT]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   
S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load i16, i16 addrspace(1)* undef +  ret i16 %val +} + +define i32 @i32_func_void() #0 { +  ; CHECK-LABEL: name: i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load 4 from `i32 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   $vgpr0 = COPY [[LOAD]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load i32, i32 addrspace(1)* undef +  ret i32 %val +} + +define i48 @i48_func_void() #0 { +  ; CHECK-LABEL: name: i48_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) +  ; CHECK:   [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +  ; CHECK:   [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[DEF1]], [[LOAD]](s48), 0 +  ; CHECK:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 0 +  ; CHECK:   [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 32 +  ; CHECK:   $vgpr0 = COPY [[EXTRACT]](s32) +  ; CHECK:   $vgpr1 = COPY [[EXTRACT1]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 +  %val = load i48, i48 addrspace(1)* undef, align 8 +  ret i48 %val +} + +define i64 @i64_func_void() #0 { +  ; CHECK-LABEL: name: i64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load 8 from `i64 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 +  %val = load i64, i64 addrspace(1)* undef +  ret i64 %val +} + +define i65 @i65_func_void() #0 { +  ; CHECK-LABEL: name: i65_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1) +  ; CHECK:   [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF +  ; CHECK:   [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[LOAD]](s65), 0 +  ; CHECK:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 0 +  ; CHECK:   [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 32 +  ; CHECK:   [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 64 +  ; CHECK:   $vgpr0 = COPY [[EXTRACT]](s32) +  ; CHECK:   $vgpr1 = COPY [[EXTRACT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[EXTRACT2]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +  %val = load i65, i65 addrspace(1)* undef +  ret i65 %val +} + +define float @f32_func_void() #0 { +  ; 
CHECK-LABEL: name: f32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load 4 from `float addrspace(1)* undef`, addrspace 1) +  ; CHECK:   $vgpr0 = COPY [[LOAD]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0 +  %val = load float, float addrspace(1)* undef +  ret float %val +} + +define double @f64_func_void() #0 { +  ; CHECK-LABEL: name: f64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load 8 from `double addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 +  %val = load double, double addrspace(1)* undef +  ret double %val +} + +define <2 x double> @v2f64_func_void() #0 { +  ; CHECK-LABEL: name: v2f64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load 16 from `<2 x double> addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %val = load <2 x double>, <2 x double> addrspace(1)* undef +  ret <2 x double> %val +} + +define <2 x i32> @v2i32_func_void() #0 { +  ; CHECK-LABEL: name: v2i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load 8 from `<2 x i32> addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 +  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef +  ret <2 x i32> %val +} + +define <3 x i32> @v3i32_func_void() #0 { +  ; CHECK-LABEL: name: v3i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load 12 from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef +  ret <3 x i32> %val +} + +define <4 x i32> @v4i32_func_void() #0 { +  ; CHECK-LABEL: name: v4i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load 16 from `<4 x i32> addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef +  ret <4 x i32> %val +} + +define <5 x i32> @v5i32_func_void() #0 { +  ; CHECK-LABEL: name: v5i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 20 from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 +  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef +  ret <5 x i32> %val +} + +define <8 x i32> @v8i32_func_void() #0 { +  ; CHECK-LABEL: name: v8i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   $vgpr6 = COPY [[UV6]](s32) +  ; CHECK:   $vgpr7 = COPY 
[[UV7]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 +  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef +  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr +  ret <8 x i32> %val +} + +define <16 x i32> @v16i32_func_void() #0 { +  ; CHECK-LABEL: name: v16i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   $vgpr6 = COPY [[UV6]](s32) +  ; CHECK:   $vgpr7 = COPY [[UV7]](s32) +  ; CHECK:   $vgpr8 = COPY [[UV8]](s32) +  ; CHECK:   $vgpr9 = COPY [[UV9]](s32) +  ; CHECK:   $vgpr10 = COPY [[UV10]](s32) +  ; CHECK:   $vgpr11 = COPY [[UV11]](s32) +  ; CHECK:   $vgpr12 = COPY [[UV12]](s32) +  ; CHECK:   $vgpr13 = COPY [[UV13]](s32) +  ; CHECK:   $vgpr14 = COPY [[UV14]](s32) +  ; CHECK:   $vgpr15 = COPY [[UV15]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 +  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef +  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr +  ret <16 x i32> %val +} + +define <32 x i32> @v32i32_func_void() #0 { +  ; CHECK-LABEL: name: v32i32_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), 
[[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   $vgpr6 = COPY [[UV6]](s32) +  ; CHECK:   $vgpr7 = COPY [[UV7]](s32) +  ; CHECK:   $vgpr8 = COPY [[UV8]](s32) +  ; CHECK:   $vgpr9 = COPY [[UV9]](s32) +  ; CHECK:   $vgpr10 = COPY [[UV10]](s32) +  ; CHECK:   $vgpr11 = COPY [[UV11]](s32) +  ; CHECK:   $vgpr12 = COPY [[UV12]](s32) +  ; CHECK:   $vgpr13 = COPY [[UV13]](s32) +  ; CHECK:   $vgpr14 = COPY [[UV14]](s32) +  ; CHECK:   $vgpr15 = COPY [[UV15]](s32) +  ; CHECK:   $vgpr16 = COPY [[UV16]](s32) +  ; CHECK:   $vgpr17 = COPY [[UV17]](s32) +  ; CHECK:   $vgpr18 = COPY [[UV18]](s32) +  ; CHECK:   $vgpr19 = COPY [[UV19]](s32) +  ; CHECK:   $vgpr20 = COPY [[UV20]](s32) +  ; CHECK:   $vgpr21 = COPY [[UV21]](s32) +  ; CHECK:   $vgpr22 = COPY [[UV22]](s32) +  ; CHECK:   $vgpr23 = COPY [[UV23]](s32) +  ; CHECK:   $vgpr24 = COPY [[UV24]](s32) +  ; CHECK:   $vgpr25 = COPY [[UV25]](s32) +  ; CHECK:   $vgpr26 = COPY [[UV26]](s32) +  ; CHECK:   $vgpr27 = COPY [[UV27]](s32) +  ; CHECK:   $vgpr28 = COPY [[UV28]](s32) +  ; CHECK:   $vgpr29 = COPY [[UV29]](s32) +  ; CHECK:   $vgpr30 = COPY [[UV30]](s32) +  ; CHECK:   $vgpr31 = COPY [[UV31]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 +  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef +  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr +  ret <32 x i32> %val +} + +define <2 x i64> @v2i64_func_void() #0 { +  ; CHECK-LABEL: name: v2i64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load 16 from `<2 x i64> addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %val = load <2 x i64>, <2 x i64> 
addrspace(1)* undef +  ret <2 x i64> %val +} + +define <3 x i64> @v3i64_func_void() #0 { +  ; CHECK-LABEL: name: v3i64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load 24 from %ir.ptr, align 32, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 +  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef +  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr +  ret <3 x i64> %val +} + +define <4 x i64> @v4i64_func_void() #0 { +  ; CHECK-LABEL: name: v4i64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   $vgpr6 = COPY [[UV6]](s32) +  ; CHECK:   $vgpr7 = COPY [[UV7]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 +  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef +  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr +  ret <4 x i64> %val +} + +define <5 x i64> @v5i64_func_void() #0 { +  ; CHECK-LABEL: name: v5i64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load 40 from %ir.ptr, align 64, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), 
[[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   $vgpr6 = COPY [[UV6]](s32) +  ; CHECK:   $vgpr7 = COPY [[UV7]](s32) +  ; CHECK:   $vgpr8 = COPY [[UV8]](s32) +  ; CHECK:   $vgpr9 = COPY [[UV9]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 +  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef +  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr +  ret <5 x i64> %val +} + +define <8 x i64> @v8i64_func_void() #0 { +  ; CHECK-LABEL: name: v8i64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   $vgpr6 = COPY [[UV6]](s32) +  ; CHECK:   $vgpr7 = COPY [[UV7]](s32) +  ; CHECK:   $vgpr8 = COPY [[UV8]](s32) +  ; CHECK:   $vgpr9 = COPY [[UV9]](s32) +  ; CHECK:   $vgpr10 = COPY [[UV10]](s32) +  ; CHECK:   $vgpr11 = COPY [[UV11]](s32) +  ; CHECK:   $vgpr12 = COPY [[UV12]](s32) +  ; CHECK:   $vgpr13 = COPY [[UV13]](s32) +  ; CHECK:   $vgpr14 = COPY [[UV14]](s32) +  ; CHECK:   $vgpr15 = COPY [[UV15]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 +  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef +  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr +  ret <8 x i64> %val +} + +define <16 x i64> @v16i64_func_void() #0 { +  ; CHECK-LABEL: name: v16i64_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x 
i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[UV3]](s32) +  ; CHECK:   $vgpr4 = COPY [[UV4]](s32) +  ; CHECK:   $vgpr5 = COPY [[UV5]](s32) +  ; CHECK:   $vgpr6 = COPY [[UV6]](s32) +  ; CHECK:   $vgpr7 = COPY [[UV7]](s32) +  ; CHECK:   $vgpr8 = COPY [[UV8]](s32) +  ; CHECK:   $vgpr9 = COPY [[UV9]](s32) +  ; CHECK:   $vgpr10 = COPY [[UV10]](s32) +  ; CHECK:   $vgpr11 = COPY [[UV11]](s32) +  ; CHECK:   $vgpr12 = COPY [[UV12]](s32) +  ; CHECK:   $vgpr13 = COPY [[UV13]](s32) +  ; CHECK:   $vgpr14 = COPY [[UV14]](s32) +  ; CHECK:   $vgpr15 = COPY [[UV15]](s32) +  ; CHECK:   $vgpr16 = COPY [[UV16]](s32) +  ; CHECK:   $vgpr17 = COPY [[UV17]](s32) +  ; CHECK:   $vgpr18 = COPY [[UV18]](s32) +  ; CHECK:   $vgpr19 = COPY [[UV19]](s32) +  ; CHECK:   $vgpr20 = COPY [[UV20]](s32) +  ; CHECK:   $vgpr21 = COPY [[UV21]](s32) +  ; CHECK:   $vgpr22 = COPY [[UV22]](s32) +  ; CHECK:   $vgpr23 = COPY [[UV23]](s32) +  ; CHECK:   $vgpr24 = COPY [[UV24]](s32) +  ; CHECK:   $vgpr25 = COPY [[UV25]](s32) +  ; CHECK:   $vgpr26 = COPY [[UV26]](s32) +  ; CHECK:   $vgpr27 = COPY [[UV27]](s32) +  ; CHECK:   $vgpr28 = COPY [[UV28]](s32) +  ; CHECK:   $vgpr29 = COPY [[UV29]](s32) +  ; CHECK:   $vgpr30 = COPY [[UV30]](s32) +  ; CHECK:   $vgpr31 = COPY [[UV31]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 +  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef +  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr +  ret <16 x i64> %val +} + +define <2 x i16> @v2i16_func_void() #0 { +  ; CHECK-LABEL: name: v2i16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load 4 
from `<2 x i16> addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<2 x s16>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 +  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef +  ret <2 x i16> %val +} + +define <3 x i16> @v3i16_func_void() #0 { +  ; CHECK-LABEL: name: v3i16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load 6 from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef +  ret <3 x i16> %val +} + +define <4 x i16> @v4i16_func_void() #0 { +  ; CHECK-LABEL: name: v4i16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x i16> addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) +  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   $vgpr3 = COPY [[ANYEXT3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef +  ret <4 x i16> %val +} + +define <4 x half> @v4f16_func_void() #0 { +  ; CHECK-LABEL: name: v4f16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x half> addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = 
G_ANYEXT [[UV1]](s16) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) +  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   $vgpr3 = COPY [[ANYEXT3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %val = load <4 x half>, <4 x half> addrspace(1)* undef +  ret <4 x half> %val +} + +define <5 x i16> @v5i16_func_void() #0 { +  ; CHECK-LABEL: name: v5i16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load 10 from %ir.ptr, align 16, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) +  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) +  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   $vgpr3 = COPY [[ANYEXT3]](s32) +  ; CHECK:   $vgpr4 = COPY [[ANYEXT4]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 +  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef +  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr +  ret <5 x i16> %val +} + +define <8 x i16> @v8i16_func_void() #0 { +  ; CHECK-LABEL: name: v8i16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) +  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) +  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) +  ; CHECK:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) +  ; CHECK:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) +  ; CHECK:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY 
[[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   $vgpr3 = COPY [[ANYEXT3]](s32) +  ; CHECK:   $vgpr4 = COPY [[ANYEXT4]](s32) +  ; CHECK:   $vgpr5 = COPY [[ANYEXT5]](s32) +  ; CHECK:   $vgpr6 = COPY [[ANYEXT6]](s32) +  ; CHECK:   $vgpr7 = COPY [[ANYEXT7]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 +  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef +  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr +  ret <8 x i16> %val +} + +define <16 x i16> @v16i16_func_void() #0 { +  ; CHECK-LABEL: name: v16i16_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) +  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) +  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) +  ; CHECK:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) +  ; CHECK:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) +  ; CHECK:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) +  ; CHECK:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) +  ; CHECK:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) +  ; CHECK:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) +  ; CHECK:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) +  ; CHECK:   [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16) +  ; CHECK:   [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16) +  ; CHECK:   [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s16) +  ; CHECK:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s16) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   $vgpr3 = COPY [[ANYEXT3]](s32) +  ; CHECK:   $vgpr4 = COPY [[ANYEXT4]](s32) +  ; CHECK:   $vgpr5 = COPY [[ANYEXT5]](s32) +  ; CHECK:   $vgpr6 = COPY [[ANYEXT6]](s32) +  ; CHECK:   $vgpr7 = COPY [[ANYEXT7]](s32) +  ; CHECK:   $vgpr8 = COPY [[ANYEXT8]](s32) +  ; CHECK:   $vgpr9 = COPY [[ANYEXT9]](s32) +  ; CHECK:   $vgpr10 = COPY [[ANYEXT10]](s32) +  ; CHECK:   $vgpr11 = COPY [[ANYEXT11]](s32) +  ; CHECK:   $vgpr12 = COPY [[ANYEXT12]](s32) +  ; CHECK:   $vgpr13 = COPY [[ANYEXT13]](s32) +  ; CHECK:   $vgpr14 = COPY [[ANYEXT14]](s32) +  ; CHECK:   $vgpr15 = COPY [[ANYEXT15]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 +  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef +  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr +  ret <16 x i16> %val +} + +define <16 x i8> @v16i8_func_void() #0 { +  ; CHECK-LABEL: name: v16i8_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) +  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) +  ; CHECK:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) +  ; CHECK:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) +  ; CHECK:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) +  ; CHECK:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) +  ; CHECK:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) +  ; CHECK:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) +  ; CHECK:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) +  ; CHECK:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) +  ; CHECK:   [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s8) +  ; CHECK:   [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s8) +  ; CHECK:   [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s8) +  ; CHECK:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s8) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   $vgpr3 = COPY [[ANYEXT3]](s32) +  ; CHECK:   $vgpr4 = COPY [[ANYEXT4]](s32) +  ; CHECK:   $vgpr5 = COPY [[ANYEXT5]](s32) +  ; CHECK:   $vgpr6 = COPY [[ANYEXT6]](s32) +  ; CHECK:   $vgpr7 = COPY [[ANYEXT7]](s32) +  ; CHECK:   $vgpr8 = COPY [[ANYEXT8]](s32) +  ; CHECK:   $vgpr9 = COPY [[ANYEXT9]](s32) +  ; CHECK:   $vgpr10 = COPY [[ANYEXT10]](s32) +  ; CHECK:   $vgpr11 = COPY [[ANYEXT11]](s32) +  ; CHECK:   $vgpr12 = COPY [[ANYEXT12]](s32) +  ; CHECK:   $vgpr13 = COPY [[ANYEXT13]](s32) +  ; CHECK:   $vgpr14 = COPY [[ANYEXT14]](s32) +  ; CHECK:   $vgpr15 = COPY [[ANYEXT15]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 +  %ptr = 
load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef +  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr +  ret <16 x i8> %val +} + +define <4  x i8> @v4i8_func_void() #0 { +  ; CHECK-LABEL: name: v4i8_func_void +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) +  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) +  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) +  ; CHECK:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) +  ; CHECK:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) +  ; CHECK:   $vgpr0 = COPY [[ANYEXT]](s32) +  ; CHECK:   $vgpr1 = COPY [[ANYEXT1]](s32) +  ; CHECK:   $vgpr2 = COPY [[ANYEXT2]](s32) +  ; CHECK:   $vgpr3 = COPY [[ANYEXT3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %ptr = load volatile <4  x i8> addrspace(1)*, <4  x i8> addrspace(1)* addrspace(4)* undef +  %val = load <4  x i8>, <4  x i8> addrspace(1)* %ptr +  ret <4  x i8> %val +} + +define {i8, i32} @struct_i8_i32_func_void() #0 { +  ; CHECK-LABEL: name: struct_i8_i32_func_void +  ; CHECK: bb.0: +  ; CHECK:   successors: %bb.1(0x80000000) +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) +  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +  ; CHECK:   [[GEP:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C]](s64) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4 from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) +  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef +  ret { i8, i32 } %val +} + +define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 { +  ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i8 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (volatile load 4 from `i32 addrspace(1)* undef`, addrspace 1) +  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +  ; CHECK:   [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) +  ; CHECK:   G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store 1 into %ir.gep01, addrspace 5) +  ; CHECK:   G_STORE [[LOAD1]](s32), [[GEP]](p5) :: (store 4 into %ir.gep1, addrspace 5) +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]] +  %val0 = load volatile i8, i8 addrspace(1)* undef +  
%val1 = load volatile i32, i32 addrspace(1)* undef +  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 +  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 +  store i8 %val0, i8 addrspace(5)* %gep0 +  store i32 %val1, i32 addrspace(5)* %gep1 +  ret void +} + +; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call +; lowering introduces an extra CopyToReg/CopyFromReg obscuring the +; AssertZext inserted. Not using it introduces the spills. + +define <33 x i32> @v33i32_func_void() #0 { +  ; CHECK-LABEL: name: v33i32_func_void +  ; CHECK: bb.0: +  ; CHECK:   successors: %bb.1(0x80000000) +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load 132 from %ir.ptr, align 256, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<33 x s32>) +  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef +  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr +  ret <33 x i32> %val +} + +define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { +  ; CHECK-LABEL: name: struct_v32i32_i32_func_void +  ; CHECK: bb.0: +  ; CHECK:   successors: %bb.1(0x80000000) +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) +  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 +  ; CHECK:   [[GEP:%[0-9]+]]:_(p1) = G_GEP [[LOAD]], [[C]](s64) +  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4 from %ir.ptr + 128, align 128, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), 
[[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) +  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef +  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr +  ret { <32 x i32>, i32 }%val +} + +define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { +  ; CHECK-LABEL: name: struct_i32_v32i32_func_void +  ; CHECK: bb.0: +  ; CHECK:   successors: %bb.1(0x80000000) +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, align 128, addrspace 1) +  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 +  ; CHECK:   [[GEP:%[0-9]+]]:_(p1) = G_GEP [[LOAD]], [[C]](s64) +  ; CHECK:   [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[GEP]](p1) :: (load 128 from %ir.ptr + 128, addrspace 1) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<32 x s32>) +  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef +  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr +  ret { i32, <32 x i32> }%val +} + +; Make sure the last struct component is returned in v3, not v4. 
+define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { +  ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF +  ; CHECK:   [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF +  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +  ; CHECK:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) +  ; CHECK:   [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) +  ; CHECK:   [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[LOAD3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %load0 = load volatile i32, i32 addrspace(3)* undef +  %load1 = load volatile i32, i32 addrspace(3)* undef +  %load2 = load volatile i32, i32 addrspace(3)* undef +  %load3 = load volatile i32, i32 addrspace(3)* undef + +  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0 +  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1 +  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2 +  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0 +  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1 +  ret { <3 x i32>, i32 } %insert.4 +} + +define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { +  ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF +  ; CHECK:   [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF +  ; CHECK:   [[DEF2:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF +  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +  ; CHECK:   [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF +  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD 
[[DEF1]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF2]], [[LOAD]](s32), [[C]](s32) +  ; CHECK:   [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) +  ; CHECK:   [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) +  ; CHECK:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) +  ; CHECK:   $vgpr0 = COPY [[UV]](s32) +  ; CHECK:   $vgpr1 = COPY [[UV1]](s32) +  ; CHECK:   $vgpr2 = COPY [[UV2]](s32) +  ; CHECK:   $vgpr3 = COPY [[LOAD3]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +  %load0 = load volatile float, float addrspace(3)* undef +  %load1 = load volatile float, float addrspace(3)* undef +  %load2 = load volatile float, float addrspace(3)* undef +  %load3 = load volatile i32, i32 addrspace(3)* undef + +  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0 +  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1 +  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2 +  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0 +  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1 +  ret { <3 x float>, i32 } %insert.4 +} + +define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 { +  ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits +  ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31 +  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 +  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 +  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 +  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF +  ; CHECK:   [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) +  ; CHECK:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) +  ; CHECK:   [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) +  ; CHECK:   [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) +  ; CHECK:   G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]] +  %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 + +  %lshr0 = lshr i32 %arg0.int, 16 +  %lshr1 = lshr i32 %arg0.int, 17 +  %lshr2 = lshr i32 %arg0.int, 18 + +  store volatile i32 %lshr0, i32 addrspace(3)* undef +  store volatile i32 %lshr1, i32 addrspace(3)* undef +  store volatile i32 %lshr2, i32 addrspace(3)* undef +  ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 7b36a46ca0d..5f90340fbd6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -56,7 +56,7 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg 
%arg1, float %a  ; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3  ; CHECK: $sgpr0 = COPY [[S0]]  ; CHECK: $sgpr1 = COPY [[S1]] -; CHECK: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 +; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1  define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) {  main_body:    %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index dcee8eaf799..b4bdb22153d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -4,12 +4,14 @@  define void @void_func_i1(i1 %arg0) #0 {    ; CHECK-LABEL: name: void_func_i1    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store 1 into `i1 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    store i1 %arg0, i1 addrspace(1)* undef    ret void  } @@ -17,15 +19,17 @@ define void @void_func_i1(i1 %arg0) #0 {  define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {    ; CHECK-LABEL: name: void_func_i1_zeroext    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)    ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]    ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    %ext = zext i1 %arg0 to i32    %add = add i32 %ext, 12    store i32 %add, i32 addrspace(1)* undef @@ -35,15 +39,17 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {  define void @void_func_i1_signext(i1 signext %arg0) #0 {    ; CHECK-LABEL: name: void_func_i1_signext    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)    ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]    ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    %ext = sext i1 %arg0 to i32    %add = add i32 %ext, 12    store i32 %add, i32 addrspace(1)* undef @@ -54,9 +60,10 @@ define void @i1_arg_i1_use(i1 %arg) #0 {    ; 
CHECK-LABEL: name: i1_arg_i1_use    ; CHECK: bb.1.bb:    ; CHECK:   successors: %bb.2(0x40000000), %bb.3(0x40000000) -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true    ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -69,7 +76,8 @@ define void @i1_arg_i1_use(i1 %arg) #0 {    ; CHECK:   G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)    ; CHECK: bb.3.bb2:    ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]  bb:    br i1 %arg, label %bb2, label %bb1 @@ -84,12 +92,14 @@ bb2:  define void @void_func_i8(i8 %arg0) #0 {    ; CHECK-LABEL: name: void_func_i8    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `i8 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    store i8 %arg0, i8 addrspace(1)* undef    ret void  } @@ -97,15 +107,17 @@ define void @void_func_i8(i8 %arg0) #0 {  define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {    ; CHECK-LABEL: name: void_func_i8_zeroext    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)    ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]    ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    %ext = zext i8 %arg0 to i32    %add = add i32 %ext, 12    store i32 %add, i32 addrspace(1)* undef @@ -115,15 +127,17 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {  define void @void_func_i8_signext(i8 signext %arg0) #0 {    ; CHECK-LABEL: name: void_func_i8_signext    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)    ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]    ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 
0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    %ext = sext i8 %arg0 to i32    %add = add i32 %ext, 12    store i32 %add, i32 addrspace(1)* undef @@ -133,12 +147,14 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 {  define void @void_func_i16(i16 %arg0) #0 {    ; CHECK-LABEL: name: void_func_i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `i16 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    store i16 %arg0, i16 addrspace(1)* undef    ret void  } @@ -146,15 +162,17 @@ define void @void_func_i16(i16 %arg0) #0 {  define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {    ; CHECK-LABEL: name: void_func_i16_zeroext    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)    ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]    ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    %ext = zext i16 %arg0 to i32    %add = add i32 %ext, 12    store i32 %add, i32 addrspace(1)* undef @@ -164,15 +182,17 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {  define void @void_func_i16_signext(i16 signext %arg0) #0 {    ; CHECK-LABEL: name: void_func_i16_signext    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)    ; CHECK:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]    ; CHECK:   G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    %ext = sext i16 %arg0 to i32    %add = add i32 %ext, 12    store i32 %add, i32 addrspace(1)* undef @@ -182,11 +202,13 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 {  define void @void_func_i32(i32 %arg0) #0 {    ; CHECK-LABEL: name: void_func_i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: 
(store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    store i32 %arg0, i32 addrspace(1)* undef    ret void  } @@ -194,13 +216,15 @@ define void @void_func_i32(i32 %arg0) #0 {  define void @void_func_i64(i64 %arg0) #0 {    ; CHECK-LABEL: name: void_func_i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] +  ; CHECK:   S_SETPC_B64_return [[COPY3]]    store i64 %arg0, i64 addrspace(1)* undef    ret void  } @@ -208,12 +232,14 @@ define void @void_func_i64(i64 %arg0) #0 {  define void @void_func_f16(half %arg0) #0 {    ; CHECK-LABEL: name: void_func_f16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `half addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    store half %arg0, half addrspace(1)* undef    ret void  } @@ -221,11 +247,13 @@ define void @void_func_f16(half %arg0) #0 {  define void @void_func_f32(float %arg0) #0 {    ; CHECK-LABEL: name: void_func_f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    store float %arg0, float addrspace(1)* undef    ret void  } @@ -233,13 +261,15 @@ define void @void_func_f32(float %arg0) #0 {  define void @void_func_f64(double %arg0) #0 {    ; CHECK-LABEL: name: void_func_f64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `double addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] +  ; CHECK:   S_SETPC_B64_return [[COPY3]]    store double %arg0, double addrspace(1)* undef    ret void  } @@ -247,13 +277,15 @@ define void @void_func_f64(double 
%arg0) #0 {  define void @void_func_v2i32(<2 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v2i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] +  ; CHECK:   S_SETPC_B64_return [[COPY3]]    store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef    ret void  } @@ -261,14 +293,16 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 {  define void @void_func_v3i32(<3 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v3i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 +  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] +  ; CHECK:   S_SETPC_B64_return [[COPY4]]    store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef    ret void  } @@ -276,15 +310,17 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 {  define void @void_func_v4i32(<4 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v4i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 +  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] +  ; CHECK:   S_SETPC_B64_return [[COPY5]]    store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef    ret void  } @@ -292,16 +328,18 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 {  define void @void_func_v5i32(<5 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v5i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY 
$vgpr3    ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 +  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store 20 into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] +  ; CHECK:   S_SETPC_B64_return [[COPY6]]    store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef    ret void  } @@ -309,7 +347,7 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 {  define void @void_func_v8i32(<8 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v8i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -318,10 +356,12 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 {    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5    ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6    ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 +  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] +  ; CHECK:   S_SETPC_B64_return [[COPY9]]    store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef    ret void  } @@ -329,7 +369,7 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 {  define void @void_func_v16i32(<16 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v16i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -346,10 +386,12 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 {    ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13    ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14    ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 +  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x i32> addrspace(1)* undef`, 
addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] +  ; CHECK:   S_SETPC_B64_return [[COPY17]]    store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef    ret void  } @@ -357,7 +399,7 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 {  define void @void_func_v32i32(<32 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v32i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -390,10 +432,12 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 {    ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29    ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30    ; CHECK:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    ret void  } @@ -402,7 +446,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 {  define void @void_func_v33i32(<33 x i32> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v33i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -437,10 +481,12 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 {    ; CHECK:   
[[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31    ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store 132 into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef    ret void  } @@ -448,17 +494,19 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 {  define void @void_func_v2i64(<2 x i64> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v2i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 +  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] +  ; CHECK:   S_SETPC_B64_return [[COPY5]]    store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef    ret void  } @@ -466,20 +514,22 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 {  define void @void_func_v3i64(<3 x i64> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v3i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3    ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 +  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)    ; 
CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] +  ; CHECK:   S_SETPC_B64_return [[COPY7]]    store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef    ret void  } @@ -487,7 +537,7 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 {  define void @void_func_v4i64(<4 x i64> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v4i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -496,6 +546,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 {    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5    ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6    ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 +  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -503,7 +554,8 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 {    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x i64> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] +  ; CHECK:   S_SETPC_B64_return [[COPY9]]    store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef    ret void  } @@ -511,7 +563,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 {  define void @void_func_v5i64(<5 x i64> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v5i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -522,6 +574,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 {    ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7    ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8    ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 +  ; CHECK:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -530,7 +583,8 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 {    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   
G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store 40 into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] +  ; CHECK:   S_SETPC_B64_return [[COPY11]]    store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef    ret void  } @@ -538,7 +592,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 {  define void @void_func_v8i64(<8 x i64> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v8i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -555,6 +609,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 {    ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13    ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14    ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 +  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -566,7 +621,8 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 {    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x i64> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] +  ; CHECK:   S_SETPC_B64_return [[COPY17]]    store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef    ret void  } @@ -574,7 +630,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 {  define void @void_func_v16i64(<16 x i64> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v16i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -607,6 +663,7 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 {    ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29    ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30    ; CHECK:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; 
CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -626,7 +683,8 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 {    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x i64> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef    ret void  } @@ -634,14 +692,16 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 {  define void @void_func_v2i16(<2 x i16> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v2i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] +  ; CHECK:   S_SETPC_B64_return [[COPY3]]    store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef    ret void  } @@ -649,15 +709,17 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 {  define void @void_func_v3i16(<3 x i16> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v3i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 +  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] +  ; CHECK:   S_SETPC_B64_return [[COPY4]]    store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef    ret void  } @@ -665,16 +727,18 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {  define void @void_func_v4i16(<4 x i16> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v4i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 +  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] +  ; CHECK:   S_SETPC_B64_return [[COPY5]]    store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef    ret void  } @@ -682,17 +746,19 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 {  define void @void_func_v5i16(<5 x i16> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v5i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3    ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 +  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<5 x s16>), [[DEF]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] +  ; CHECK:   S_SETPC_B64_return [[COPY6]]    store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef    ret void  } @@ -700,7 +766,7 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {  define void @void_func_v8i16(<8 x i16> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v8i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -709,11 +775,13 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 {    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5    ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6    ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 +  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x i16> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] +  ; CHECK:   S_SETPC_B64_return [[COPY9]]    store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef    ret void  } @@ -721,7 +789,7 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 {  define void @void_func_v16i16(<16 
x i16> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v16i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -738,11 +806,13 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {    ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13    ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14    ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 +  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x i16> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] +  ; CHECK:   S_SETPC_B64_return [[COPY17]]    store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef    ret void  } @@ -750,13 +820,15 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {  define void @void_func_v2f32(<2 x float> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v2f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] +  ; CHECK:   S_SETPC_B64_return [[COPY3]]    store <2 x float> %arg0, <2 x float> addrspace(1)* undef    ret void  } @@ -764,14 +836,16 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 {  define void @void_func_v3f32(<3 x float> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v3f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 +  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] +  ; CHECK:  
 S_SETPC_B64_return [[COPY4]]    store <3 x float> %arg0, <3 x float> addrspace(1)* undef    ret void  } @@ -779,15 +853,17 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 {  define void @void_func_v4f32(<4 x float> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v4f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 +  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] +  ; CHECK:   S_SETPC_B64_return [[COPY5]]    store <4 x float> %arg0, <4 x float> addrspace(1)* undef    ret void  } @@ -795,7 +871,7 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 {  define void @void_func_v8f32(<8 x float> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v8f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -804,10 +880,12 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 {    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5    ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6    ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 +  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] +  ; CHECK:   S_SETPC_B64_return [[COPY9]]    store <8 x float> %arg0, <8 x float> addrspace(1)* undef    ret void  } @@ -815,7 +893,7 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 {  define void @void_func_v16f32(<16 x float> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v16f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -832,10 +910,12 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 {    ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13    ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14    ; CHECK:   
[[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 +  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] +  ; CHECK:   S_SETPC_B64_return [[COPY17]]    store <16 x float> %arg0, <16 x float> addrspace(1)* undef    ret void  } @@ -843,17 +923,19 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 {  define void @void_func_v2f64(<2 x double> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v2f64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 +  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] +  ; CHECK:   S_SETPC_B64_return [[COPY5]]    store <2 x double> %arg0, <2 x double> addrspace(1)* undef    ret void  } @@ -861,20 +943,22 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 {  define void @void_func_v3f64(<3 x double> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v3f64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3    ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 +  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] +  ; CHECK:   S_SETPC_B64_return [[COPY7]]    store <3 x double> %arg0, <3 x double> 
addrspace(1)* undef    ret void  } @@ -882,7 +966,7 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 {  define void @void_func_v4f64(<4 x double> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v4f64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -891,6 +975,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 {    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5    ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6    ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 +  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -898,7 +983,8 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 {    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x double> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] +  ; CHECK:   S_SETPC_B64_return [[COPY9]]    store <4 x double> %arg0, <4 x double> addrspace(1)* undef    ret void  } @@ -906,7 +992,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 {  define void @void_func_v8f64(<8 x double> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v8f64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -923,6 +1009,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 {    ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13    ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14    ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 +  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -934,7 +1021,8 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 {    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x double> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] +  ; CHECK:   S_SETPC_B64_return [[COPY17]]    store <8 x double> 
%arg0, <8 x double> addrspace(1)* undef    ret void  } @@ -942,7 +1030,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 {  define void @void_func_v16f64(<16 x double> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v16f64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -975,6 +1063,7 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 {    ; CHECK:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29    ; CHECK:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30    ; CHECK:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) @@ -994,7 +1083,8 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 {    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x double> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store <16 x double> %arg0, <16 x double> addrspace(1)* undef    ret void  } @@ -1002,14 +1092,16 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 {  define void @void_func_v2f16(<2 x half> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v2f16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] +  ; CHECK:   S_SETPC_B64_return [[COPY3]]    store <2 x half> %arg0, <2 x half> addrspace(1)* undef    ret void  } @@ -1017,15 +1109,17 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 {  define void @void_func_v3f16(<3 x half> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v3f16  
  ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 +  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] +  ; CHECK:   S_SETPC_B64_return [[COPY4]]    store <3 x half> %arg0, <3 x half> addrspace(1)* undef    ret void  } @@ -1033,16 +1127,18 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {  define void @void_func_v4f16(<4 x half> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v4f16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 +  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] +  ; CHECK:   S_SETPC_B64_return [[COPY5]]    store <4 x half> %arg0, <4 x half> addrspace(1)* undef    ret void  } @@ -1050,7 +1146,7 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 {  define void @void_func_v8f16(<8 x half> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v8f16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1059,11 +1155,13 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 {    ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5    ; CHECK:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6    ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 +  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x half> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] +  ; CHECK:   S_SETPC_B64_return [[COPY9]]    
store <8 x half> %arg0, <8 x half> addrspace(1)* undef    ret void  } @@ -1071,7 +1169,7 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 {  define void @void_func_v16f16(<16 x half> %arg0) #0 {    ; CHECK-LABEL: name: void_func_v16f16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1088,11 +1186,13 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 {    ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13    ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14    ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 +  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x half> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] +  ; CHECK:   S_SETPC_B64_return [[COPY17]]    store <16 x half> %arg0, <16 x half> addrspace(1)* undef    ret void  } @@ -1101,18 +1201,20 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 {  define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {    ; CHECK-LABEL: name: void_func_i32_i64_i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2    ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 +  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[MV]](s64), [[DEF1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] +  ; CHECK:   S_SETPC_B64_return [[COPY5]]    store volatile i32 %arg0, i32 addrspace(1)* undef    store volatile i64 %arg1, i64 addrspace(1)* undef    store volatile i32 %arg2, i32 addrspace(1)* undef @@ -1122,11 +1224,13 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {  define void @void_func_struct_i32({ i32 } %arg0) #0 {    ; CHECK-LABEL: name: void_func_struct_i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:  
 liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `{ i32 } addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    store { i32 } %arg0, { i32 } addrspace(1)* undef    ret void  } @@ -1134,16 +1238,18 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 {  define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {    ; CHECK-LABEL: name: void_func_struct_i8_i32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)    ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4    ; CHECK:   [[GEP:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C]](s64)    ; CHECK:   G_STORE [[COPY1]](s32), [[GEP]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] +  ; CHECK:   S_SETPC_B64_return [[COPY3]]    store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef    ret void  } @@ -1151,8 +1257,10 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {  define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0) #0 {    ; CHECK-LABEL: name: void_func_byval_struct_i8_i32    ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31    ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (load 1 from %ir.arg0, align 4, addrspace 5)    ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 @@ -1162,7 +1270,8 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0    ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4    ; CHECK:   [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64)    ; CHECK:   G_STORE [[LOAD2]](s32), [[GEP1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]]    %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0    store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef    ret void @@ -1171,12 +1280,13 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0  define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %arg0, { i8, i32 } addrspace(5)* byval %arg1, i32 %arg2) #0 {    ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0 +  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31    ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = 
G_FRAME_INDEX %fixed-stack.1    ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 +  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF    ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile load 1 from %ir.arg0, align 4, addrspace 5) @@ -1194,7 +1304,8 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %a    ; CHECK:   [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64)    ; CHECK:   G_STORE [[LOAD5]](s32), [[GEP3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)    ; CHECK:   G_STORE [[COPY]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] +  ; CHECK:   S_SETPC_B64_return [[COPY2]]    %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0    %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1    store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef @@ -1206,17 +1317,20 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %a  define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 {    ; CHECK-LABEL: name: void_func_byval_i32_byval_i64    ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $sgpr30_sgpr31    ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1    ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (load 4 from %ir.arg0, addrspace 5)    ; CHECK:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (load 8 from %ir.arg1, addrspace 5)    ; CHECK:   G_STORE [[LOAD2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[LOAD3]](s64), [[DEF1]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] +  ; CHECK:   S_SETPC_B64_return [[COPY1]]    %arg0.load = load i32, i32 addrspace(5)* %arg0    %arg1.load = load i64, i64 addrspace(5)* %arg1    store i32 %arg0.load, i32 addrspace(1)* undef @@ -1227,7 +1341,7 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 ad  define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {    ; CHECK-LABEL: name: void_func_v32i32_i32_i64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, 
$vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1266,6 +1380,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0    ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1274,7 +1389,8 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0    ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[LOAD]](s32), [[DEF1]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[MV]](s64), [[DEF2]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    store volatile i32 %arg1, i32 addrspace(1)* undef    store volatile i64 %arg2, i64 addrspace(1)* undef @@ -1285,7 +1401,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0  define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 {    ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1326,6 +1442,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1    ; CHECK:   [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF    ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -1337,7 +1454,8 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1    ; CHECK:   G_STORE [[LOAD1]](s8), [[DEF2]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[LOAD2]](s16), [[DEF3]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[LOAD3]](s16), [[DEF4]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    store volatile i1 %arg1, i1 addrspace(1)* undef    store volatile i8 %arg2, i8 addrspace(1)* undef @@ -1349,7 +1467,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1  define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {    ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1390,6 +1508,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2    ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 
from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)    ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32) @@ -1399,7 +1518,8 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2    ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[DEF1]](p1) :: (volatile store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[DEF2]](p1) :: (volatile store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef    store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef @@ -1409,7 +1529,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2  define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 {    ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1450,6 +1570,7 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2    ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)    ; CHECK:   [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) @@ -1461,7 +1582,8 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2    ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[TRUNC]](<2 x s16>), [[DEF1]](p1) :: (volatile store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[TRUNC1]](<2 x s16>), [[DEF2]](p1) :: (volatile store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef    store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef @@ -1471,7 +1593,7 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2  define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {    ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1520,6 +1642,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2    ; CHECK:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), 
[[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)    ; CHECK:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) @@ -1533,7 +1656,8 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2    ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[DEF1]](p1) :: (volatile store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[DEF2]](p1) :: (volatile store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef    store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef @@ -1543,7 +1667,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2  define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {    ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1592,6 +1716,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4    ; CHECK:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)    ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) @@ -1601,7 +1726,8 @@ define void 
@void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4    ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[DEF1]](p1) :: (volatile store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[DEF2]](p1) :: (volatile store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef    store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef @@ -1611,7 +1737,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4  define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {    ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1676,6 +1802,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8    ; CHECK:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)    ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) @@ -1685,7 +1812,8 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8    ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), 
[[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[DEF1]](p1) :: (volatile store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1)    ; CHECK:   G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[DEF2]](p1) :: (volatile store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) -  ; CHECK:   S_ENDPGM 0 +  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] +  ; CHECK:   S_SETPC_B64_return [[COPY33]]    store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef    store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef    store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef @@ -1695,7 +1823,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8  define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {    ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32    ; CHECK: bb.1 (%ir-block.0): -  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 +  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31    ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0    ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1    ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -1792,6 +1920,7 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,    ; CHECK:   [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)    ; CHECK:   [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0    ; CHECK:   [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5) +  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31    ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)    ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)    ; CHECK:   [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) 
@@ -1801,7 +1930,8 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
   ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
   ; CHECK:   G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[DEF1]](p1) :: (volatile store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1)
   ; CHECK:   G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[DEF2]](p1) :: (volatile store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
+  ; CHECK:   S_SETPC_B64_return [[COPY33]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef
   store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef
@@ -1812,11 +1942,12 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
 define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
   ; CHECK-LABEL: name: void_func_v3f32_wasted_reg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
   ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
   ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
   ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
@@ -1830,7 +1961,8 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
   ; CHECK:   G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3)
   ; CHECK:   G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3)
   ; CHECK:   G_STORE [[COPY3]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK:   S_SETPC_B64_return [[COPY5]]
   %arg0.0 = extractelement <3 x float> %arg0, i32 0
   %arg0.1 = extractelement <3 x float> %arg0, i32 1
   %arg0.2 = extractelement <3 x float> %arg0, i32 2
@@ -1844,11 +1976,12 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
 define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
   ; CHECK-LABEL: name: void_func_v3i32_wasted_reg
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
   ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
   ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
   ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
@@ -1861,7 +1994,8 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
   ; CHECK:   G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
   ; CHECK:   G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
   ; CHECK:   G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK:   [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+  ; CHECK:   S_SETPC_B64_return [[COPY5]]
   %arg0.0 = extractelement <3 x i32> %arg0, i32 0
   %arg0.1 = extractelement <3 x i32> %arg0, i32 1
   %arg0.2 = extractelement <3 x i32> %arg0, i32 2
@@ -1876,7 +2010,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
 define void @void_func_v16i8(<16 x i8> %arg0) #0 {
   ; CHECK-LABEL: name: void_func_v16i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31
   ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1893,11 +2027,13 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 {
   ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
   ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
   ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; CHECK:   [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
   ; CHECK:   [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   G_STORE [[TRUNC]](<16 x s8>), [[DEF]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK:   [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]]
+  ; CHECK:   S_SETPC_B64_return [[COPY17]]
   store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef
   ret void
 }
@@ -1906,7 +2042,7 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 {
 define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
   ; CHECK-LABEL: name: void_func_v32i32_v16i8
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31
   ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
   ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1971,6 +2107,7 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
   ; CHECK:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
   ; CHECK:   [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
   ; CHECK:   [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
   ; CHECK:   [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s32>)
@@ -1978,7 +2115,8 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
   ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
   ; CHECK:   G_STORE [[TRUNC]](<16 x s8>), [[DEF1]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   S_ENDPGM 0
+  ; CHECK:   [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
+  ; CHECK:   S_SETPC_B64_return [[COPY33]]
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll
new file mode 100644
index 00000000000..c6c1a87177f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll
@@ -0,0 +1,3 @@
+; Runs original SDAG test with -global-isel
+; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll
diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll
index e77e2122068..cee224bfa38 100644
--- a/llvm/test/CodeGen/AMDGPU/ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret.ll
@@ -79,7 +79,7 @@ bb:
 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
 ; GCN-DAG: v_mov_b32_e32 v0, v4
 ; GCN-DAG: v_mov_b32_e32 v1, v2
-; GCN: v_mov_b32_e32 v2, v3
+; GCN-DAG: v_mov_b32_e32 v2, v3
 ; GCN-NOT: s_endpgm
 define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
 bb:
@@ -177,8 +177,8 @@ bb:
 }

 ; GCN-LABEL: {{^}}sgpr:
-; GCN: s_mov_b32 s2, s3
-; GCN: s_add_i32 s0, s3, 2
+; GCN-DAG: s_mov_b32 s2, s3
+; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 bb:
@@ -206,9 +206,9 @@ bb:
 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
 ; GCN-DAG: v_mov_b32_e32 v1, v0
 ; GCN-DAG: s_mov_b32 s1, s2
-; GCN: s_waitcnt expcnt(0)
-; GCN: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: s_add_i32 s0, s3, 2
+; GCN-DAG: s_waitcnt expcnt(0)
+; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
+; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 ; GCN-DAG: s_mov_b32 s2, s3
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {