author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-19 16:26:14 +0000
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-19 16:26:14 +0000
commit | 3ecab8e4555aee0b4aa10c413696a67f55948c39 (patch)
tree | 312b6fd8b3a9ebc14217e7e19a00d428e3f3f8ff /llvm/lib/Target/AMDGPU
parent | e0900f285bb532790ed494df901f87c5c8b904da (diff)
download | bcm5719-llvm-3ecab8e4555aee0b4aa10c413696a67f55948c39.tar.gz, bcm5719-llvm-3ecab8e4555aee0b4aa10c413696a67f55948c39.zip
Reapply r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This reverts r372314, reapplying r372285 and the commits which depend
on it (r372286-r372293 and r372296-r372297).
This was missing one switch to getTargetConstant in an untested case.
llvm-svn: 372338
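In short, the GlobalISel path now leaves immarg intrinsic operands as immediates on the G_INTRINSIC machine instructions instead of materializing them as G_CONSTANT virtual registers, and the SelectionDAG path builds the corresponding operands with getTargetConstant. A minimal sketch of the resulting pattern, adapted from the amdgcn_exp and buffer-intrinsic hunks in the diff below (assume `I`, `DAG`, `DL`, and `CachePolicy` are in scope as in the surrounding LLVM code; this is an illustrative excerpt, not a standalone compilable unit):

```cpp
// Previously the selector chased the operand's defining G_CONSTANT:
//   int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
// With immarg operands kept as immediates, it reads them directly:
int64_t Tgt     = I.getOperand(1).getImm();
int64_t Enabled = I.getOperand(2).getImm();

// On the SelectionDAG side, operands that must remain instruction
// immediates are built as TargetConstants; a plain getConstant node
// would be selected into a register materialization:
SDValue CachePolicyOp = DAG.getTargetConstant(CachePolicy, DL, MVT::i32);
```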
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 279
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 7
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 60
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 399
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 40
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 110
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 104
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 12
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 14
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 18
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 44
15 files changed, 912 insertions, 188 deletions
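Much of the new selector and register-bank code in the diff below revolves around splitting a raw buffer offset into the 12-bit MUBUF immediate field and an overflow part that is added to the voffset register. A standalone sketch of just that arithmetic (the helper name and driver are illustrative, not part of the patch; the constants mirror the MaxImm = 4095 logic in the new splitBufferOffsets helpers):

```cpp
#include <cstdint>
#include <cstdio>
#include <utility>

// Split a constant buffer offset into {overflow for voffset, 12-bit imm}.
static std::pair<uint32_t, uint32_t> splitBufferOffset(uint32_t TotalConstOffset) {
  const uint32_t MaxImm = 4095;             // MUBUF immediate offset field
  uint32_t ImmOffset = TotalConstOffset;
  uint32_t Overflow  = ImmOffset & ~MaxImm; // multiple of 4096
  ImmOffset -= Overflow;
  if ((int32_t)Overflow < 0) {
    // A negative voffset is illegal, so fold everything into voffset instead.
    Overflow += ImmOffset;
    ImmOffset = 0;
  }
  return {Overflow, ImmOffset};
}

int main() {
  for (uint32_t Off : {100u, 4095u, 4096u, 5000u, 0x80000100u}) {
    auto [VOff, Imm] = splitBufferOffset(Off);
    std::printf("offset %u -> voffset %u + imm %u\n", Off, VOff, Imm);
  }
  return 0;
}
```

The sign check reflects the comment carried in the patch: rounding the overflow down to a multiple of 4096 gives the voffset add a better chance of being CSE'd across similar loads and stores, but the rounding is skipped when it would leave a negative value, since a negative voffset is not allowed even if the immediate offset would bring the total back into range.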
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 217b3996996..73486b969f4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -245,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, } } -static int64_t getConstant(const MachineInstr *MI) { - return MI->getOperand(1).getCImm()->getSExtValue(); -} - static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { switch (Opc) { case AMDGPU::G_AND: @@ -737,6 +734,260 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, .addImm(Enabled); } +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + if (mi_match(Reg, MRI, m_ICst(C)) && C == 0) + return true; + + // FIXME: matcher should ignore copies + return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0; +} + +static unsigned extractGLC(unsigned CachePolicy) { + return CachePolicy & 1; +} + +static unsigned extractSLC(unsigned CachePolicy) { + return (CachePolicy >> 1) & 1; +} + +static unsigned extractDLC(unsigned CachePolicy) { + return (CachePolicy >> 2) & 1; +} + +// Returns Base register, constant offset, and offset def point. +static std::tuple<Register, unsigned, MachineInstr *> +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); + if (!Def) + return {Reg, 0, nullptr}; + + if (Def->getOpcode() == AMDGPU::G_CONSTANT) { + unsigned Offset; + const MachineOperand &Op = Def->getOperand(1); + if (Op.isImm()) + Offset = Op.getImm(); + else + Offset = Op.getCImm()->getZExtValue(); + + return {Register(), Offset, Def}; + } + + int64_t Offset; + if (Def->getOpcode() == AMDGPU::G_ADD) { + // TODO: Handle G_OR used for add case + if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset))) + return {Def->getOperand(0).getReg(), Offset, Def}; + + // FIXME: matcher should ignore copies + if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset)))) + return {Def->getOperand(0).getReg(), Offset, Def}; + } + + return {Reg, 0, Def}; +} + +static unsigned getBufferStoreOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + const int Size = Ty.getSizeInBits(); + switch (8 * MemSize) { + case 8: + return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + case 16: + return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + default: + unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + return Opc; + } +} + +static unsigned getBufferStoreFormatOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + bool IsD16Packed = Ty.getScalarSizeInBits() == 16; + bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits(); + int NumElts = Ty.isVector() ? Ty.getNumElements() : 1; + + if (IsD16Packed) { + switch (NumElts) { + case 1: + return Offen ? 
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact; + default: + return -1; + } + } + + if (IsD16Unpacked) { + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact; + default: + return -1; + } + } + + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact; + default: + return -1; + } + + llvm_unreachable("unhandled buffer store"); +} + +// TODO: Move this to combiner +// Returns base register, imm offset, total constant offset. +std::tuple<Register, unsigned, unsigned> +AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned TotalConstOffset; + MachineInstr *OffsetDef; + MachineRegisterInfo &MRI = *B.getMRI(); + + std::tie(BaseReg, TotalConstOffset, OffsetDef) + = getBaseWithConstantOffset(MRI, OrigOffset); + + unsigned ImmOffset = TotalConstOffset; + + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store.f + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + if (Overflow != 0) { + // In case this is in a waterfall loop, insert offset code at the def point + // of the offset, not inside the loop. 
+ MachineBasicBlock::iterator OldInsPt = B.getInsertPt(); + MachineBasicBlock &OldMBB = B.getMBB(); + B.setInstr(*OffsetDef); + + if (!BaseReg) { + BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(BaseReg) + .addImm(Overflow); + } else { + Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(OverflowVal) + .addImm(Overflow); + + Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg) + .addReg(BaseReg) + .addReg(OverflowVal, RegState::Kill) + .addImm(0); + BaseReg = NewBaseReg; + } + + B.setInsertPt(OldMBB, OldInsPt); + } + + return {BaseReg, ImmOffset, TotalConstOffset}; +} + +bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, + bool IsFormat) const { + MachineIRBuilder B(MI); + MachineRegisterInfo &MRI = *B.getMRI(); + MachineFunction &MF = B.getMF(); + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int Size = Ty.getSizeInBits(); + if (Size % 32 != 0) + return false; + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + MachineMemOperand *MMO = *MI.memoperands_begin(); + const int MemSize = MMO->getSize(); + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + unsigned ImmOffset; + unsigned TotalOffset; + + std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset); + if (TotalOffset != 0) + MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize); + + const bool Offen = !isZero(VOffset, MRI); + + int Opc = IsFormat ? 
getBufferStoreFormatOpcode(Ty, MemSize, Offen) : + getBufferStoreOpcode(Ty, MemSize, Offen); + if (Opc == -1) + return false; + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .addMemOperand(MMO); + + MI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -746,10 +997,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::amdgcn_exp: { - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); + int64_t Done = I.getOperand(7).getImm(); + int64_t VM = I.getOperand(8).getImm(); MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), I.getOperand(4).getReg(), @@ -762,13 +1013,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( } case Intrinsic::amdgcn_exp_compr: { const DebugLoc &DL = I.getDebugLoc(); - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); Register Reg0 = I.getOperand(3).getReg(); Register Reg1 = I.getOperand(4).getReg(); Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg())); + int64_t Done = I.getOperand(5).getImm(); + int64_t VM = I.getOperand(6).getImm(); BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, @@ -791,6 +1042,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return selectStoreIntrinsic(I, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return selectStoreIntrinsic(I, true); default: return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 67a5062ca39..8c6e8976194 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -35,6 +35,7 @@ class AMDGPUInstrInfo; class AMDGPURegisterBankInfo; class GCNSubtarget; class MachineInstr; +class MachineIRBuilder; class MachineOperand; class MachineRegisterInfo; class SIInstrInfo; @@ -82,6 +83,12 @@ private: bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const; bool selectG_INTRINSIC(MachineInstr &I) const; + + std::tuple<Register, unsigned, unsigned> + splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const; + + bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const; + bool 
selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; bool selectG_ICMP(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index abfb4525935..02b3b421026 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1751,6 +1751,62 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, return true; } +/// Handle register layout difference for f16 images for some subtargets. +Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!ST.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + LLT StoreVT = MRI.getType(Reg); + assert(StoreVT.isVector() && StoreVT.getElementType() == S16); + + auto Unmerge = B.buildUnmerge(S16, Reg); + + SmallVector<Register, 4> WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + + int NumElts = StoreVT.getNumElements(); + + return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + bool IsFormat) const { + // TODO: Reject f16 format on targets where unsupported. + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + B.setInstr(MI); + + const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); + + // Fixup illegal register types for i8 stores. + if (Ty == LLT::scalar(8) || Ty == S16) { + Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0); + MI.getOperand(1).setReg(AnyExt); + return true; + } + + if (Ty.isVector()) { + if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) { + if (IsFormat) + MI.getOperand(1).setReg(handleD16VData(B, MRI, VData)); + return true; + } + + return Ty.getElementType() == S32 && Ty.getNumElements() <= 4; + } + + return Ty == S32; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -1843,6 +1899,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MI.eraseFromParent(); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return legalizeRawBufferStore(MI, MRI, B, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return legalizeRawBufferStore(MI, MRI, B, true); default: return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 99564a04dbb..855444fa276 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -83,6 +83,11 @@ public: MachineIRBuilder &B) const; bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, bool IsFormat) const; bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 92d5a5d07c7..0032d046862 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -33,6 +34,7 @@ #include "AMDGPUGenRegisterBankInfo.def" using namespace llvm; +using namespace MIPatternMatch; namespace { @@ -84,9 +86,11 @@ public: }; } -AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) +AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterBankInfo(), - TRI(static_cast<const SIRegisterInfo*>(&TRI)) { + Subtarget(ST), + TRI(Subtarget.getRegisterInfo()), + TII(Subtarget.getInstrInfo()) { // HACK: Until this is fully tablegen'd. static bool AlreadyInit = false; @@ -638,8 +642,10 @@ static LLT getHalfSizedType(LLT Ty) { /// /// There is additional complexity to try for compare values to identify the /// unique values used. -void AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineInstr &MI, MachineRegisterInfo &MRI, +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const { MachineFunction *MF = MI.getParent()->getParent(); const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); @@ -662,9 +668,8 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( // No operands need to be replaced, so no need to loop. if (SGPROperandRegs.empty()) - return; + return false; - MachineIRBuilder B(MI); SmallVector<Register, 4> ResultRegs; SmallVector<Register, 4> InitResultRegs; SmallVector<Register, 4> PhiRegs; @@ -922,6 +927,18 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( B.buildInstr(AMDGPU::S_MOV_B64_term) .addDef(AMDGPU::EXEC) .addReg(SaveExecReg); + + // Restore the insert point before the original instruction. + B.setInsertPt(MBB, MBB.end()); + + return true; +} + +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineInstr &MI, MachineRegisterInfo &MRI, + ArrayRef<unsigned> OpIndices) const { + MachineIRBuilder B(MI); + return executeInWaterfallLoop(B, MI, MRI, OpIndices); } // Legalize an operand that must be an SGPR by inserting a readfirstlane. @@ -1031,6 +1048,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, return true; } +bool AMDGPURegisterBankInfo::applyMappingImage( + MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RsrcIdx) const { + const int NumDefs = MI.getNumExplicitDefs(); + + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += NumDefs + 1; + + // Insert copies to VGPR arguments. + applyDefaultMapping(OpdMapper); + + // Fixup any SGPR arguments. + SmallVector<unsigned, 4> SGPRIndexes; + for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + // If this intrinsic has a sampler, it immediately follows rsrc. + if (I == RsrcIdx || I == RsrcIdx + 1) + SGPRIndexes.push_back(I); + } + + executeInWaterfallLoop(MI, MRI, SGPRIndexes); + return true; +} + // For cases where only a single copy is inserted for matching register banks. 
// Replace the register in the instruction operand static void substituteSimpleCopyRegs( @@ -1042,6 +1086,184 @@ static void substituteSimpleCopyRegs( } } +/// Handle register layout difference for f16 images for some subtargets. +Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!Subtarget.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + LLT StoreVT = MRI.getType(Reg); + if (!StoreVT.isVector() || StoreVT.getElementType() != S16) + return Reg; + + auto Unmerge = B.buildUnmerge(S16, Reg); + + + SmallVector<Register, 4> WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(Unmerge.getReg(I)); + + const LLT S32 = LLT::scalar(32); + int NumElts = StoreVT.getNumElements(); + + return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +static std::pair<Register, unsigned> +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + int64_t Const; + if (mi_match(Reg, MRI, m_ICst(Const))) + return std::make_pair(Register(), Const); + + Register Base; + if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const)))) + return std::make_pair(Base, Const); + + // TODO: Handle G_OR used for add case + return std::make_pair(Reg, 0); +} + +std::pair<Register, unsigned> +AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned ImmOffset; + const LLT S32 = LLT::scalar(32); + + std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(), + OrigOffset); + + unsigned C1 = 0; + if (ImmOffset != 0) { + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store. + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + C1 = ImmOffset; + if (Overflow != 0) { + if (!BaseReg) + BaseReg = B.buildConstant(S32, Overflow).getReg(0); + else { + auto OverflowVal = B.buildConstant(S32, Overflow); + BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0); + } + } + } + + if (!BaseReg) + BaseReg = B.buildConstant(S32, 0).getReg(0); + + return {BaseReg, C1}; +} + +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + return mi_match(Reg, MRI, m_ICst(C)) && C == 0; +} + +static unsigned extractGLC(unsigned CachePolicy) { + return CachePolicy & 1; +} + +static unsigned extractSLC(unsigned CachePolicy) { + return (CachePolicy >> 1) & 1; +} + +static unsigned extractDLC(unsigned CachePolicy) { + return (CachePolicy >> 2) & 1; +} + +MachineInstr * +AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const { + MachineRegisterInfo &MRI = *B.getMRI(); + executeInWaterfallLoop(B, MI, MRI, {2, 4}); + + // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer. 
+ + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int EltSize = Ty.getScalarSizeInBits(); + int Size = Ty.getSizeInBits(); + + // FIXME: Broken integer truncstore. + if (EltSize != 32) + report_fatal_error("unhandled intrinsic store"); + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + const int MemSize = (*MI.memoperands_begin())->getSize(); + + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + + unsigned ImmOffset; + std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset); + + const bool Offen = !isZero(VOffset, MRI); + + unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact; + switch (8 * MemSize) { + case 8: + Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + break; + case 16: + Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + break; + default: + Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + break; + } + + + // Set the insertion point back to the instruction in case it was moved into a + // loop. + B.setInstr(MI); + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .cloneMemRefs(MI); + + // FIXME: We need a way to report failure from applyMappingImpl. + // Insert constrain copies before inserting the loop. + if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this)) + report_fatal_error("failed to constrain selected store intrinsic"); + + return MIB; +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -1405,7 +1627,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + auto IntrID = MI.getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_buffer_load: { executeInWaterfallLoop(MI, MRI, { 2 }); return; @@ -1424,9 +1647,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } - default: + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_buffer_load_format: + case Intrinsic::amdgcn_raw_tbuffer_load: + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 4}); + return; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_load: + case Intrinsic::amdgcn_struct_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 5}); + return; + } + default: { + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. 
+ if (RSrcIntrin->IsImage) { + applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg); + return; + } + } + break; } + } break; } case AMDGPU::G_LOAD: @@ -1532,6 +1785,45 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { } const RegisterBankInfo::InstructionMapping & +AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const { + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += MI.getNumExplicitDefs() + 1; + + const int NumOps = MI.getNumOperands(); + SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps); + + // TODO: Should packed/unpacked D16 difference be reported here as part of + // the value mapping? + for (int I = 0; I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + Register OpReg = MI.getOperand(I).getReg(); + unsigned Size = getSizeInBits(OpReg, MRI, *TRI); + + // FIXME: Probably need a new intrinsic register bank searchable table to + // handle arbitrary intrinsics easily. + // + // If this has a sampler, it immediately follows rsrc. + const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1; + + if (MustBeSGPR) { + // If this must be an SGPR, so we must report whatever it is as legal. + unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size); + } else { + // Some operands must be VGPR, and these are easy to copy to. + OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + } + } + + return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps); +} + +const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); @@ -1577,11 +1869,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, return Bank ? Bank->getID() : Default; } + static unsigned regBankUnion(unsigned RB0, unsigned RB1) { return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; } +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + // Lie and claim anything is legal, even though this needs to be an SGPR + // applyMapping will have to deal with it as a waterfall loop. 
+ unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID); + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(Bank, Size); +} + +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); +} + /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called @@ -1748,7 +2060,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLVM_FALLTHROUGH; } - case AMDGPU::G_GEP: case AMDGPU::G_ADD: case AMDGPU::G_SUB: @@ -1764,8 +2075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_SADDE: case AMDGPU::G_USUBE: case AMDGPU::G_SSUBE: - case AMDGPU::G_UMULH: - case AMDGPU::G_SMULH: case AMDGPU::G_SMIN: case AMDGPU::G_SMAX: case AMDGPU::G_UMIN: @@ -1799,6 +2108,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_ROUND: return getDefaultMappingVOP(MI); + case AMDGPU::G_UMULH: + case AMDGPU::G_SMULH: { + if (MF.getSubtarget<GCNSubtarget>().hasScalarMulHiInsts() && + isSALUMapping(MI)) + return getDefaultMappingSOP(MI); + return getDefaultMappingVOP(MI); + } case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); @@ -2072,6 +2388,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_wqm: return getDefaultMappingVOP(MI); + case Intrinsic::amdgcn_ds_swizzle: case Intrinsic::amdgcn_ds_permute: case Intrinsic::amdgcn_ds_bpermute: case Intrinsic::amdgcn_update_dpp: @@ -2193,9 +2510,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { - default: - return getInvalidInstructionMapping(); + auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_memtime: case Intrinsic::amdgcn_s_memrealtime: @@ -2235,18 +2551,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_exp: - OpdsMapping[0] = nullptr; // IntrinsicID - // FIXME: These are immediate values which can't be read from registers. - OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); // FIXME: Could we support packed types here? OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); - // FIXME: These are immediate values which can't be read from registers. 
- OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_buffer_load: { Register RSrc = MI.getOperand(2).getReg(); // SGPR @@ -2298,6 +2607,54 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); break; } + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_tbuffer_load: { + // FIXME: Should make intrinsic ID the last operand of the instruction, + // then this would be the same as store + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_tbuffer_load: { + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + default: + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. 
+ if (RSrcIntrin->IsImage) + return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg); + } + + return getInvalidInstructionMapping(); } break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f3a96e2a612..584b23c0c22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -23,7 +23,9 @@ namespace llvm { class LLT; +class GCNSubtarget; class MachineIRBuilder; +class SIInstrInfo; class SIRegisterInfo; class TargetRegisterInfo; @@ -36,9 +38,15 @@ protected: #include "AMDGPUGenRegisterBank.inc" }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { + const GCNSubtarget &Subtarget; const SIRegisterInfo *TRI; + const SIInstrInfo *TII; - void executeInWaterfallLoop(MachineInstr &MI, + bool executeInWaterfallLoop(MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, + ArrayRef<unsigned> OpIndices) const; + bool executeInWaterfallLoop(MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const; @@ -47,6 +55,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { bool applyMappingWideLoad(MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, MachineRegisterInfo &MRI) const; + bool + applyMappingImage(MachineInstr &MI, + const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RSrcIdx) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + + std::pair<Register, unsigned> + splitBufferOffsets(MachineIRBuilder &B, Register Offset) const; + + MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const; /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; @@ -58,6 +79,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const TargetRegisterInfo &TRI, unsigned Default = AMDGPU::VGPRRegBankID) const; + // Return a value mapping for an operand that is required to be an SGPR. + const ValueMapping *getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + // Return a value mapping for an operand that is required to be a VGPR. + const ValueMapping *getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p /// Regs. This appropriately sets the regbank of the new registers. 
void split64BitValueForMapping(MachineIRBuilder &B, @@ -90,8 +121,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingAllVGPR( const MachineInstr &MI) const; + + const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const; + public: - AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI); + AMDGPURegisterBankInfo(const GCNSubtarget &STI); unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 3e556b46fa5..b1069d4a319 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -283,7 +283,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); - RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + RegBankInfo.reset(new AMDGPURegisterBankInfo(*this)); InstSelector.reset(new AMDGPUInstructionSelector( *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM)); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c9d305a5bba..bf7cf86bc42 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -555,6 +555,10 @@ public: return GFX9Insts; } + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? 
TrapHandlerAbiHsa : TrapHandlerAbiNone; } diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 1af12721b64..40887a3c56e 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1135,29 +1135,29 @@ def extract_dlc : SDNodeXForm<imm, [{ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), @@ -1210,31 +1210,31 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">; multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # 
_BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1291,32 +1291,32 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1353,32 +1353,32 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1392,8 +1392,8 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMI def : GCNPat< (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1404,8 +1404,8 @@ def : GCNPat< def : GCNPat< (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1416,8 +1416,8 @@ def : GCNPat< def : GCNPat< 
(SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1428,8 +1428,8 @@ def : GCNPat< def : GCNPat< (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1642,32 +1642,32 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, timm)), (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, timm)), (!cast<MTBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), @@ -1700,24 +1700,24 @@ let SubtargetPredicate = HasPackedD16VMem in { multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, timm), (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name 
vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$format, imm:$cachepolicy, 0),
+    (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$format, timm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
@@ -1725,7 +1725,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
-          imm:$offset, imm:$format, imm:$cachepolicy, imm),
+          timm:$offset, timm:$format, timm:$cachepolicy, timm),
     (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
       $vdata,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e1d53ae71a8..86c2db92acb 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -603,7 +603,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
 //===----------------------------------------------------------------------===//
 def : GCNPat <
-  (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
+  (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
   (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
 >;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e459a3a4e0d..690114297d6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5666,14 +5666,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
   SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
   unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
   SDValue Ops[] = {
-      DAG.getEntryNode(),                         // Chain
-      Rsrc,                                       // rsrc
-      DAG.getConstant(0, DL, MVT::i32),           // vindex
-      {},                                         // voffset
-      {},                                         // soffset
-      {},                                         // offset
-      DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1),            // idxen
+      DAG.getEntryNode(),                               // Chain
+      Rsrc,                                             // rsrc
+      DAG.getConstant(0, DL, MVT::i32),                 // vindex
+      {},                                               // voffset
+      {},                                               // soffset
+      {},                                               // offset
+      DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(0, DL, MVT::i1),            // idxen
   };
   // Use the alignment to ensure that the required offsets will fit into the
@@ -5682,7 +5682,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
   uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
   for (unsigned i = 0; i < NumLoads; ++i) {
-    Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+    Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
     Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
                                             Ops, LoadVT, MMO));
   }
@@ -5894,12 +5894,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
         Op.getOperand(1), // Src0
         Op.getOperand(2), // Attrchan
         Op.getOperand(3), // Attr
-        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
         S, // Src2 - holds two f16 values selected by high
-        DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
         Op.getOperand(4), // high
-        DAG.getConstant(0, DL, MVT::i1), // $clamp
-        DAG.getConstant(0, DL, MVT::i32) // $omod
+        DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+        DAG.getTargetConstant(0, DL, MVT::i32) // $omod
       };
       return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
     } else {
@@ -5908,10 +5908,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
         Op.getOperand(1), // Src0
         Op.getOperand(2), // Attrchan
         Op.getOperand(3), // Attr
-        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
         Op.getOperand(4), // high
-        DAG.getConstant(0, DL, MVT::i1), // $clamp
-        DAG.getConstant(0, DL, MVT::i32), // $omod
+        DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+        DAG.getTargetConstant(0, DL, MVT::i32), // $omod
         Glue
       };
       return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
@@ -5924,11 +5924,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       Op.getOperand(2), // Src0
       Op.getOperand(3), // Attrchan
       Op.getOperand(4), // Attr
-      DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+      DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
       Op.getOperand(1), // Src2
-      DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+      DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
       Op.getOperand(5), // high
-      DAG.getConstant(0, DL, MVT::i1), // $clamp
+      DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
       Glue
     };
     return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
@@ -6234,8 +6234,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
@@ -6272,7 +6272,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(4), // soffset
       Offsets.second, // offset
       Op.getOperand(5), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6290,7 +6290,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second, // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6313,9 +6313,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(4), // voffset
       Op.getOperand(5), // soffset
       Op.getOperand(6), // offset
-      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
     };
     if (LoadVT.getScalarType() == MVT::f16)
@@ -6339,7 +6339,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(5), // format
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     if (LoadVT.getScalarType() == MVT::f16)
@@ -6363,7 +6363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(6), // format
      Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     if (LoadVT.getScalarType() == MVT::f16)
@@ -6395,8 +6395,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     EVT VT = Op.getValueType();
@@ -6464,7 +6464,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second, // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
@@ -6537,7 +6537,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second, // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
@@ -6602,8 +6602,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
     EVT VT = Op.getValueType();
@@ -6624,7 +6624,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second, // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
     auto *M = cast<MemSDNode>(Op);
@@ -6644,7 +6644,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(7), // soffset
       Offsets.second, // offset
       Op.getOperand(8), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
     auto *M = cast<MemSDNode>(Op);
@@ -6806,9 +6806,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(5), // voffset
       Op.getOperand(6), // soffset
       Op.getOperand(7), // offset
-      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6833,7 +6833,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(7), // format
       Op.getOperand(8), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6858,7 +6858,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(6), // format
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6886,8 +6886,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
@@ -6932,7 +6932,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second, // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     unsigned Opc = IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT :
                               AMDGPUISD::BUFFER_STORE;
@@ -6976,7 +6976,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second, // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
                    AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
@@ -7005,8 +7005,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     EVT VT = Op.getOperand(2).getValueType();
@@ -7084,7 +7084,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
         Overflow += ImmOffset;
         ImmOffset = 0;
       }
-    C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
     if (Overflow) {
       auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
       if (!N0)
@@ -7098,7 +7098,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
   if (!N0)
     N0 = DAG.getConstant(0, DL, MVT::i32);
   if (!C1)
-    C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
   return {N0, SDValue(C1, 0)};
 }
@@ -7115,7 +7115,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
     if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
       Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
       return;
     }
   }
@@ -7128,13 +7128,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
                                                 Subtarget, Align)) {
       Offsets[0] = N0;
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
       return;
     }
   }
   Offsets[0] = CombinedOffset;
   Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
-  Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+  Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
 }
 // Handle 8 bit and 16 bit buffer loads
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3ae0da3545c..3c0cc0051c6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
-                                    (i32 imm:$attr)))]
+  [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
+                                    (i32 timm:$attr)))]
 >;
 let OtherPredicates = [has32BankLDS] in {
@@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
-                                    (i32 imm:$attr)))]>;
+  [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
+                                    (i32 timm:$attr)))]>;
 } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
-                                     (i32 imm:$attr)))]>;
+  [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
+                                     (i32 timm:$attr)))]>;
 } // End Uses = [M0, EXEC]
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0eb01434deb..2cd4e1cbc07 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
 let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
 def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
-    [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
+    [(int_amdgcn_s_waitcnt timm:$simm16)]>;
 def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
 def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
 // maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
 // maximum really 15 on VI?
 def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
-  "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
+  "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
@@ -1110,10 +1110,10 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
 let Uses = [EXEC, M0] in {
 // FIXME: Should this be mayLoad+mayStore?
 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
-  [(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
+  [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
 def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
-  [(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
+  [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
 } // End Uses = [EXEC, M0]
@@ -1125,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
   let simm16 = 0;
 }
 def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
-  [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
+  [(int_amdgcn_s_incperflevel timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
 }
 def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
-  [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
+  [(int_amdgcn_s_decperflevel timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
@@ -1180,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
 // S_GETREG_B32 Intrinsic Pattern.
 //===----------------------------------------------------------------------===//
 def : GCNPat <
-  (int_amdgcn_s_getreg imm:$simm16),
+  (int_amdgcn_s_getreg timm:$simm16),
   (S_GETREG_B32 (as_i16imm $simm16))
 >;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 76abda9218f..bea0c7bd080 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -841,16 +841,16 @@ def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
 let OtherPredicates = [isGFX8GFX9] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
-                           imm:$bound_ctrl)),
+  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+                           timm:$bound_ctrl)),
   (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
                  (as_i32imm $row_mask), (as_i32imm $bank_mask),
                  (as_i1imm $bound_ctrl))
 >;
 def : GCNPat <
-  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
-                              imm:$bank_mask, imm:$bound_ctrl)),
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+                              timm:$bank_mask, timm:$bound_ctrl)),
   (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
                  (as_i32imm $row_mask), (as_i32imm $bank_mask),
                  (as_i1imm $bound_ctrl))
@@ -911,21 +911,21 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
 let OtherPredicates = [isGFX10Plus] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
   (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8),
                         (i32 DPP8Mode.FI_0))
 >;
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
-                           imm:$bound_ctrl)),
+  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+                           timm:$bound_ctrl)),
   (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl), (i32 0))
 >;
 def : GCNPat <
-  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
-                              imm:$bank_mask, imm:$bound_ctrl)),
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+                              timm:$bank_mask, timm:$bound_ctrl)),
   (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl), (i32 0))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 753f63d3a74..605425972b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
 class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
-                                        imm:$cbsz, imm:$abid, imm:$blgp))];
+                                        timm:$cbsz, timm:$abid, timm:$blgp))];
 }
 class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
@@ -453,13 +453,13 @@ let FPDPRounding = 1 in {
   def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
   let Uses = [M0, EXEC] in {
   def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
-       [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
-                                             (i32 imm:$attr),
-                                             (i32 imm:$src0_modifiers),
+       [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
+                                             (i32 timm:$attr),
+                                             (i32 timm:$src0_modifiers),
                                              (f32 VRegSrc_32:$src2),
-                                             (i32 imm:$src2_modifiers),
-                                             (i1 imm:$high),
-                                             (i1 imm:$clamp)))]>;
+                                             (i32 timm:$src2_modifiers),
+                                             (i1 timm:$high),
+                                             (i1 timm:$clamp)))]>;
   } // End Uses = [M0, EXEC]
 } // End FPDPRounding = 1
 } // End renamedInGFX9 = 1
@@ -478,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
 let Uses = [M0, EXEC], FPDPRounding = 1 in {
 def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
-       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
-                                               (i32 imm:$attr),
-                                               (i32 imm:$src0_modifiers),
-                                               (i1 imm:$high),
-                                               (i1 imm:$clamp),
-                                               (i32 imm:$omod)))]>;
+       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
+                                               (i32 timm:$attr),
+                                               (i32 timm:$src0_modifiers),
+                                               (i1 timm:$high),
+                                               (i1 timm:$clamp),
+                                               (i32 timm:$omod)))]>;
 def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
-       [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
-                                               (i32 imm:$attr),
-                                               (i32 imm:$src0_modifiers),
+       [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
+                                               (i32 timm:$attr),
+                                               (i32 timm:$src0_modifiers),
                                                (f32 VRegSrc_32:$src2),
-                                               (i32 imm:$src2_modifiers),
-                                               (i1 imm:$high),
-                                               (i1 imm:$clamp),
-                                               (i32 imm:$omod)))]>;
+                                               (i32 timm:$src2_modifiers),
+                                               (i1 timm:$high),
+                                               (i1 timm:$clamp),
+                                               (i32 timm:$omod)))]>;
 } // End Uses = [M0, EXEC], FPDPRounding = 1
 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -642,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
   } // End $vdst = $vdst_in, DisableEncoding $vdst_in
   def : GCNPat<
-    (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
     (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
   >;
   def : GCNPat<
-    (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
     (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
   >;
 } // End SubtargetPredicate = isGFX10Plus
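Note on the recurring substitution above: operands that an intrinsic requires to stay immediate (immarg operands, matched by timm in TableGen patterns) have to be emitted as TargetConstant nodes during lowering, while ordinary values may still be built with DAG.getConstant and can legally end up in registers. The following is a minimal, hypothetical sketch of that idiom in C++; buildBufferOps and its operand layout are invented for illustration and are not a function from this patch.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/SelectionDAG.h"

    using namespace llvm;

    // Hypothetical helper: assemble operands for a buffer-style memory intrinsic.
    // CachePolicy and IdxEn stand in for immarg operands; vindex is ordinary data.
    static void buildBufferOps(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                               SDValue Rsrc, unsigned CachePolicy, bool IdxEn,
                               SmallVectorImpl<SDValue> &Ops) {
      Ops.push_back(Chain);
      Ops.push_back(Rsrc);
      // Ordinary constant: the selector may materialize this value into a
      // register, so a plain Constant node is fine here.
      Ops.push_back(DAG.getConstant(0, DL, MVT::i32));                 // vindex
      // Immediate-only operands: TargetConstant nodes are matched by `timm`
      // and are never legalized or moved into registers.
      Ops.push_back(DAG.getTargetConstant(CachePolicy, DL, MVT::i32)); // cachepolicy
      Ops.push_back(DAG.getTargetConstant(IdxEn, DL, MVT::i1));        // idxen
    }

On the TableGen side the corresponding selection patterns then use timm:$cachepolicy rather than imm:$cachepolicy, which is the imm -> timm substitution visible throughout the .td hunks above.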