5 files changed, 55 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 560db41eff6..d77f608fb22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -49,6 +49,13 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
 def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
 
+// 32-bit VALU immediate that uses the constant bus, e.g. $imm of VOP_MADAK.
+def u32kimm : Operand<i32> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_KIMM32";
+  let PrintMethod = "printU32ImmOperand";
+}
+
 let OperandType = "OPERAND_IMMEDIATE" in {
 
 def u32imm : Operand<i32> {
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 23cde1c4bc1..7a5ec05c26c 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -50,7 +50,12 @@ namespace AMDGPU {
     /// Operand with register or 32-bit immediate
     OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET,
     /// Operand with register or inline constant
-    OPERAND_REG_INLINE_C
+    OPERAND_REG_INLINE_C,
+
+    /// Operand with 32-bit immediate that uses the constant bus. The standard
+    /// OPERAND_IMMEDIATE should be used for special immediates such as source
+    /// modifiers.
+    OPERAND_KIMM32
   };
 }
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 159fb72f89f..fc7aca22cef 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1695,6 +1695,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
       }
       break;
     case MCOI::OPERAND_IMMEDIATE:
+    case AMDGPU::OPERAND_KIMM32:
       // Check if this operand is an immediate.
       // FrameIndex operands will be replaced by immediates, so they are
       // allowed.
@@ -1731,6 +1732,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
 
     unsigned ConstantBusCount = 0;
+    // An imm operand, when the opcode has one, counts as a constant bus use.
+    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
+      ++ConstantBusCount;
+
     unsigned SGPRUsed = findImplicitSGPRRead(MI);
     if (SGPRUsed != AMDGPU::NoRegister)
       ++ConstantBusCount;
@@ -2020,9 +2025,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
       if (i == OpIdx)
         continue;
       const MachineOperand &Op = MI.getOperand(i);
-      if (Op.isReg() &&
-          (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
-          usesConstantBus(MRI, Op, getOpSize(MI, i))) {
+      if (Op.isReg()) {
+        if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
+            usesConstantBus(MRI, Op, getOpSize(MI, i))) {
+          return false;
+        }
+      } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
         return false;
       }
     }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f9697057a1d..708d97e9773 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1582,12 +1582,12 @@ def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
 
 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
-  field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm);
+  field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32kimm:$imm);
   field string Asm32 = "$vdst, $src0, $src1, $imm";
   field bit HasExt = 0;
 }
 def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
-  field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1);
+  field dag Ins32 = (ins VCSrc_32:$src0, u32kimm:$imm, VGPR_32:$src1);
   field string Asm32 = "$vdst, $src0, $imm, $src1";
   field bit HasExt = 0;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index a394db61bfb..6ea1202ac50 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -191,3 +191,32 @@ define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float
   store float %madak, float addrspace(1)* %out.gep, align 4
   ret void
 }
+
+; SIFoldOperands should not fold the SGPR copy into v_madak_f32 because the
+; instruction's immediate operand already uses the constant bus.
+; GCN-LABEL: {{^}}madak_constant_bus_violation:
+; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
+; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+; GCN: buffer_load_dword [[VGPR:v[0-9]+]]
+; GCN: v_madak_f32_e32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VGPR]], [[MADAK]]
+; GCN: buffer_store_dword [[MUL]]
+define void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
+entry:
+  %is.zero = icmp eq i32 %arg1, 0
+  br i1 %is.zero, label %store.zero, label %compute
+
+store.zero:
+  store volatile float 0.0, float addrspace(1)* undef
+  br label %compute
+
+compute:
+  %vgpr = load volatile float, float addrspace(1)* undef
+  %half = fmul float %sgpr0, 0.5
+  %madak = fadd float %half, 42.0
+  %scaled = fmul float %madak, %vgpr
+  store volatile float %scaled, float addrspace(1)* undef, align 4
+  ret void
+}
+
+attributes #0 = { nounwind}