author     Matt Arsenault <Matthew.Arsenault@amd.com>  2017-01-11 22:00:02 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2017-01-11 22:00:02 +0000
commit     69e3001b84b5c8704640b32bc41252bcd081092e (patch)
tree       e98ccc7ef35ad1550ccdb3c6d2eefbf995251d66 /llvm/lib
parent     ff69405213d6f1ca8c1dc89d814cf24600ce3ac1 (diff)
AMDGPU: Fix folding immediates into mac src2
Whether it is legal or not needs to be checked against the instruction the mac will be replaced with, not the instruction as it currently stands.

llvm-svn: 291711
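To make the idea concrete, here is a minimal standalone sketch, not the backend code: Opcode, isInlineImmFor, and isInlineImmIfFolded are hypothetical stand-ins for the real SIInstrInfo/MCInstrDesc queries. The point it illustrates is the one the commit makes: when folding an immediate also rewrites the opcode (mac becomes mad), the inline-constant test must consult the operand description of the final opcode, not the current one.

// Minimal sketch; all names here are hypothetical, not the AMDGPU interfaces.
#include <cstdint>
#include <iostream>

enum class Opcode { V_MAC_F32, V_MAD_F32 };

// Pretend per-opcode rule: mac's src2 is tied to the accumulator register and
// never takes an immediate, while mad accepts small inline constants.
static bool isInlineImmFor(Opcode Opc, int64_t Imm) {
  if (Opc == Opcode::V_MAC_F32)
    return false;
  return Imm >= -16 && Imm <= 64;
}

// The shape of the fix: if the fold would turn mac into mad, test legality
// against mad's operand description rather than mac's.
static bool isInlineImmIfFolded(Opcode Cur, int64_t Imm) {
  if (isInlineImmFor(Cur, Imm))
    return true;
  if (Cur == Opcode::V_MAC_F32) // becomes mad when src2 is folded
    return isInlineImmFor(Opcode::V_MAD_F32, Imm);
  return false;
}

int main() {
  std::cout << isInlineImmIfFolded(Opcode::V_MAC_F32, 4) << '\n';   // 1: inline-legal as mad
  std::cout << isInlineImmIfFolded(Opcode::V_MAC_F32, 999) << '\n'; // 0: would need a literal
}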
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  32
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 6ef6b6c7675..a5c0d4923d6 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -99,6 +99,34 @@ char SIFoldOperands::ID = 0;
char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+// Wrapper around isInlineConstant that understands special cases when
+// instruction types are replaced during operand folding.
+static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
+                                     const MachineInstr &UseMI,
+                                     unsigned OpNo,
+                                     const MachineOperand &OpToFold) {
+  if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
+    return true;
+
+  unsigned Opc = UseMI.getOpcode();
+  switch (Opc) {
+  case AMDGPU::V_MAC_F32_e64:
+  case AMDGPU::V_MAC_F16_e64: {
+    // Special case for mac. Since this is replaced with mad when folded into
+    // src2, we need to check the legality for the final instruction.
+    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+    if (static_cast<int>(OpNo) == Src2Idx) {
+      bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
+      const MCInstrDesc &MadDesc
+        = TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
+      return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
+    }
+  }
+  default:
+    return false;
+  }
+}
+
FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}
@@ -171,7 +199,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
  unsigned Opc = MI->getOpcode();
  if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) &&
      (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
-    bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
+    bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;

    // Check if changing this to a v_mad_{f16, f32} instruction will allow us
    // to fold the operand.
@@ -611,7 +639,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
-      if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
+      if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
        foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
      } else {
        if (++NumLiteralUses == 1) {
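As an aside on the heuristic visible in this last hunk: inline constants are encoded for free in the instruction, while a 32-bit literal is materialized per instruction, so folding a non-inline immediate into more than one use grows the program. A rough standalone sketch of that shape follows (hypothetical names; the pass operates on MachineOperands, not strings, and this assumes the literal is ultimately folded only when it has a single non-inline use):

#include <iostream>
#include <vector>

int main() {
  // Each element stands in for one instruction that reads the register.
  std::vector<const char *> Uses = {"use0", "use1", "use2"};
  bool FitsInline = false; // pretend the value needs a 32-bit literal
  int NumLiteralUses = 0;
  const char *FirstLiteralUse = nullptr;
  for (const char *Use : Uses) {
    if (FitsInline)
      std::cout << "fold inline constant into " << Use << '\n';
    else if (++NumLiteralUses == 1)
      FirstLiteralUse = Use; // remember the first use needing a literal
  }
  // Fold the literal only when it would be encoded exactly once.
  if (NumLiteralUses == 1)
    std::cout << "fold literal into " << FirstLiteralUse << '\n';
  else
    std::cout << "keep the value in a register (" << NumLiteralUses
              << " uses would each carry the literal)\n";
}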