Improved the interface of methods commuting operands; improved X86-FMA3 memory folding and coalescing.

Patch by Slava Klochkov (vyacheslav.n.klochkov@intel.com)
Differential Revision: http://reviews.llvm.org/D11370
llvm-svn: 248735
author: Andrew Kaylor <andrew.kaylor@intel.com> 2015-09-28 20:33:22 +0000
committer: Andrew Kaylor <andrew.kaylor@intel.com> 2015-09-28 20:33:22 +0000
commit: 16c4da03d5c8741a4ebbc4bec76c3bfa0c0259c3 (patch)
tree: 8b6857bf14737d4660d5a57b2355328a22caa14d /llvm/lib/Target/AMDGPU
parent: 69dc971527faad8ccfa754ce7d855908b7a3f923 (diff)
download: bcm5719-llvm-16c4da03d5c8741a4ebbc4bec76c3bfa0c0259c3.tar.gz
bcm5719-llvm-16c4da03d5c8741a4ebbc4bec76c3bfa0c0259c3.zip
3 files changed, 55 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3a08b695a22..ccb6cb7a0a8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -165,8 +165,8 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
 
     // Operand is not legal, so try to commute the instruction to
     // see if this makes it possible to fold.
-    unsigned CommuteIdx0;
-    unsigned CommuteIdx1;
+    unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
+    unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
     bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
 
     if (CanCommute) {
@@ -176,7 +176,16 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
         OpNo = CommuteIdx0;
     }
 
-    if (!CanCommute || !TII->commuteInstruction(MI))
+    // One of the operands might be an Imm operand, and OpNo may refer to it
+    // after the call to commuteInstruction() below. Such situations are
+    // avoided here explicitly, as OpNo must be a register operand to be a
+    // candidate for memory folding.
+    if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
+                       !MI->getOperand(CommuteIdx1).isReg()))
+      return false;
+
+    if (!CanCommute ||
+        !TII->commuteInstruction(MI, false, CommuteIdx0, CommuteIdx1))
       return false;
 
     if (!TII->isOperandLegal(MI, OpNo, OpToFold))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 63fc0c7f74c..8fd065d95ab 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -782,8 +782,17 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   return true;
 }
 
-MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
-                                              bool NewMI) const {
+/// Commutes the operands in the given instruction.
+/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
+///
+/// Do not call this method for a non-commutable instruction or for
+/// a non-commutable pair of operand indices OpIdx0 and OpIdx1.
+/// Even though the instruction is commutable, the method may still
+/// fail to commute the operands; a null pointer is returned in such cases.
+MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+                                                  bool NewMI,
+                                                  unsigned OpIdx0,
+                                                  unsigned OpIdx1) const {
   int CommutedOpcode = commuteOpcode(*MI);
   if (CommutedOpcode == -1)
     return nullptr;
@@ -796,6 +805,13 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
 
   int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::src1);
+
+  if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
+       OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
+      (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
+       OpIdx1 != static_cast<unsigned>(Src0Idx)))
+    return nullptr;
+
   MachineOperand &Src1 = MI->getOperand(Src1Idx);
 
   // Make sure it's legal to commute operands for VOP2.
@@ -841,7 +857,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
     Src1.ChangeToRegister(Reg, false);
     Src1.setSubReg(SubReg);
   } else {
-    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+    MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
   }
 
   if (MI)
@@ -854,8 +870,8 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
 // between the true commutable operands, and the base
 // TargetInstrInfo::commuteInstruction uses it.
 bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
-                                        unsigned &SrcOpIdx1,
-                                        unsigned &SrcOpIdx2) const {
+                                        unsigned &SrcOpIdx0,
+                                        unsigned &SrcOpIdx1) const {
   const MCInstrDesc &MCID = MI->getDesc();
   if (!MCID.isCommutable())
     return false;
@@ -866,7 +882,8 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
     return false;
 
   // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
-  // immediate.
+  // immediate. Also, an immediate src0 operand is not handled in
+  // SIInstrInfo::commuteInstruction().
   if (!MI->getOperand(Src0Idx).isReg())
     return false;
 
@@ -874,18 +891,22 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
   if (Src1Idx == -1)
     return false;
 
-  if (!MI->getOperand(Src1Idx).isReg())
-    return false;
-
-  // If any source modifiers are set, the generic instruction commuting won't
-  // understand how to copy the source modifiers.
-  if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
-      hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+  MachineOperand &Src1 = MI->getOperand(Src1Idx);
+  if (Src1.isImm()) {
+    // SIInstrInfo::commuteInstruction() does support commuting the immediate
+    // operand src1 in 2- and 3-operand (VOP2/VOP3) instructions.
+    if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
+      return false;
+  } else if (Src1.isReg()) {
+    // If any source modifiers are set, the generic instruction commuting won't
+    // understand how to copy the source modifiers.
+    if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
+        hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+      return false;
+  } else
     return false;
 
-  SrcOpIdx1 = Src0Idx;
-  SrcOpIdx2 = Src1Idx;
-  return true;
+  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
 }
 
 MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index d1d964fd16b..474c26f03d0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -61,6 +61,12 @@ private:
 
   unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const;
 
+protected:
+  MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+                                       bool NewMI,
+                                       unsigned OpIdx0,
+                                       unsigned OpIdx1) const override;
+
 public:
   explicit SIInstrInfo(const AMDGPUSubtarget &st);
 
@@ -117,8 +123,6 @@ public:
   LLVM_READONLY
   int commuteOpcode(const MachineInstr &MI) const;
 
-  MachineInstr *commuteInstruction(MachineInstr *MI,
-                                   bool NewMI = false) const override;
   bool findCommutedOpIndices(MachineInstr *MI,
                              unsigned &SrcOpIdx1,
                              unsigned &SrcOpIdx2) const override;
author	Andrew Kaylor <andrew.kaylor@intel.com>	2015-09-28 20:33:22 +0000
committer	Andrew Kaylor <andrew.kaylor@intel.com>	2015-09-28 20:33:22 +0000
commit	16c4da03d5c8741a4ebbc4bec76c3bfa0c0259c3 (patch)
tree	8b6857bf14737d4660d5a57b2355328a22caa14d /llvm/lib/Target/AMDGPU
parent	69dc971527faad8ccfa754ce7d855908b7a3f923 (diff)
download	bcm5719-llvm-16c4da03d5c8741a4ebbc4bec76c3bfa0c0259c3.tar.gz bcm5719-llvm-16c4da03d5c8741a4ebbc4bec76c3bfa0c0259c3.zip