diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/MachineCombiner.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/TargetInstrInfo.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 580 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h | 1 |
7 files changed, 39 insertions, 584 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 6b5c6ba8250..44601d5e462 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -40,7 +40,6 @@ class MachineCombiner : public MachineFunctionPass { const TargetRegisterInfo *TRI; MCSchedModel SchedModel; MachineRegisterInfo *MRI; - MachineLoopInfo *MLI; // Current MachineLoopInfo MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; @@ -87,7 +86,6 @@ char &llvm::MachineCombinerID = MachineCombiner::ID; INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) @@ -95,7 +93,6 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addRequired<MachineTraceMetrics>(); AU.addPreserved<MachineTraceMetrics>(); @@ -357,8 +354,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); auto BlockIter = MBB->begin(); - // Check if the block is in a loop. - const MachineLoop *ML = MLI->getLoopFor(MBB); while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; @@ -411,15 +406,11 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (!NewInstCount) continue; - bool SubstituteAlways = false; - if (ML && TII->isThroughputPattern(P)) - SubstituteAlways = true; - // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. - if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) || + if (doSubstitute(NewInstCount, OldInstCount) || (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, InstrIdxForVirtReg, P) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { @@ -456,7 +447,6 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { SchedModel = STI.getSchedModel(); TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); - MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; OptSize = MF.getFunction()->optForSize(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ff3fee2b4cb..6c9800824ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,7 +24,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -86,7 +85,6 @@ namespace { class DAGCombiner { SelectionDAG &DAG; - const SelectionDAGTargetInfo &STI; const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; @@ -471,9 +469,8 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), STI(D.getSelectionDAGInfo()), TLI(D.getTargetLoweringInfo()), - Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), - LegalTypes(false), AA(A) { + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } @@ -7718,9 +7715,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); - if (AllowFusion && STI.GenerateFMAsInMachineCombiner(OptLevel)) - return SDValue(); - // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -7904,9 +7898,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); - if (AllowFusion && STI.GenerateFMAsInMachineCombiner(OptLevel)) - return SDValue(); - // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 800ad6d1bb4..86517d9afbc 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -655,11 +655,7 @@ bool TargetInstrInfo::getMachineCombinerPatterns( return false; } -/// Return true when a code sequence can improve loop throughput. -bool -TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { - return false; -} + /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). void TargetInstrInfo::reassociateOps( diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index fd137db1a0b..b0574f2de75 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2788,75 +2788,37 @@ static bool isCombineInstrCandidate64(unsigned Opc) { return false; } // -// FP Opcodes that can be combined with a FMUL -static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { - switch (Inst.getOpcode()) { - case AArch64::FADDSrr: - case AArch64::FADDDrr: - case AArch64::FADDv2f32: - case AArch64::FADDv2f64: - case AArch64::FADDv4f32: - case AArch64::FSUBSrr: - case AArch64::FSUBDrr: - case AArch64::FSUBv2f32: - case AArch64::FSUBv2f64: - case AArch64::FSUBv4f32: - return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; - default: - break; - } - return false; -} -// // Opcodes that can be combined with a MUL static bool isCombineInstrCandidate(unsigned Opc) { return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc)); } -// -// Utility routine that checks if \param MO is defined by an -// \param CombineOpc instruction in the basic block \param MBB -static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, - unsigned CombineOpc, unsigned ZeroReg = 0, - bool CheckZeroReg = false) { +static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, + unsigned MulOpc, unsigned ZeroReg) { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineInstr *MI = nullptr; - + // We need a virtual register definition. if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) MI = MRI.getUniqueVRegDef(MO.getReg()); // And it needs to be in the trace (otherwise, it won't have a depth). - if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) + if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc) + return false; + + assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() && + MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && + MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs"); + + // The third input reg must be zero. + if (MI->getOperand(3).getReg() != ZeroReg) return false; + // Must only used by the user we combine with. if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) return false; - if (CheckZeroReg) { - assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() && - MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && - MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs"); - // The third input reg must be zero. - if (MI->getOperand(3).getReg() != ZeroReg) - return false; - } - return true; } -// -// Is \param MO defined by an integer multiply and can be combined? -static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, - unsigned MulOpc, unsigned ZeroReg) { - return canCombine(MBB, MO, MulOpc, ZeroReg, true); -} - -// -// Is \param MO defined by a floating-point multiply and can be combined? -static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, - unsigned MulOpc) { - return canCombine(MBB, MO, MulOpc); -} - // TODO: There are many more machine instruction opcodes to match: // 1. Other data types (integer, vectors) // 2. Other math / logic operations (xor, or) @@ -2990,230 +2952,7 @@ static bool getMaddPatterns(MachineInstr &Root, } return Found; } -/// Floating-Point Support -/// Find instructions that can be turned into madd. -static bool getFMAPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) { - - if (!isCombineInstrCandidateFP(Root)) - return 0; - - MachineBasicBlock &MBB = *Root.getParent(); - bool Found = false; - - switch (Root.getOpcode()) { - default: - assert(false && "Unsupported FP instruction in combiner\n"); - break; - case AArch64::FADDSrr: - assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && - "FADDWrr does not have register operands"); - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); - Found = true; - } - break; - case AArch64::FADDDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); - Found = true; - } - break; - case AArch64::FADDv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); - Found = true; - } - break; - case AArch64::FADDv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); - Found = true; - } - break; - case AArch64::FADDv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); - Found = true; - } - break; - - case AArch64::FSUBSrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); - Found = true; - } - break; - case AArch64::FSUBDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); - Found = true; - } - break; - case AArch64::FSUBv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); - Found = true; - } - break; - case AArch64::FSUBv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); - Found = true; - } - break; - case AArch64::FSUBv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); - Found = true; - } - break; - } - return Found; -} - -/// Return true when a code sequence can improve throughput. It -/// should be called only for instructions in loops. -/// \param Pattern - combiner pattern -bool -AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { - switch (Pattern) { - default: - break; - case MachineCombinerPattern::FMULADDS_OP1: - case MachineCombinerPattern::FMULADDS_OP2: - case MachineCombinerPattern::FMULSUBS_OP1: - case MachineCombinerPattern::FMULSUBS_OP2: - case MachineCombinerPattern::FMULADDD_OP1: - case MachineCombinerPattern::FMULADDD_OP2: - case MachineCombinerPattern::FMULSUBD_OP1: - case MachineCombinerPattern::FMULSUBD_OP2: - case MachineCombinerPattern::FMLAv1i32_indexed_OP1: - case MachineCombinerPattern::FMLAv1i32_indexed_OP2: - case MachineCombinerPattern::FMLAv1i64_indexed_OP1: - case MachineCombinerPattern::FMLAv1i64_indexed_OP2: - case MachineCombinerPattern::FMLAv2f32_OP2: - case MachineCombinerPattern::FMLAv2f32_OP1: - case MachineCombinerPattern::FMLAv2f64_OP1: - case MachineCombinerPattern::FMLAv2f64_OP2: - case MachineCombinerPattern::FMLAv2i32_indexed_OP1: - case MachineCombinerPattern::FMLAv2i32_indexed_OP2: - case MachineCombinerPattern::FMLAv2i64_indexed_OP1: - case MachineCombinerPattern::FMLAv2i64_indexed_OP2: - case MachineCombinerPattern::FMLAv4f32_OP1: - case MachineCombinerPattern::FMLAv4f32_OP2: - case MachineCombinerPattern::FMLAv4i32_indexed_OP1: - case MachineCombinerPattern::FMLAv4i32_indexed_OP2: - case MachineCombinerPattern::FMLSv1i32_indexed_OP2: - case MachineCombinerPattern::FMLSv1i64_indexed_OP2: - case MachineCombinerPattern::FMLSv2i32_indexed_OP2: - case MachineCombinerPattern::FMLSv2i64_indexed_OP2: - case MachineCombinerPattern::FMLSv2f32_OP2: - case MachineCombinerPattern::FMLSv2f64_OP2: - case MachineCombinerPattern::FMLSv4i32_indexed_OP2: - case MachineCombinerPattern::FMLSv4f32_OP2: - return true; - } // end switch (Pattern) - return false; -} /// Return true when there is potentially a faster code sequence for an /// instruction chain ending in \p Root. All potential patterns are listed in /// the \p Pattern vector. Pattern should be sorted in priority order since the @@ -3222,35 +2961,28 @@ AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { bool AArch64InstrInfo::getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns) const { - // Integer patterns if (getMaddPatterns(Root, Patterns)) return true; - // Floating point patterns - if (getFMAPatterns(Root, Patterns)) - return true; return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); } -enum class FMAInstKind { Default, Indexed, Accumulator }; -/// genFusedMultiply - Generate fused multiply instructions. -/// This function supports both integer and floating point instructions. -/// A typical example: -/// F|MUL I=A,B,0 -/// F|ADD R,I,C -/// ==> F|MADD R,A,B,C -/// \param Root is the F|ADD instruction +/// genMadd - Generate madd instruction and combine mul and add. +/// Example: +/// MUL I=A,B,0 +/// ADD R,I,C +/// ==> MADD R,A,B,C +/// \param Root is the ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction /// \param IdxMulOpd is index of operand in Root that is the result of -/// the F|MUL. In the example above IdxMulOpd is 1. -/// \param MaddOpc the opcode fo the f|madd instruction -static MachineInstr * -genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, - const TargetInstrInfo *TII, MachineInstr &Root, - SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd, - unsigned MaddOpc, const TargetRegisterClass *RC, - FMAInstKind kind = FMAInstKind::Default) { +/// the MUL. In the example above IdxMulOpd is 1. +/// \param MaddOpc the opcode fo the madd instruction +static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, MachineInstr &Root, + SmallVectorImpl<MachineInstr *> &InsInstrs, + unsigned IdxMulOpd, unsigned MaddOpc, + const TargetRegisterClass *RC) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; @@ -3272,26 +3004,12 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, if (TargetRegisterInfo::isVirtualRegister(SrcReg2)) MRI.constrainRegClass(SrcReg2, RC); - MachineInstrBuilder MIB; - if (kind == FMAInstKind::Default) - MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) - .addReg(SrcReg0, getKillRegState(Src0IsKill)) - .addReg(SrcReg1, getKillRegState(Src1IsKill)) - .addReg(SrcReg2, getKillRegState(Src2IsKill)); - else if (kind == FMAInstKind::Indexed) - MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) - .addReg(SrcReg2, getKillRegState(Src2IsKill)) - .addReg(SrcReg0, getKillRegState(Src0IsKill)) - .addReg(SrcReg1, getKillRegState(Src1IsKill)) - .addImm(MUL->getOperand(3).getImm()); - else if (kind == FMAInstKind::Accumulator) - MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) - .addReg(SrcReg2, getKillRegState(Src2IsKill)) - .addReg(SrcReg0, getKillRegState(Src0IsKill)) - .addReg(SrcReg1, getKillRegState(Src1IsKill)); - else - assert(false && "Invalid FMA instruction kind \n"); - // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) + MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), + ResultReg) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addReg(SrcReg2, getKillRegState(Src2IsKill)); + // Insert the MADD InsInstrs.push_back(MIB); return MUL; } @@ -3379,7 +3097,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; case MachineCombinerPattern::MULADDW_OP2: case MachineCombinerPattern::MULADDX_OP2: @@ -3394,7 +3112,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULADDWI_OP1: case MachineCombinerPattern::MULADDXI_OP1: { @@ -3486,7 +3204,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MSUBXrrr; RC = &AArch64::GPR64RegClass; } - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; case MachineCombinerPattern::MULSUBWI_OP1: case MachineCombinerPattern::MULSUBXI_OP1: { @@ -3531,234 +3249,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - // Floating Point Support - case MachineCombinerPattern::FMULADDS_OP1: - case MachineCombinerPattern::FMULADDD_OP1: - // MUL I=A,B,0 - // ADD R,I,C - // ==> MADD R,A,B,C - // --- Create(MADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); - break; - case MachineCombinerPattern::FMULADDS_OP2: - case MachineCombinerPattern::FMULADDD_OP2: - // FMUL I=A,B,0 - // FADD R,C,I - // ==> FMADD R,A,B,C - // --- Create(FMADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); - break; - - case MachineCombinerPattern::FMLAv1i32_indexed_OP1: - Opc = AArch64::FMLAv1i32_indexed; - RC = &AArch64::FPR32RegClass; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Indexed); - break; - case MachineCombinerPattern::FMLAv1i32_indexed_OP2: - Opc = AArch64::FMLAv1i32_indexed; - RC = &AArch64::FPR32RegClass; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - break; - - case MachineCombinerPattern::FMLAv1i64_indexed_OP1: - Opc = AArch64::FMLAv1i64_indexed; - RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Indexed); - break; - case MachineCombinerPattern::FMLAv1i64_indexed_OP2: - Opc = AArch64::FMLAv1i64_indexed; - RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - break; - - case MachineCombinerPattern::FMLAv2i32_indexed_OP1: - case MachineCombinerPattern::FMLAv2f32_OP1: - RC = &AArch64::FPR64RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) { - Opc = AArch64::FMLAv2i32_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLAv2f32; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Accumulator); - } - break; - case MachineCombinerPattern::FMLAv2i32_indexed_OP2: - case MachineCombinerPattern::FMLAv2f32_OP2: - RC = &AArch64::FPR64RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) { - Opc = AArch64::FMLAv2i32_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLAv2f32; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Accumulator); - } - break; - - case MachineCombinerPattern::FMLAv2i64_indexed_OP1: - case MachineCombinerPattern::FMLAv2f64_OP1: - RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) { - Opc = AArch64::FMLAv2i64_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLAv2f64; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Accumulator); - } - break; - case MachineCombinerPattern::FMLAv2i64_indexed_OP2: - case MachineCombinerPattern::FMLAv2f64_OP2: - RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) { - Opc = AArch64::FMLAv2i64_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLAv2f64; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Accumulator); - } - break; - - case MachineCombinerPattern::FMLAv4i32_indexed_OP1: - case MachineCombinerPattern::FMLAv4f32_OP1: - RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) { - Opc = AArch64::FMLAv4i32_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLAv4f32; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, - FMAInstKind::Accumulator); - } - break; - - case MachineCombinerPattern::FMLAv4i32_indexed_OP2: - case MachineCombinerPattern::FMLAv4f32_OP2: - RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) { - Opc = AArch64::FMLAv4i32_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLAv4f32; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Accumulator); - } - break; - - case MachineCombinerPattern::FMULSUBS_OP1: - case MachineCombinerPattern::FMULSUBD_OP1: { - // FMUL I=A,B,0 - // FSUB R,I,C - // ==> FNMSUB R,A,B,C // = -C + A*B - // --- Create(FNMSUB); - if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { - Opc = AArch64::FNMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FNMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); - break; - } - case MachineCombinerPattern::FMULSUBS_OP2: - case MachineCombinerPattern::FMULSUBD_OP2: { - // FMUL I=A,B,0 - // FSUB R,C,I - // ==> FMSUB R,A,B,C (computes C - A*B) - // --- Create(FMSUB); - if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { - Opc = AArch64::FMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); - break; - - case MachineCombinerPattern::FMLSv1i32_indexed_OP2: - Opc = AArch64::FMLSv1i32_indexed; - RC = &AArch64::FPR32RegClass; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - break; - - case MachineCombinerPattern::FMLSv1i64_indexed_OP2: - Opc = AArch64::FMLSv1i64_indexed; - RC = &AArch64::FPR64RegClass; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - break; - - case MachineCombinerPattern::FMLSv2f32_OP2: - case MachineCombinerPattern::FMLSv2i32_indexed_OP2: - RC = &AArch64::FPR64RegClass; - if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) { - Opc = AArch64::FMLSv2i32_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLSv2f32; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Accumulator); - } - break; - - case MachineCombinerPattern::FMLSv2f64_OP2: - case MachineCombinerPattern::FMLSv2i64_indexed_OP2: - RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) { - Opc = AArch64::FMLSv2i64_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLSv2f64; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Accumulator); - } - break; - - case MachineCombinerPattern::FMLSv4f32_OP2: - case MachineCombinerPattern::FMLSv4i32_indexed_OP2: - RC = &AArch64::FPR128RegClass; - if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) { - Opc = AArch64::FMLSv4i32_indexed; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Indexed); - } else { - Opc = AArch64::FMLSv4f32; - MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, - FMAInstKind::Accumulator); - } - break; - } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion DelInstrs.push_back(MUL); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 353ef735dac..a592f91dd4e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -174,11 +174,6 @@ public: unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const override; bool optimizeCondBranch(MachineInstr *MI) const override; - - /// Return true when a code sequence can improve throughput. It - /// should be called only for instructions in loops. - /// \param Pattern - combiner pattern - bool isThroughputPattern(MachineCombinerPattern Pattern) const override; /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in <Root>. All potential patterns are /// listed in the <Patterns> array. diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 4e4aaf8e553..f40293021d7 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -51,9 +51,3 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( } return SDValue(); } -bool AArch64SelectionDAGInfo::GenerateFMAsInMachineCombiner( - CodeGenOpt::Level OptLevel) const { - if (OptLevel >= CodeGenOpt::Aggressive) - return true; - return false; -} diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h index e61f177f2ef..8adb030555a 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -25,7 +25,6 @@ public: SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const override; - bool GenerateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override; }; } |

