summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AArch64/AArch64.h1
-rw-r--r--llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp413
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp16
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.h6
-rw-r--r--llvm/lib/Target/AArch64/CMakeLists.txt1
-rw-r--r--llvm/test/CodeGen/AArch64/PBQP.ll14
6 files changed, 449 insertions, 2 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 3e85bacbda2..425a9028488 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -39,6 +39,7 @@ ModulePass *createAArch64PromoteConstantPass();
FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64AddressTypePromotionPass();
FunctionPass *createAArch64A57FPLoadBalancing();
+FunctionPass *createAArch64A57PBQPRegAlloc();
/// \brief Creates an ARM-specific Target Transformation Info pass.
ImmutablePass *
createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
diff --git a/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
new file mode 100644
index 00000000000..c4787f4afd6
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -0,0 +1,413 @@
+//===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file contains the AArch64 / Cortex-A57 specific register allocation
+// constraints for use by the PBQP register allocator.
+//
+// It is essentially a transcription of what is contained in
+// AArch64A57FPLoadBalancing, which tries to use a balanced
+// mix of odd and even D-registers when performing a critical sequence of
+// independent, non-quadword FP/ASIMD floating-point multiply-accumulates.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-pbqp"
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define PBQP_BUILDER PBQPBuilderWithCoalescing
+//#define PBQP_BUILDER PBQPBuilder
+
+using namespace llvm;
+
+namespace {
+
+bool isFPReg(unsigned reg) {
+ return AArch64::FPR32RegClass.contains(reg) ||
+ AArch64::FPR64RegClass.contains(reg) ||
+ AArch64::FPR128RegClass.contains(reg);
+};
+
+bool isOdd(unsigned reg) {
+ switch (reg) {
+ default:
+ llvm_unreachable("Register is not from the expected class !");
+ case AArch64::S1:
+ case AArch64::S3:
+ case AArch64::S5:
+ case AArch64::S7:
+ case AArch64::S9:
+ case AArch64::S11:
+ case AArch64::S13:
+ case AArch64::S15:
+ case AArch64::S17:
+ case AArch64::S19:
+ case AArch64::S21:
+ case AArch64::S23:
+ case AArch64::S25:
+ case AArch64::S27:
+ case AArch64::S29:
+ case AArch64::S31:
+ case AArch64::D1:
+ case AArch64::D3:
+ case AArch64::D5:
+ case AArch64::D7:
+ case AArch64::D9:
+ case AArch64::D11:
+ case AArch64::D13:
+ case AArch64::D15:
+ case AArch64::D17:
+ case AArch64::D19:
+ case AArch64::D21:
+ case AArch64::D23:
+ case AArch64::D25:
+ case AArch64::D27:
+ case AArch64::D29:
+ case AArch64::D31:
+ case AArch64::Q1:
+ case AArch64::Q3:
+ case AArch64::Q5:
+ case AArch64::Q7:
+ case AArch64::Q9:
+ case AArch64::Q11:
+ case AArch64::Q13:
+ case AArch64::Q15:
+ case AArch64::Q17:
+ case AArch64::Q19:
+ case AArch64::Q21:
+ case AArch64::Q23:
+ case AArch64::Q25:
+ case AArch64::Q27:
+ case AArch64::Q29:
+ case AArch64::Q31:
+ return true;
+ case AArch64::S0:
+ case AArch64::S2:
+ case AArch64::S4:
+ case AArch64::S6:
+ case AArch64::S8:
+ case AArch64::S10:
+ case AArch64::S12:
+ case AArch64::S14:
+ case AArch64::S16:
+ case AArch64::S18:
+ case AArch64::S20:
+ case AArch64::S22:
+ case AArch64::S24:
+ case AArch64::S26:
+ case AArch64::S28:
+ case AArch64::S30:
+ case AArch64::D0:
+ case AArch64::D2:
+ case AArch64::D4:
+ case AArch64::D6:
+ case AArch64::D8:
+ case AArch64::D10:
+ case AArch64::D12:
+ case AArch64::D14:
+ case AArch64::D16:
+ case AArch64::D18:
+ case AArch64::D20:
+ case AArch64::D22:
+ case AArch64::D24:
+ case AArch64::D26:
+ case AArch64::D28:
+ case AArch64::D30:
+ case AArch64::Q0:
+ case AArch64::Q2:
+ case AArch64::Q4:
+ case AArch64::Q6:
+ case AArch64::Q8:
+ case AArch64::Q10:
+ case AArch64::Q12:
+ case AArch64::Q14:
+ case AArch64::Q16:
+ case AArch64::Q18:
+ case AArch64::Q20:
+ case AArch64::Q22:
+ case AArch64::Q24:
+ case AArch64::Q26:
+ case AArch64::Q28:
+ case AArch64::Q30:
+ return false;
+
+ }
+}
+
+bool haveSameParity(unsigned reg1, unsigned reg2) {
+ assert(isFPReg(reg1) && "Expecting an FP register for reg1");
+ assert(isFPReg(reg2) && "Expecting an FP register for reg2");
+
+ return isOdd(reg1) == isOdd(reg2);
+}
+
+class A57PBQPBuilder : public PBQP_BUILDER {
+public:
+ A57PBQPBuilder() : PBQP_BUILDER(), TRI(nullptr), LIs(nullptr), Chains() {}
+
+ // Build a PBQP instance to represent the register allocation problem for
+ // the given MachineFunction.
+ std::unique_ptr<PBQPRAProblem>
+ build(MachineFunction *MF, const LiveIntervals *LI,
+ const MachineBlockFrequencyInfo *blockInfo,
+ const RegSet &VRegs) override;
+
+private:
+ const AArch64RegisterInfo *TRI;
+ const LiveIntervals *LIs;
+ SmallSetVector<unsigned, 32> Chains;
+
+ // Return true if reg is a physical register
+ bool isPhysicalReg(unsigned reg) const {
+ return TRI->isPhysicalRegister(reg);
+ }
+
+ // Add the accumulator chaining constraint, inside the chain, i.e. so that
+ // parity(Rd) == parity(Ra).
+ // \return true if a constraint was added
+ bool addIntraChainConstraint(PBQPRAProblem *p, unsigned Rd, unsigned Ra);
+
+ // Add constraints between existing chains
+ void addInterChainConstraint(PBQPRAProblem *p, unsigned Rd, unsigned Ra);
+};
+} // Anonymous namespace
+
+bool A57PBQPBuilder::addIntraChainConstraint(PBQPRAProblem *p, unsigned Rd,
+ unsigned Ra) {
+ if (Rd == Ra)
+ return false;
+
+ if (isPhysicalReg(Rd) || isPhysicalReg(Ra)) {
+ dbgs() << "Rd is a physical reg:" << isPhysicalReg(Rd) << '\n';
+ dbgs() << "Ra is a physical reg:" << isPhysicalReg(Ra) << '\n';
+ return false;
+ }
+
+ const PBQPRAProblem::AllowedSet *vRdAllowed = &p->getAllowedSet(Rd);
+ const PBQPRAProblem::AllowedSet *vRaAllowed = &p->getAllowedSet(Ra);
+
+ PBQPRAGraph &g = p->getGraph();
+ PBQPRAGraph::NodeId node1 = p->getNodeForVReg(Rd);
+ PBQPRAGraph::NodeId node2 = p->getNodeForVReg(Ra);
+ PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2);
+
+ // The edge does not exist. Create one with the appropriate interference
+ // costs.
+ if (edge == g.invalidEdgeId()) {
+ const LiveInterval &ld = LIs->getInterval(Rd);
+ const LiveInterval &la = LIs->getInterval(Ra);
+ bool livesOverlap = ld.overlaps(la);
+
+ PBQP::Matrix costs(vRdAllowed->size() + 1, vRaAllowed->size() + 1, 0);
+ for (unsigned i = 0; i != vRdAllowed->size(); ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+ for (unsigned j = 0; j != vRaAllowed->size(); ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (livesOverlap && TRI->regsOverlap(pRd, pRa))
+ costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ else
+ costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
+ }
+ }
+ g.addEdge(node1, node2, std::move(costs));
+ return true;
+ }
+
+ if (g.getEdgeNode1Id(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(vRdAllowed, vRaAllowed);
+ }
+
+ // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass))
+ PBQP::Matrix costs(g.getEdgeCosts(edge));
+ for (unsigned i = 0; i != vRdAllowed->size(); ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+
+ // Get the maximum cost (excluding unallocatable reg) for same parity
+ // registers
+ PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
+ for (unsigned j = 0; j != vRaAllowed->size(); ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (haveSameParity(pRd, pRa))
+ if (costs[i + 1][j + 1] !=
+ std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ costs[i + 1][j + 1] > sameParityMax)
+ sameParityMax = costs[i + 1][j + 1];
+ }
+
+ // Ensure all registers with a different parity have a higher cost
+ // than sameParityMax
+ for (unsigned j = 0; j != vRaAllowed->size(); ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (!haveSameParity(pRd, pRa))
+ if (sameParityMax > costs[i + 1][j + 1])
+ costs[i + 1][j + 1] = sameParityMax + 1.0;
+ }
+ }
+ g.setEdgeCosts(edge, costs);
+
+ return true;
+}
+
+void
+A57PBQPBuilder::addInterChainConstraint(PBQPRAProblem *p, unsigned Rd,
+ unsigned Ra) {
+ // Do some Chain management
+ if (Chains.count(Ra)) {
+ if (Rd != Ra) {
+ DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
+ << PrintReg(Rd, TRI) << '\n';);
+ Chains.remove(Ra);
+ Chains.insert(Rd);
+ }
+ } else {
+ DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
+ << '\n';);
+ Chains.insert(Rd);
+ }
+
+ const LiveInterval &ld = LIs->getInterval(Rd);
+ for (auto r : Chains) {
+ // Skip self
+ if (r == Rd)
+ continue;
+
+ const LiveInterval &lr = LIs->getInterval(r);
+ if (ld.overlaps(lr)) {
+ const PBQPRAProblem::AllowedSet *vRdAllowed = &p->getAllowedSet(Rd);
+ const PBQPRAProblem::AllowedSet *vRrAllowed = &p->getAllowedSet(r);
+
+ PBQPRAGraph &g = p->getGraph();
+ PBQPRAGraph::NodeId node1 = p->getNodeForVReg(Rd);
+ PBQPRAGraph::NodeId node2 = p->getNodeForVReg(r);
+ PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2);
+ assert(edge != g.invalidEdgeId() &&
+ "PBQP error ! The edge should exist !");
+
+ DEBUG(dbgs() << "Refining constraint !\n";);
+
+ if (g.getEdgeNode1Id(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(vRdAllowed, vRrAllowed);
+ }
+
+ // Enforce that cost is higher with all other Chains of the same parity
+ PBQP::Matrix costs(g.getEdgeCosts(edge));
+ for (unsigned i = 0; i != vRdAllowed->size(); ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+
+ // Get the maximum cost (excluding unallocatable reg) for all other
+ // parity registers
+ PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
+ for (unsigned j = 0; j != vRrAllowed->size(); ++j) {
+ unsigned pRa = (*vRrAllowed)[j];
+ if (!haveSameParity(pRd, pRa))
+ if (costs[i + 1][j + 1] !=
+ std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ costs[i + 1][j + 1] > sameParityMax)
+ sameParityMax = costs[i + 1][j + 1];
+ }
+
+ // Ensure all registers with same parity have a higher cost
+ // than sameParityMax
+ for (unsigned j = 0; j != vRrAllowed->size(); ++j) {
+ unsigned pRa = (*vRrAllowed)[j];
+ if (haveSameParity(pRd, pRa))
+ if (sameParityMax > costs[i + 1][j + 1])
+ costs[i + 1][j + 1] = sameParityMax + 1.0;
+ }
+ }
+ g.setEdgeCosts(edge, costs);
+ }
+ }
+}
+
+std::unique_ptr<PBQPRAProblem>
+A57PBQPBuilder::build(MachineFunction *MF, const LiveIntervals *LI,
+ const MachineBlockFrequencyInfo *blockInfo,
+ const RegSet &VRegs) {
+ std::unique_ptr<PBQPRAProblem> p =
+ PBQP_BUILDER::build(MF, LI, blockInfo, VRegs);
+
+ TRI = static_cast<const AArch64RegisterInfo *>(
+ MF->getTarget().getSubtargetImpl()->getRegisterInfo());
+ LIs = LI;
+
+ DEBUG(MF->dump(););
+
+ for (MachineFunction::const_iterator mbbItr = MF->begin(), mbbEnd = MF->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *MBB = &*mbbItr;
+ Chains.clear(); // FIXME: really needed ? Could not work at MF level ?
+
+ for (MachineBasicBlock::const_iterator miItr = MBB->begin(),
+ miEnd = MBB->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *MI = &*miItr;
+ switch (MI->getOpcode()) {
+ case AArch64::FMSUBSrrr:
+ case AArch64::FMADDSrrr:
+ case AArch64::FNMSUBSrrr:
+ case AArch64::FNMADDSrrr:
+ case AArch64::FMSUBDrrr:
+ case AArch64::FMADDDrrr:
+ case AArch64::FNMSUBDrrr:
+ case AArch64::FNMADDDrrr: {
+ unsigned Rd = MI->getOperand(0).getReg();
+ unsigned Ra = MI->getOperand(3).getReg();
+
+ if (addIntraChainConstraint(p.get(), Rd, Ra))
+ addInterChainConstraint(p.get(), Rd, Ra);
+ break;
+ }
+
+ case AArch64::FMLAv2f32:
+ case AArch64::FMLSv2f32: {
+ unsigned Rd = MI->getOperand(0).getReg();
+ addInterChainConstraint(p.get(), Rd, Rd);
+ break;
+ }
+
+ default:
+ // Forget Chains which have been killed
+ for (auto r : Chains) {
+ SmallVector<unsigned, 8> toDel;
+ if (MI->killsRegister(r)) {
+ DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
+ MI->print(dbgs()););
+ toDel.push_back(r);
+ }
+
+ while (!toDel.empty()) {
+ Chains.remove(toDel.back());
+ toDel.pop_back();
+ }
+ }
+ }
+ }
+ }
+
+ return p;
+}
+
+// Factory function used by AArch64TargetMachine to add the pass to the
+// passmanager.
+FunctionPass *llvm::createAArch64A57PBQPRegAlloc() {
+ std::unique_ptr<PBQP_BUILDER> builder = llvm::make_unique<A57PBQPBuilder>();
+ return createPBQPRegisterAllocator(std::move(builder), nullptr);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index e867524226b..1f5978198e4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -13,6 +13,7 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
@@ -73,6 +74,10 @@ EnableCondOpt("aarch64-condopt",
cl::desc("Enable the condition optimizer pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+EnablePBQP("aarch64-pbqp", cl::Hidden,
+ cl::desc("Use PBQP register allocator (experimental)"),
+ cl::init(false));
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
@@ -90,8 +95,14 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool LittleEndian)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, LittleEndian) {
+ Subtarget(TT, CPU, FS, *this, LittleEndian),
+ usingPBQP(false) {
initAsmInfo();
+
+ if (EnablePBQP && Subtarget.isCortexA57() && OL != CodeGenOpt::None) {
+ usingPBQP = true;
+ RegisterRegAlloc::setDefault(createAArch64A57PBQPRegAlloc);
+ }
}
void AArch64leTargetMachine::anchor() { }
@@ -216,7 +227,8 @@ bool AArch64PassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
if (TM->getOptLevel() != CodeGenOpt::None &&
- TM->getSubtarget<AArch64Subtarget>().isCortexA57())
+ TM->getSubtarget<AArch64Subtarget>().isCortexA57() &&
+ !static_cast<const AArch64TargetMachine *>(TM)->isPBQPUsed())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
return true;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 101d10c29a8..42d7dc57328 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -40,6 +40,12 @@ public:
/// \brief Register AArch64 analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+
+ /// \brief Query if the PBQP register allocator is being used
+ bool isPBQPUsed() const { return usingPBQP; }
+
+private:
+ bool usingPBQP;
};
// AArch64leTargetMachine - AArch64 little endian target machine.
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index ebf7ae0d347..d68a2de94b6 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(AArch64CodeGen
AArch64LoadStoreOptimizer.cpp
AArch64MCInstLower.cpp
AArch64PromoteConstant.cpp
+ AArch64PBQPRegAlloc.cpp
AArch64RegisterInfo.cpp
AArch64SelectionDAGInfo.cpp
AArch64StorePairSuppress.cpp
diff --git a/llvm/test/CodeGen/AArch64/PBQP.ll b/llvm/test/CodeGen/AArch64/PBQP.ll
new file mode 100644
index 00000000000..491998e1e30
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/PBQP.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -aarch64-pbqp -o - %s | FileCheck %s
+
+define i32 @foo(i32 %a) {
+; CHECK-LABEL: foo:
+; CHECK: bl bar
+; CHECK-NEXT: bl baz
+ %call = call i32 @bar(i32 %a)
+ %call1 = call i32 @baz(i32 %call)
+ ret i32 %call1
+}
+
+declare i32 @bar(i32)
+declare i32 @baz(i32)
+
OpenPOWER on IntegriCloud