Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 30
1 file changed, 22 insertions, 8 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 2ed3be7f8ab..6ef4c269d8f 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -78,13 +78,12 @@ typedef struct LdStPairFlags {
 
 struct AArch64LoadStoreOpt : public MachineFunctionPass {
   static char ID;
-  AArch64LoadStoreOpt() : MachineFunctionPass(ID), IsStrictAlign(false) {
+  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
     initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
   }
 
   const AArch64InstrInfo *TII;
   const TargetRegisterInfo *TRI;
-  bool IsStrictAlign;
 
   // Scan the instructions looking for a load/store that can be combined
   // with the current instruction into a load/store pair.
@@ -127,7 +126,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and merge foldable ldr/str instructions.
   bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
 
-  bool optimizeBlock(MachineBasicBlock &MBB);
+  // Check if converting two narrow loads into a single wider load with
+  // bitfield extracts could be enabled.
+  bool enableNarrowLdMerge(MachineFunction &Fn);
+
+  bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
 
@@ -1161,7 +1164,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
   return false;
 }
 
-bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
+bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
+                                        bool enableNarrowLdOpt) {
   bool Modified = false;
   // Three tranformations to do here:
   // 1) Find halfword loads that can be merged into a single 32-bit word load
@@ -1189,7 +1193,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
   //        ldr x0, [x2], #4
 
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-       !IsStrictAlign && MBBI != E;) {
+       enableNarrowLdOpt && MBBI != E;) {
     MachineInstr *MI = MBBI;
     switch (MI->getOpcode()) {
     default:
@@ -1372,15 +1376,25 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
   return Modified;
 }
 
+bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
+  const AArch64Subtarget *SubTarget =
+      &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+  bool ProfitableArch = SubTarget->isCortexA57();
+  // FIXME: The benefit from converting narrow loads into a wider load could be
+  // microarchitectural as it assumes that a single load with two bitfield
+  // extracts is cheaper than two narrow loads. Currently, this conversion is
+  // enabled only in cortex-a57 on which performance benefits were verified.
+  return ProfitableArch & (!SubTarget->requiresStrictAlign());
+}
+
 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
   TRI = Fn.getSubtarget().getRegisterInfo();
-  IsStrictAlign = (static_cast<const AArch64Subtarget &>(Fn.getSubtarget()))
-                      .requiresStrictAlign();
 
   bool Modified = false;
+  bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
   for (auto &MBB : Fn)
-    Modified |= optimizeBlock(MBB);
+    Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
 
   return Modified;
 }
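
For readers unfamiliar with the narrow-load merge that this patch gates behind enableNarrowLdMerge, the following hand-written C++ sketch (not part of the patch; the function names and the little-endian assumption are invented for illustration) shows the effect of the transformation at the source level: two adjacent halfword loads become one 32-bit load whose halves are recovered with mask and shift, which the AArch64 backend can lower to bitfield extracts.

#include <cstdint>
#include <cstring>

// Before the optimization: two narrow halfword loads, typically two LDRH
// instructions on AArch64.
std::uint32_t sum_halves_narrow(const std::uint16_t *p) {
  return static_cast<std::uint32_t>(p[0]) + static_cast<std::uint32_t>(p[1]);
}

// After the optimization (conceptually): one wider 32-bit load, with the two
// halfwords recovered via mask/shift, i.e. bitfield extracts. Little-endian
// layout is assumed here.
std::uint32_t sum_halves_wide(const std::uint16_t *p) {
  std::uint32_t w;
  std::memcpy(&w, p, sizeof(w));    // single 32-bit load
  return (w & 0xFFFFu) + (w >> 16); // extract low and high halfwords
}

Whether the second form is actually faster is a microarchitectural question, which is why the patch enables the conversion only on Cortex-A57 (and never when strict alignment is required).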