diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r-- | llvm/lib/Target/PowerPC/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPC.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp | 177 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 13 |
5 files changed, 191 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index e8316e937cb..53c2ed3d51e 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_target(PowerPCCodeGen
   PPCMachineFunctionInfo.cpp
   PPCMIPeephole.cpp
   PPCRegisterInfo.cpp
+  PPCQPXLoadSplat.cpp
   PPCSubtarget.cpp
   PPCTargetMachine.cpp
   PPCTargetObjectFile.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index a4235fa6e04..d4eee2204cf 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -42,6 +42,7 @@ namespace llvm {
   FunctionPass *createPPCVSXSwapRemovalPass();
   FunctionPass *createPPCMIPeepholePass();
   FunctionPass *createPPCBranchSelectionPass();
+  FunctionPass *createPPCQPXLoadSplatPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCTLSDynamicCallPass();
   FunctionPass *createPPCBoolRetToIntPass();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d0f43434c39..c645e076ae6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7187,9 +7187,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       SplatIdx -= 4;
     }
 
-    // FIXME: If SplatIdx == 0 and the input came from a load, then there is
-    // nothing to do.
-
     return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                        DAG.getConstant(SplatIdx, dl, MVT::i32));
   }
diff --git a/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
new file mode 100644
index 00000000000..e15751e444c
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -0,0 +1,177 @@
+//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The QPX vector registers overlay the scalar floating-point registers, and
+// any scalar floating-point loads splat their value across all vector lanes.
+// Thus, if we have a scalar load followed by a splat, we can remove the splat
+// (i.e. replace the load with a load-and-splat pseudo instruction).
+//
+// This pass must run after anything that might do store-to-load forwarding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-qpx-load-splat"
+
+STATISTIC(NumSimplified, "Number of QPX load splats simplified");
+
+namespace llvm {
+  void initializePPCQPXLoadSplatPass(PassRegistry&);
+}
+
+namespace {
+  /// Machine-function pass that folds a lane-0 QPX splat into the scalar
+  /// floating-point load that feeds it (see the file header for why this is
+  /// legal).
+  struct PPCQPXLoadSplat : public MachineFunctionPass {
+    static char ID;
+    PPCQPXLoadSplat() : MachineFunctionPass(ID) {
+      initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnMachineFunction(MachineFunction &Fn) override;
+
+    const char *getPassName() const override {
+      return "PowerPC QPX Load Splat Simplification";
+    }
+  };
+  char PPCQPXLoadSplat::ID = 0;
+}
+
+INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
+                "PowerPC QPX Load Splat Simplification",
+                false, false)
+
+FunctionPass *llvm::createPPCQPXLoadSplatPass() {
+  return new PPCQPXLoadSplat();
+}
+
+/// Walk every basic block bottom-up, remembering each lane-0 QVESPLATI whose
+/// source operand is killed, and delete such a splat once the scalar load
+/// defining its source is reached (retargeting the load at the splat's
+/// destination when the two registers differ).
+///
+/// \returns true if any splat was removed.
+bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
+  bool MadeChange = false;
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+  for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
+    MachineBasicBlock *MBB = &*MFI;
+    SmallVector<MachineInstr *, 4> Splats;
+
+    for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
+      MachineInstr *MI = &*MBBI;
+
+      if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
+        Splats.clear();
+        continue;
+      }
+
+      // We're looking for a sequence like this:
+      // %F0<def> = LFD 0, %X3<kill>, %QF0<imp-def>; mem:LD8[%a](tbaa=!2)
+      // %QF1<def> = QVESPLATI %QF0<kill>, 0, %RM<imp-use>
+
+      for (auto SI = Splats.begin(); SI != Splats.end();) {
+        MachineInstr *SMI = *SI;
+        unsigned SplatReg = SMI->getOperand(0).getReg();
+        unsigned SrcReg = SMI->getOperand(1).getReg();
+
+        if (MI->modifiesRegister(SrcReg, TRI)) {
+          switch (MI->getOpcode()) {
+          default:
+            SI = Splats.erase(SI);
+            continue;
+          case PPC::LFS:
+          case PPC::LFD:
+          case PPC::LFSU:
+          case PPC::LFDU:
+          case PPC::LFSUX:
+          case PPC::LFDUX:
+          case PPC::LFSX:
+          case PPC::LFDX:
+          case PPC::LFIWAX:
+          case PPC::LFIWZX:
+            if (SplatReg != SrcReg) {
+              // We need to change the load to define the scalar subregister of
+              // the QPX splat source register.
+              unsigned SubRegIndex =
+                TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
+              unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
+
+              // Substitute both the explicit defined register, and also the
+              // implicit def of the containing QPX register.
+              MI->getOperand(0).setReg(SplatSubReg);
+              MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
+            }
+
+            SI = Splats.erase(SI);
+
+            // If SMI is directly after MI, then MBBI's base iterator is
+            // pointing at SMI. Adjust MBBI around the call to erase SMI to
+            // avoid invalidating MBBI.
+            ++MBBI;
+            SMI->eraseFromParent();
+            --MBBI;
+
+            ++NumSimplified;
+            MadeChange = true;
+            continue;
+          }
+        }
+
+        // Stop tracking this splat if rewriting an earlier load would be
+        // unsafe:
+        //  - MI defines the splat register, so a definition of it cannot be
+        //    hoisted above MI; or
+        //  - the splat and source registers differ and MI reads either one.
+        //    A read of the splat register means it already holds a live value
+        //    that a retargeted load would clobber; a read of the source
+        //    register means the loaded value has another user that would be
+        //    left without a definition once the load is retargeted.
+        if (MI->modifiesRegister(SplatReg, TRI) ||
+            (SrcReg != SplatReg &&
+             (MI->readsRegister(SplatReg, TRI) ||
+              MI->readsRegister(SrcReg, TRI)))) {
+          SI = Splats.erase(SI);
+          continue;
+        }
+
+        ++SI;
+      }
+
+      if (MI->getOpcode() != PPC::QVESPLATI &&
+          MI->getOpcode() != PPC::QVESPLATIs &&
+          MI->getOpcode() != PPC::QVESPLATIb)
+        continue;
+      if (MI->getOperand(2).getImm() != 0)
+        continue;
+
+      // If there are other uses of the scalar value after this, replacing
+      // those uses might be non-trivial.
+      if (!MI->getOperand(1).isKill())
+        continue;
+
+      Splats.push_back(MI);
+    }
+  }
+
+  return MadeChange;
+}
+
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 8d5af9458be..5d47e0e3ebd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -43,6 +43,10 @@ opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
                                 cl::desc("Disable VSX Swap Removal for PPC"));
 
 static cl::
+opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
+  cl::desc("Disable QPX load splat simplification"));
+
+static cl::
 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
                             cl::desc("Disable machine peepholes for PPC"));
 
@@ -388,8 +392,15 @@ void PPCPassConfig::addPreRegAlloc() {
 }
 
 void PPCPassConfig::addPreSched2() {
-  if (getOptLevel() != CodeGenOpt::None)
+  if (getOptLevel() != CodeGenOpt::None) {
     addPass(&IfConverterID);
+
+    // This optimization must happen after anything that might do store-to-load
+    // forwarding. Here we're after RA (and, thus, when spills are inserted)
+    // but before post-RA scheduling.
+    if (!DisableQPXLoadSplat)
+      addPass(createPPCQPXLoadSplatPass());
+  }
 }
 
 void PPCPassConfig::addPreEmitPass() {