summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2016-03-31 20:39:41 +0000
committerHal Finkel <hfinkel@anl.gov>2016-03-31 20:39:41 +0000
commitfc35391f2b4c36ec1a35d6e464ad972cf7117cdb (patch)
tree026da57fa59dce0a2c65203543f5e5c01e23d310 /llvm/lib/Target
parent132cd621216b7b09a170b9f10aa7494a87ce82fc (diff)
downloadbcm5719-llvm-fc35391f2b4c36ec1a35d6e464ad972cf7117cdb.tar.gz
bcm5719-llvm-fc35391f2b4c36ec1a35d6e464ad972cf7117cdb.zip
[PowerPC] Add a late MI-level pass for QPX load/splat simplification
Chapter 3 of the QPX manual states that, "Scalar floating-point load instructions, defined in the Power ISA, cause a replication of the source data across all elements of the target register." Thus, if we have a load followed by a QPX splat (from the first lane), the splat is redundant. This adds a late MI-level pass to remove the redundant splats in some of these cases (specifically when both occur in the same basic block). This optimization is scheduled just prior to post-RA scheduling. It can't happen before anything that might replace the load with some already-computed quantity (i.e. store-to-load forwarding). llvm-svn: 265047
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/PowerPC/PPC.h1
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp156
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp13
5 files changed, 170 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index e8316e937cb..53c2ed3d51e 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_target(PowerPCCodeGen
PPCMachineFunctionInfo.cpp
PPCMIPeephole.cpp
PPCRegisterInfo.cpp
+ PPCQPXLoadSplat.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
PPCTargetObjectFile.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index a4235fa6e04..d4eee2204cf 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -42,6 +42,7 @@ namespace llvm {
FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCQPXLoadSplatPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d0f43434c39..c645e076ae6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7187,9 +7187,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SplatIdx -= 4;
}
- // FIXME: If SplatIdx == 0 and the input came from a load, then there is
- // nothing to do.
-
return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
DAG.getConstant(SplatIdx, dl, MVT::i32));
}
diff --git a/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
new file mode 100644
index 00000000000..e15751e444c
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -0,0 +1,156 @@
+//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The QPX vector registers overlay the scalar floating-point registers, and
+// any scalar floating-point loads splat their value across all vector lanes.
+// Thus, if we have a scalar load followed by a splat, we can remove the splat
+// (i.e. replace the load with a load-and-splat pseudo instruction).
+//
+// This pass must run after anything that might do store-to-load forwarding.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-qpx-load-splat"
+
+STATISTIC(NumSimplified, "Number of QPX load splats simplified");
+
+namespace llvm {
+ void initializePPCQPXLoadSplatPass(PassRegistry&);
+}
+
+namespace {
+ struct PPCQPXLoadSplat : public MachineFunctionPass {
+ static char ID;
+ PPCQPXLoadSplat() : MachineFunctionPass(ID) {
+ initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "PowerPC QPX Load Splat Simplification";
+ }
+ };
+ char PPCQPXLoadSplat::ID = 0;
+}
+
+INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
+ "PowerPC QPX Load Splat Simplification",
+ false, false)
+
+FunctionPass *llvm::createPPCQPXLoadSplatPass() {
+ return new PPCQPXLoadSplat();
+}
+
+bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
+ bool MadeChange = false;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
+ MachineBasicBlock *MBB = &*MFI;
+ SmallVector<MachineInstr *, 4> Splats;
+
+ for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
+ MachineInstr *MI = &*MBBI;
+
+ if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
+ Splats.clear();
+ continue;
+ }
+
+ // We're looking for a sequence like this:
+ // %F0<def> = LFD 0, %X3<kill>, %QF0<imp-def>; mem:LD8[%a](tbaa=!2)
+ // %QF1<def> = QVESPLATI %QF0<kill>, 0, %RM<imp-use>
+
+ for (auto SI = Splats.begin(); SI != Splats.end();) {
+ MachineInstr *SMI = *SI;
+ unsigned SplatReg = SMI->getOperand(0).getReg();
+ unsigned SrcReg = SMI->getOperand(1).getReg();
+
+ if (MI->modifiesRegister(SrcReg, TRI)) {
+ switch (MI->getOpcode()) {
+ default:
+ SI = Splats.erase(SI);
+ continue;
+ case PPC::LFS:
+ case PPC::LFD:
+ case PPC::LFSU:
+ case PPC::LFDU:
+ case PPC::LFSUX:
+ case PPC::LFDUX:
+ case PPC::LFSX:
+ case PPC::LFDX:
+ case PPC::LFIWAX:
+ case PPC::LFIWZX:
+ if (SplatReg != SrcReg) {
+ // We need to change the load to define the scalar subregister of
+ // the QPX splat source register.
+ unsigned SubRegIndex =
+ TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
+ unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
+
+ // Substitute both the explicit defined register, and also the
+ // implicit def of the containing QPX register.
+ MI->getOperand(0).setReg(SplatSubReg);
+ MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
+ }
+
+ SI = Splats.erase(SI);
+
+ // If SMI is directly after MI, then MBBI's base iterator is
+ // pointing at SMI. Adjust MBBI around the call to erase SMI to
+ // avoid invalidating MBBI.
+ ++MBBI;
+ SMI->eraseFromParent();
+ --MBBI;
+
+ ++NumSimplified;
+ MadeChange = true;
+ continue;
+ }
+ }
+
+ if (MI->modifiesRegister(SplatReg, TRI)) {
+ SI = Splats.erase(SI);
+ continue;
+ }
+
+ ++SI;
+ }
+
+ if (MI->getOpcode() != PPC::QVESPLATI &&
+ MI->getOpcode() != PPC::QVESPLATIs &&
+ MI->getOpcode() != PPC::QVESPLATIb)
+ continue;
+ if (MI->getOperand(2).getImm() != 0)
+ continue;
+
+ // If there are other uses of the scalar value after this, replacing
+ // those uses might be non-trivial.
+ if (!MI->getOperand(1).isKill())
+ continue;
+
+ Splats.push_back(MI);
+ }
+ }
+
+ return MadeChange;
+}
+
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 8d5af9458be..5d47e0e3ebd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -43,6 +43,10 @@ opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
cl::desc("Disable VSX Swap Removal for PPC"));
static cl::
+opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
+ cl::desc("Disable QPX load splat simplification"));
+
+static cl::
opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
cl::desc("Disable machine peepholes for PPC"));
@@ -388,8 +392,15 @@ void PPCPassConfig::addPreRegAlloc() {
}
void PPCPassConfig::addPreSched2() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(&IfConverterID);
+
+ // This optimization must happen after anything that might do store-to-load
+ // forwarding. Here we're after RA (and, thus, when spills are inserted)
+ // but before post-RA scheduling.
+ if (!DisableQPXLoadSplat)
+ addPass(createPPCQPXLoadSplatPass());
+ }
}
void PPCPassConfig::addPreEmitPass() {
OpenPOWER on IntegriCloud