summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorBill Schmidt <wschmidt@linux.vnet.ibm.com>2015-07-21 21:40:17 +0000
committerBill Schmidt <wschmidt@linux.vnet.ibm.com>2015-07-21 21:40:17 +0000
commit2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311 (patch)
treeb7a050ac1060d2a5c5a44d011033bf7a7a2e2bef /llvm/lib
parentc1fbb3540a22c64b0afcfb3c2e99171ae7b13414 (diff)
downloadbcm5719-llvm-2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311.tar.gz
bcm5719-llvm-2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311.zip
[PPC64LE] More vector swap optimization TLC
This makes one substantive change and a few stylistic changes to the VSX swap optimization pass. The substantive change is to permit LXSDX and LXSSPX instructions to participate in swap optimization computations. The previous change to insert a swap following a SUBREG_TO_REG widening operation makes this almost trivial. I experimented with also permitting STXSDX and STXSSPX instructions. This can be done using similar techniques: we could insert a swap prior to a narrowing COPY operation, and then permit these stores to participate. I prototyped this, but discovered that the pattern of a narrowing COPY followed by an STXSDX does not occur in any of our test-suite code. So instead, I added commentary indicating that this could be done. Other TLC: - I changed SH_COPYSCALAR to SH_COPYWIDEN to more clearly indicate the direction of the copy. - I factored the insertion of swap instructions into a separate function. Finally, I added a new test case to check that the scalar-to-vector loads are working properly with swap optimization. llvm-svn: 242838
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp68
1 files changed, 47 insertions, 21 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 3fb1dcc3d4a..d9504710b3f 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -94,7 +94,7 @@ enum SHValues {
SH_NOSWAP_ST,
SH_SPLAT,
SH_XXPERMDI,
- SH_COPYSCALAR
+ SH_COPYWIDEN
};
struct PPCVSXSwapRemoval : public MachineFunctionPass {
@@ -149,6 +149,11 @@ private:
// handling. Return true iff any changes are made.
bool removeSwaps();
+ // Insert a swap instruction from SrcReg to DstReg at the given
+ // InsertPoint.
+ void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg);
+
// Update instructions requiring special handling.
void handleSpecialSwappables(int EntryIdx);
@@ -340,6 +345,15 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
SwapVector[VecIdx].IsLoad = 1;
SwapVector[VecIdx].IsSwap = 1;
break;
+ case PPC::LXSDX:
+ case PPC::LXSSPX:
+ // A load of a floating-point value into the high-order half of
+ // a vector register is safe, provided that we introduce a swap
+ // following the load, which will be done by the SUBREG_TO_REG
+ // support. So just mark these as safe.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
case PPC::STVX:
// Non-permuting stores are currently unsafe. We can use special
// handling for this in the future. By not marking these as
@@ -382,7 +396,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
else if (isVecReg(MI.getOperand(0).getReg()) &&
isScalarVecReg(MI.getOperand(2).getReg())) {
SwapVector[VecIdx].IsSwappable = 1;
- SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
}
break;
}
@@ -417,7 +431,14 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
case PPC::STVEHX:
case PPC::STVEWX:
case PPC::STVXL:
+ // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
+ // by adding special handling for narrowing copies as well as
+ // widening ones. However, I've experimented with this, and in
+ // practice we currently do not appear to use STXSDX fed by
+ // a narrowing copy from a full vector register. Since I can't
+ // generate any useful test cases, I've left this alone for now.
case PPC::STXSDX:
+ case PPC::STXSSPX:
case PPC::VCIPHER:
case PPC::VCIPHERLAST:
case PPC::VMRGHB:
@@ -540,7 +561,8 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
- SwapVector[VecIdx].MentionsPhysVR = 1;
+ if (!isScalarVecReg(CopySrcReg))
+ SwapVector[VecIdx].MentionsPhysVR = 1;
return CopySrcReg;
}
@@ -626,8 +648,8 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
SwapVector[Repr].WebRejected = 1;
DEBUG(dbgs() <<
- format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
- Repr));
+ format("Web %d rejected for physreg, partial reg, or not "
+ "swap[pable]\n", Repr));
DEBUG(dbgs() << " in " << EntryIdx << ": ");
DEBUG(SwapVector[EntryIdx].VSEMI->dump());
DEBUG(dbgs() << "\n");
@@ -740,6 +762,21 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
}
}
+// Create an xxswapd instruction and insert it prior to the given point.
+// MI is used to determine basic block and debug loc information.
+// FIXME: When inserting a swap, we should check whether SrcReg is
+// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2; If so,
+// then instead we should generate a copy from Reg to DstReg.
+void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg) {
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), DstReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .addImm(2);
+}
+
// The identified swap entry requires special handling to allow its
// containing computation to be optimized. Perform that handling
// here.
@@ -808,7 +845,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
// For a copy from a scalar floating-point register to a vector
// register, removing swaps will leave the copied value in the
// wrong lane. Insert a swap following the copy to fix this.
- case SHValues::SH_COPYSCALAR: {
+ case SHValues::SH_COPYWIDEN: {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
@@ -829,7 +866,6 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
// assignment problem. In this case we must copy from VRRC to VSRC
// prior to the swap, and from VSRC to VRRC following the swap.
// Coalescing will usually remove all this mess.
-
if (DstRC == &PPC::VRRCRegClass) {
unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
@@ -839,11 +875,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
.addReg(NewVReg);
DEBUG(MI->getNextNode()->dump());
- BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
- TII->get(PPC::XXPERMDI), VSRCTmp2)
- .addReg(VSRCTmp1)
- .addReg(VSRCTmp1)
- .addImm(2);
+ insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
DEBUG(MI->getNextNode()->getNextNode()->dump());
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
@@ -852,13 +884,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
} else {
-
- BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
- TII->get(PPC::XXPERMDI), DstReg)
- .addReg(NewVReg)
- .addReg(NewVReg)
- .addImm(2);
-
+ insertSwap(MI, InsertPoint, DstReg, NewVReg);
DEBUG(MI->getNextNode()->dump());
}
break;
@@ -944,8 +970,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
case SH_XXPERMDI:
DEBUG(dbgs() << "special:xxpermdi ");
break;
- case SH_COPYSCALAR:
- DEBUG(dbgs() << "special:copyscalar ");
+ case SH_COPYWIDEN:
+ DEBUG(dbgs() << "special:copywiden ");
break;
}
}
OpenPOWER on IntegriCloud