summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorJessica Paquette <jpaquette@apple.com>2019-07-20 01:55:35 +0000
committerJessica Paquette <jpaquette@apple.com>2019-07-20 01:55:35 +0000
commit41affad967d48b8223dd8cfee254ee787b87a7e6 (patch)
tree7817950d7af9ac443fa7ef8b1811709c76f6ed25 /llvm/lib
parent5204f7611f4ad6549921f9fa757823e77f39ce32 (diff)
downloadbcm5719-llvm-41affad967d48b8223dd8cfee254ee787b87a7e6.tar.gz
bcm5719-llvm-41affad967d48b8223dd8cfee254ee787b87a7e6.zip
[GlobalISel][AArch64] Contract trivial same-size cross-bank copies into G_STOREs
Sometimes, you can end up with cross-bank copies between same-sized GPRs and FPRs, which feed into G_STOREs. When these copies feed only into stores, they aren't necessary; we can just store using the original register bank. This provides some minor code size savings for some floating point SPEC benchmarks. (Around 0.2% for 453.povray and 450.soplex) This issue doesn't seem to show up due to regbankselect or anything similar. So, this patch introduces an early select function, `contractCrossBankCopyIntoStore` which performs the contraction when possible. The selector then continues normally and selects the correct store opcode, eliminating needless copies along the way. Differential Revision: https://reviews.llvm.org/D65024 llvm-svn: 366625
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp49
1 files changed, 49 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index f8e15c88ef0..038c6f37a31 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -69,6 +69,10 @@ private:
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ /// Eliminate same-sized cross-bank copies into stores before selectImpl().
+ void contractCrossBankCopyIntoStore(MachineInstr &I,
+ MachineRegisterInfo &MRI) const;
+
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
@@ -1120,6 +1124,9 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
}
return;
}
+ case TargetOpcode::G_STORE:
+ contractCrossBankCopyIntoStore(I, MRI);
+ return;
default:
return;
}
@@ -1160,6 +1167,48 @@ bool AArch64InstructionSelector::earlySelectSHL(
return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
+void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
+ // If we're storing a scalar, it doesn't matter what register bank that
+ // scalar is on. All that matters is the size.
+ //
+ // So, if we see something like this (with a 32-bit scalar as an example):
+ //
+ // %x:gpr(s32) = ... something ...
+ // %y:fpr(s32) = COPY %x:gpr(s32)
+ // G_STORE %y:fpr(s32)
+ //
+ // We can fix this up into something like this:
+ //
+ // G_STORE %x:gpr(s32)
+ //
+ // And then continue the selection process normally.
+ MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
+ if (!Def)
+ return;
+ Register DefDstReg = Def->getOperand(0).getReg();
+ LLT DefDstTy = MRI.getType(DefDstReg);
+ Register StoreSrcReg = I.getOperand(0).getReg();
+ LLT StoreSrcTy = MRI.getType(StoreSrcReg);
+
+ // If we get something strange like a physical register, then we shouldn't
+ // go any further.
+ if (!DefDstTy.isValid())
+ return;
+
+ // Are the source and dst types the same size?
+ if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
+ return;
+
+ if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
+ RBI.getRegBank(DefDstReg, MRI, TRI))
+ return;
+
+ // We have a cross-bank copy, which is entering a store. Let's fold it.
+ I.getOperand(0).setReg(DefDstReg);
+}
+
bool AArch64InstructionSelector::earlySelectLoad(
MachineInstr &I, MachineRegisterInfo &MRI) const {
// Try to fold in shifts, etc into the addressing mode of a load.
OpenPOWER on IntegriCloud