diff options
| author | Jessica Paquette <jpaquette@apple.com> | 2019-07-20 01:55:35 +0000 |
|---|---|---|
| committer | Jessica Paquette <jpaquette@apple.com> | 2019-07-20 01:55:35 +0000 |
| commit | 41affad967d48b8223dd8cfee254ee787b87a7e6 (patch) | |
| tree | 7817950d7af9ac443fa7ef8b1811709c76f6ed25 | |
| parent | 5204f7611f4ad6549921f9fa757823e77f39ce32 (diff) | |
| download | bcm5719-llvm-41affad967d48b8223dd8cfee254ee787b87a7e6.tar.gz bcm5719-llvm-41affad967d48b8223dd8cfee254ee787b87a7e6.zip | |
[GlobalISel][AArch64] Contract trivial same-size cross-bank copies into G_STOREs
Sometimes, you can end up with cross-bank copies between same-sized GPRs and
FPRs, which feed into G_STOREs. When these copies feed only into stores, they
aren't necessary; we can just store using the original register bank.
This provides some minor code size savings for some floating-point SPEC
benchmarks (around 0.2% for 453.povray and 450.soplex).
This issue doesn't seem to be caused by regbankselect or anything similar. So,
this patch introduces an early select function, `contractCrossBankCopyIntoStore`,
which performs the contraction when possible. The selector then continues
normally and selects the correct store opcode, eliminating needless copies
along the way.
Differential Revision: https://reviews.llvm.org/D65024
llvm-svn: 366625
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 49 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir | 89 |
2 files changed, 138 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index f8e15c88ef0..038c6f37a31 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -69,6 +69,10 @@ private: bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const; + /// Eliminate same-sized cross-bank copies into stores before selectImpl(). + void contractCrossBankCopyIntoStore(MachineInstr &I, + MachineRegisterInfo &MRI) const; + bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, @@ -1120,6 +1124,9 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const { } return; } + case TargetOpcode::G_STORE: + contractCrossBankCopyIntoStore(I, MRI); + return; default: return; } @@ -1160,6 +1167,48 @@ bool AArch64InstructionSelector::earlySelectSHL( return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); } +void AArch64InstructionSelector::contractCrossBankCopyIntoStore( + MachineInstr &I, MachineRegisterInfo &MRI) const { + assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); + // If we're storing a scalar, it doesn't matter what register bank that + // scalar is on. All that matters is the size. + // + // So, if we see something like this (with a 32-bit scalar as an example): + // + // %x:gpr(s32) = ... something ... + // %y:fpr(s32) = COPY %x:gpr(s32) + // G_STORE %y:fpr(s32) + // + // We can fix this up into something like this: + // + // G_STORE %x:gpr(s32) + // + // And then continue the selection process normally. 
+ MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI); + if (!Def) + return; + Register DefDstReg = Def->getOperand(0).getReg(); + LLT DefDstTy = MRI.getType(DefDstReg); + Register StoreSrcReg = I.getOperand(0).getReg(); + LLT StoreSrcTy = MRI.getType(StoreSrcReg); + + // If we get something strange like a physical register, then we shouldn't + // go any further. + if (!DefDstTy.isValid()) + return; + + // Are the source and dst types the same size? + if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) + return; + + if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == + RBI.getRegBank(DefDstReg, MRI, TRI)) + return; + + // We have a cross-bank copy, which is entering a store. Let's fold it. + I.getOperand(0).setReg(DefDstReg); +} + bool AArch64InstructionSelector::earlySelectLoad( MachineInstr &I, MachineRegisterInfo &MRI) const { // Try to fold in shifts, etc into the addressing mode of a load. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir new file mode 100644 index 00000000000..f1b40c18fb9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir @@ -0,0 +1,89 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @contract_s64_gpr(i64* %addr) { ret void } + define void @contract_s32_gpr(i32* %addr) { ret void } + define void @contract_s64_fpr(i64* %addr) { ret void } + define void @contract_s32_fpr(i32* %addr) { ret void } + define void @contract_s16_fpr(i16* %addr) { ret void } +... 
+--- +name: contract_s64_gpr +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: contract_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:fpr(s64) = COPY %1 + G_STORE %2:fpr(s64), %0 :: (store 8 into %ir.addr) +... +--- +name: contract_s32_gpr +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $w1 + ; CHECK-LABEL: name: contract_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:gpr(s32) = COPY $w1 + %2:fpr(s32) = COPY %1 + G_STORE %2:fpr(s32), %0 :: (store 4 into %ir.addr) +... +--- +name: contract_s64_fpr +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $d1 + ; CHECK-LABEL: name: contract_s64_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:fpr(s64) = COPY $d1 + %2:gpr(s64) = COPY %1 + G_STORE %2:gpr(s64), %0 :: (store 8 into %ir.addr) +... +--- +name: contract_s32_fpr +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $s1 + ; CHECK-LABEL: name: contract_s32_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:fpr(s32) = COPY $s1 + %2:gpr(s32) = COPY %1 + G_STORE %2:gpr(s32), %0 :: (store 4 into %ir.addr) +... 
+--- +name: contract_s16_fpr +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $x0, $h1 + ; CHECK-LABEL: name: contract_s16_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store 2 into %ir.addr) + %0:gpr(p0) = COPY $x0 + %1:fpr(s16) = COPY $h1 + %2:gpr(s16) = COPY %1 + G_STORE %2:gpr(s16), %0 :: (store 2 into %ir.addr) |

