diff options
| author | Jiong Wang <wong.kwongyuan.tools@gmail.com> | 2019-10-16 15:27:59 +0000 |
|---|---|---|
| committer | Jiong Wang <wong.kwongyuan.tools@gmail.com> | 2019-10-16 15:27:59 +0000 |
| commit | ec51851026a55e1cfc7f006f0e75f0a19acb32d3 (patch) | |
| tree | 3db61a88d21b78834cf555a5415d55894b74d157 /llvm/lib/Target/BPF/BPFMIPeephole.cpp | |
| parent | 1893f9a45813d7b486576d88642e0e1cc5bc92fb (diff) | |
| download | bcm5719-llvm-ec51851026a55e1cfc7f006f0e75f0a19acb32d3.tar.gz bcm5719-llvm-ec51851026a55e1cfc7f006f0e75f0a19acb32d3.zip | |
bpf: fix wrong truncation elimination when there is back-edge/loop
Currently, BPF backend is doing truncation elimination. If one truncation
is performed on a value defined by narrow loads, then it could be redundant
given BPF loads zero extend the destination register implicitly.
When the definition of the truncated value is a merging value (PHI node)
that could come from different code paths, then checks need to be done on
all possible code paths.
Above described optimization was introduced as r306685, however it doesn't
work when there is back-edge, for example when loop is used inside BPF
code.
For example for the following code, a zero-extended value should be stored
into b[i], but the "and reg, 0xffff" is wrongly eliminated which then
generates corrupted data.
void cal1(unsigned short *a, unsigned long *b, unsigned int k)
{
unsigned short e;
e = *a;
for (unsigned int i = 0; i < k; i++) {
b[i] = e;
e = ~e;
}
}
The reason is r306685 was trying to do the PHI node checks inside isel
DAG2DAG phase, and the checks are done on MachineInstr. This is actually
wrong, because MachineInstr is being built during isel phase and the
associated information is not completed yet. A quick search shows none
target other than BPF is access MachineInstr info during isel phase.
For an PHI node, when you reached it during isel phase, it may have all
predecessors linked, but not successors. It seems successors are linked to
PHI node only when doing SelectionDAGISel::FinishBasicBlock and this
happens later than PreprocessISelDAG hook.
Previously, BPF program doesn't allow loop, there is probably the reason
why this bug was not exposed.
This patch therefore fixes the bug by the following approach:
- The existing truncation elimination code and the associated
"load_to_vreg_" records are removed.
- Instead, implement truncation elimination using MachineSSA pass, this
is where all information are built, and keep the pass together with other
similar peephole optimizations inside BPFMIPeephole.cpp. Redundant move
elimination logic is updated accordingly.
- Unit testcase included + no compilation errors for kernel BPF selftest.
Patch Review
===
Patch was sent to and reviewed by BPF community at:
https://lore.kernel.org/bpf
Reported-by: David Beckett <david.beckett@netronome.com>
Reviewed-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
llvm-svn: 375007
Diffstat (limited to 'llvm/lib/Target/BPF/BPFMIPeephole.cpp')
| -rw-r--r-- | llvm/lib/Target/BPF/BPFMIPeephole.cpp | 189 |
1 files changed, 184 insertions, 5 deletions
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index fafd2f703ad..72fd131c459 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -71,7 +71,7 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) { MF = &MFParm; MRI = &MF->getRegInfo(); TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); - LLVM_DEBUG(dbgs() << "*** BPF MachineSSA peephole pass ***\n\n"); + LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n"); } bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) @@ -186,7 +186,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { } // end default namespace INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE, - "BPF MachineSSA Peephole Optimization", false, false) + "BPF MachineSSA Peephole Optimization For ZEXT Eliminate", + false, false) char BPFMIPeephole::ID = 0; FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); } @@ -253,12 +254,16 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) { // enabled. The special type cast insn MOV_32_64 involves different // register class on src (i32) and dst (i64), RA could generate useless // instruction due to this. - if (MI.getOpcode() == BPF::MOV_32_64) { + unsigned Opcode = MI.getOpcode(); + if (Opcode == BPF::MOV_32_64 || + Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) { Register dst = MI.getOperand(0).getReg(); - Register dst_sub = TRI->getSubReg(dst, BPF::sub_32); Register src = MI.getOperand(1).getReg(); - if (dst_sub != src) + if (Opcode == BPF::MOV_32_64) + dst = TRI->getSubReg(dst, BPF::sub_32); + + if (dst != src) continue; ToErase = &MI; @@ -281,3 +286,177 @@ FunctionPass* llvm::createBPFMIPreEmitPeepholePass() { return new BPFMIPreEmitPeephole(); } + +STATISTIC(TruncElemNum, "Number of truncation eliminated"); + +namespace { + +struct BPFMIPeepholeTruncElim : public MachineFunctionPass { + + static char ID; + const BPFInstrInfo *TII; + MachineFunction *MF; + MachineRegisterInfo *MRI; + + BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) { + initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry()); + } + +private: + // Initialize class variables. + void initialize(MachineFunction &MFParm); + + bool eliminateTruncSeq(void); + +public: + + // Main entry point for this pass. + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + + initialize(MF); + + return eliminateTruncSeq(); + } +}; + +static bool TruncSizeCompatible(int TruncSize, unsigned opcode) +{ + if (TruncSize == 1) + return opcode == BPF::LDB || opcode == BPF::LDB32; + + if (TruncSize == 2) + return opcode == BPF::LDH || opcode == BPF::LDH32; + + if (TruncSize == 4) + return opcode == BPF::LDW || opcode == BPF::LDW32; + + return false; +} + +// Initialize class variables. +void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) { + MF = &MFParm; + MRI = &MF->getRegInfo(); + TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); + LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n"); +} + +// Reg truncating is often the result of 8/16/32bit->64bit or +// 8/16bit->32bit conversion. If the reg value is loaded with +// masked byte width, the AND operation can be removed since +// BPF LOAD already has zero extension. +// +// This also solved a correctness issue. +// In BPF socket-related program, e.g., __sk_buff->{data, data_end} +// are 32-bit registers, but later on, kernel verifier will rewrite +// it with 64-bit value. Therefore, truncating the value after the +// load will result in incorrect code. +bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { + MachineInstr* ToErase = nullptr; + bool Eliminated = false; + + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + // The second insn to remove if the eliminate candidate is a pair. + MachineInstr *MI2 = nullptr; + Register DstReg, SrcReg; + MachineInstr *DefMI; + int TruncSize = -1; + + // If the previous instruction was marked for elimination, remove it now. + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + + // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate + // for BPF ANDI is i32, and this case only happens on ALU64. + if (MI.getOpcode() == BPF::SRL_ri && + MI.getOperand(2).getImm() == 32) { + SrcReg = MI.getOperand(1).getReg(); + MI2 = MRI->getVRegDef(SrcReg); + DstReg = MI.getOperand(0).getReg(); + + if (!MI2 || + MI2->getOpcode() != BPF::SLL_ri || + MI2->getOperand(2).getImm() != 32) + continue; + + // Update SrcReg. + SrcReg = MI2->getOperand(1).getReg(); + DefMI = MRI->getVRegDef(SrcReg); + if (DefMI) + TruncSize = 4; + } else if (MI.getOpcode() == BPF::AND_ri || + MI.getOpcode() == BPF::AND_ri_32) { + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + DefMI = MRI->getVRegDef(SrcReg); + + if (!DefMI) + continue; + + int64_t imm = MI.getOperand(2).getImm(); + if (imm == 0xff) + TruncSize = 1; + else if (imm == 0xffff) + TruncSize = 2; + } + + if (TruncSize == -1) + continue; + + // The definition is PHI node, check all inputs. + if (DefMI->isPHI()) { + bool CheckFail = false; + + for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { + MachineOperand &opnd = DefMI->getOperand(i); + if (!opnd.isReg()) { + CheckFail = true; + break; + } + + MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); + if (!PhiDef || PhiDef->isPHI() || + !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) { + CheckFail = true; + break; + } + } + + if (CheckFail) + continue; + } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) { + continue; + } + + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg) + .addReg(SrcReg); + + if (MI2) + MI2->eraseFromParent(); + + // Mark it to ToErase, and erase in the next iteration. + ToErase = &MI; + TruncElemNum++; + Eliminated = true; + } + } + + return Eliminated; +} + +} // end default namespace + +INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim", + "BPF MachineSSA Peephole Optimization For TRUNC Eliminate", + false, false) + +char BPFMIPeepholeTruncElim::ID = 0; +FunctionPass* llvm::createBPFMIPeepholeTruncElimPass() +{ + return new BPFMIPeepholeTruncElim(); +} |

