Diffstat (limited to 'llvm/lib/Target/BPF/BPFISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/BPF/BPFISelLowering.cpp | 73
1 file changed, 68 insertions, 5 deletions
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 5c9f51a3924..9272cf692dc 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -33,6 +33,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "bpf-lower"
 
+static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
+  cl::Hidden, cl::init(false),
+  cl::desc("Expand memcpy into load/store pairs in order"));
+
 static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
   MachineFunction &MF = DAG.getMachineFunction();
   DAG.getContext()->diagnose(
@@ -132,10 +136,30 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   setMinFunctionAlignment(3);
   setPrefFunctionAlignment(3);
 
-  // inline memcpy() for kernel to see explicit copy
-  MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 128;
-  MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 128;
-  MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128;
+  if (BPFExpandMemcpyInOrder) {
+    // LLVM generic code will try to expand memcpy into load/store pairs at
+    // this stage, which is before quite a few IR optimization passes, so the
+    // loads and stores could potentially be moved apart from each other,
+    // which causes trouble for the memcpy pattern matchers inside kernel
+    // eBPF JIT compilers.
+    //
+    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
+    // expansion of memcpy to a later stage in the IR optimization pipeline so
+    // those load/store pairs won't be touched and can be kept in order. Hence,
+    // we set MaxStoresPerMem* to zero to disable the generic
+    // getMemcpyLoadsAndStores path and use the target expander EmitTargetCodeForMemcpy.
+    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
+    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
+    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
+  } else {
+    // inline memcpy() for kernel to see explicit copy
+    unsigned CommonMaxStores =
+        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();
+
+    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
+    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
+    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
+  }
 
   // CPU/Feature control
   HasAlu32 = STI.getHasAlu32();
@@ -518,6 +542,8 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "BPFISD::BR_CC";
   case BPFISD::Wrapper:
     return "BPFISD::Wrapper";
+  case BPFISD::MEMCPY:
+    return "BPFISD::MEMCPY";
   }
   return nullptr;
 }
@@ -557,6 +583,37 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
 }
 
 MachineBasicBlock *
+BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
+                                                     MachineBasicBlock *BB)
+                                                     const {
+  MachineFunction *MF = MI.getParent()->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineInstrBuilder MIB(*MF, MI);
+  unsigned ScratchReg;
+
+  // This function does custom insertion while lowering BPFISD::MEMCPY, which
+  // per memcpy semantics only has two register operands: the copy source
+  // address and the copy destination address.
+  //
+  // Because we will expand BPFISD::MEMCPY into load/store pairs, we need a
+  // third scratch register to serve as the destination register of the loads
+  // and the source register of the stores.
+  //
+  // The scratch register here carries the Define | Dead | EarlyClobber flags.
+  // The EarlyClobber flag has the semantic property that the operand it is
+  // attached to is clobbered before the rest of the inputs are read. Hence it
+  // must be unique among the operands to the instruction. The Define flag is
+  // needed to convince the machine verifier that an undef value isn't a
+  // problem, as we are loading from memory into it anyway. The Dead flag is
+  // needed as the value in scratch isn't used by any other instruction.
+  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
+  MIB.addReg(ScratchReg,
+             RegState::Define | RegState::Dead | RegState::EarlyClobber);
+
+  return BB;
+}
+
+MachineBasicBlock *
 BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
@@ -567,6 +624,8 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                      Opc == BPF::Select_32 ||
                      Opc == BPF::Select_32_64);
 
+  bool isMemcpyOp = Opc == BPF::MEMCPY;
+
 #ifndef NDEBUG
   bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                        Opc == BPF::Select_Ri_64_32 ||
@@ -574,9 +633,13 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                        Opc == BPF::Select_Ri_32_64);
 
 
-  assert((isSelectRROp || isSelectRIOp) && "Unexpected instr type to insert");
+  assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
+         "Unexpected instr type to insert");
 #endif
 
+  if (isMemcpyOp)
+    return EmitInstrWithCustomInserterMemcpy(MI, BB);
+
   bool is32BitCmp = (Opc == BPF::Select_32 ||
                      Opc == BPF::Select_32_64 ||
                      Opc == BPF::Select_Ri_32 ||
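
For context, the constructor comment above leans on the SelectionDAG target hook EmitTargetCodeForMemcpy: with MaxStoresPerMem* forced to zero, the generic getMemcpyLoadsAndStores path is skipped and the target hook gets to create the BPFISD::MEMCPY node that the custom inserter later expands. The sketch below is not part of this diff; the class name BPFSelectionDAGInfo, the header names, and the exact bookkeeping are assumptions about the companion file, shown only to illustrate how the pieces would fit together.

// Hypothetical companion hook (not in this diff). BPFSelectionDAGInfo and the
// included headers are assumptions; the signature follows SelectionDAGTargetInfo.
#include "BPFISelLowering.h"
#include "BPFSelectionDAGInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // Only a compile-time-constant copy length can become a fixed sequence of
  // load/store pairs; anything else falls back to the default lowering.
  auto *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();

  // Respect the same store-count budget the non-in-order path uses via
  // getCommonMaxStoresPerMemFunc(), as called from the constructor above.
  uint64_t CopyLen = ConstantSize->getZExtValue();
  uint64_t NumStores = alignTo(CopyLen, Align) / Align;
  if (NumStores > getCommonMaxStoresPerMemFunc())
    return SDValue();

  // Emit the custom node; EmitInstrWithCustomInserterMemcpy later attaches the
  // scratch register once this has been selected into the BPF::MEMCPY pseudo.
  return DAG.getNode(BPFISD::MEMCPY, DL, MVT::Other, Chain, Dst, Src,
                     DAG.getConstant(CopyLen, DL, MVT::i64),
                     DAG.getConstant(Align, DL, MVT::i64));
}

Since -bpf-expand-memcpy-in-order is a hidden cl::opt that defaults to false, the behaviour above is opt-in; it would be enabled on the llc command line, e.g. llc -march=bpf -bpf-expand-memcpy-in-order foo.ll.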