summary refs log tree commit diff stats
path: root/llvm/lib/Target/BPF/BPFISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/BPF/BPFISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/BPF/BPFISelLowering.cpp 73
1 files changed, 68 insertions, 5 deletions
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 5c9f51a3924..9272cf692dc 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -33,6 +33,10 @@ using namespace llvm;
#define DEBUG_TYPE "bpf-lower"
+static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
+ cl::Hidden, cl::init(false), // hidden option; off unless explicitly given
+ cl::desc("Expand memcpy into load/store pairs in order"));
+
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
@@ -132,10 +136,30 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(3);
setPrefFunctionAlignment(3);
- // inline memcpy() for kernel to see explicit copy
- MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 128;
- MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 128;
- MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128;
+ if (BPFExpandMemcpyInOrder) {
+ // LLVM generic code will try to expand memcpy into load/store pairs at this
+ // stage which is before quite a few IR optimization passes, therefore the
+ // loads and stores could potentially be moved apart from each other which
+ // will cause trouble for the memcpy pattern matcher inside kernel eBPF JIT
+ // compilers.
+ //
+ // When -bpf-expand-memcpy-in-order is specified, we want to defer the
+ // expansion of memcpy to a later stage in the IR optimization pipeline so
+ // those load/store pairs won't be touched and can be kept in order. Hence, we
+ // set MaxStoresPerMem* to zero to disable the generic getMemcpyLoadsAndStores
+ // code path, and ask LLVM to use the target expander EmitTargetCodeForMemcpy.
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
+ } else {
+ // inline memcpy() for kernel to see explicit copy
+ unsigned CommonMaxStores =
+ STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();
+
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
+ }
// CPU/Feature control
HasAlu32 = STI.getHasAlu32();
@@ -518,6 +542,8 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "BPFISD::BR_CC";
case BPFISD::Wrapper:
return "BPFISD::Wrapper";
+ case BPFISD::MEMCPY:
+ return "BPFISD::MEMCPY";
}
return nullptr;
}
@@ -557,6 +583,37 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
}
MachineBasicBlock *
+BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
+ MachineBasicBlock *BB)
+ const {
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstrBuilder MIB(*MF, MI);
+ unsigned ScratchReg;
+
+ // This function does custom insertion while lowering BPFISD::MEMCPY, which
+ // only has two register operands from memcpy semantics: the copy source
+ // address and the copy destination address.
+ //
+ // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
+ // a third scratch register to serve as the destination register of the load
+ // and the source register of the store.
+ //
+ // The scratch register here carries the Define | Dead | EarlyClobber flags.
+ // The EarlyClobber flag has the semantic property that the operand it is
+ // attached to is clobbered before the rest of the inputs are read. Hence it
+ // must be unique among the operands to the instruction. The Define flag is
+ // needed to convince the machine verifier that an Undef value isn't a problem
+ // as we are loading memory into it anyway. The Dead flag is needed as the
+ // value in scratch isn't supposed to be used by any other instruction.
+ ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
+ MIB.addReg(ScratchReg,
+ RegState::Define | RegState::Dead | RegState::EarlyClobber);
+
+ return BB;
+}
+
+MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
@@ -567,6 +624,8 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Opc == BPF::Select_32 ||
Opc == BPF::Select_32_64);
+ bool isMemcpyOp = Opc == BPF::MEMCPY;
+
#ifndef NDEBUG
bool isSelectRIOp = (Opc == BPF::Select_Ri ||
Opc == BPF::Select_Ri_64_32 ||
@@ -574,9 +633,13 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Opc == BPF::Select_Ri_32_64);
- assert((isSelectRROp || isSelectRIOp) && "Unexpected instr type to insert");
+ assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
+ "Unexpected instr type to insert");
#endif
+ if (isMemcpyOp)
+ return EmitInstrWithCustomInserterMemcpy(MI, BB);
+
bool is32BitCmp = (Opc == BPF::Select_32 ||
Opc == BPF::Select_32_64 ||
Opc == BPF::Select_Ri_32 ||
OpenPOWER on IntegriCloud