| author | Yonghong Song <yhs@fb.com> | 2018-07-25 22:40:02 +0000 |
|---|---|---|
| committer | Yonghong Song <yhs@fb.com> | 2018-07-25 22:40:02 +0000 |
| commit | 71d81e5c8fcebb7663973d686ac1ad3309502f4b (patch) | |
| tree | b736fbd18247bcced3522642ad47914ce594235d /llvm/test/CodeGen/BPF | |
| parent | 99ca3c0a617337902c2b2006231acc64397d9df3 (diff) | |
bpf: new option -bpf-expand-memcpy-in-order to expand memcpy in order
Some BPF JIT backends want to optimize memcpy in their own
architecture-specific way.
At the moment, however, there is no way for a JIT backend to see memcpy
semantics reliably. This is because the LLVM BPF backend expands memcpy
into load/store sequences, and the instruction scheduler may then move
those loads and stores apart from each other. As a result, BPF JIT
backends inside the kernel cannot reliably recognize memcpy semantics
by peephole-matching the BPF instruction sequence.
This patch introduces a new expansion infrastructure for the memcpy
intrinsic. To get a stable, in-order load/store sequence out of memcpy,
we first lower memcpy into a BPF::MEMCPY node, which is then expanded
into in-order load/store sequences in the expandPostRAPseudo pass, i.e.
after instruction scheduling has run. This way, kernel JIT backends can
reliably recognize memcpy by scanning the emitted BPF sequence.
The new expansion infrastructure is gated by a new option:
-bpf-expand-memcpy-in-order
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
llvm-svn: 337977
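The mechanism described in the commit message is easiest to picture at the MachineInstr level. Below is a minimal C++ sketch of what the post-RA expansion could look like, assuming a BPF::MEMCPY pseudo that carries destination, source, length, and alignment operands plus a pre-allocated scratch register; the helper name, operand layout, and opcode names here are illustrative assumptions, not the literal patch:

```cpp
// Illustrative sketch: expand a BPF::MEMCPY pseudo into an in-order
// load/store sequence after register allocation, where the instruction
// scheduler can no longer pull the pairs apart.
#include "BPFInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

void BPFInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
  // Assumed operand layout of the pseudo: dst, src, length, alignment,
  // plus a scratch register allocated when the node was created.
  unsigned DstReg = MI->getOperand(0).getReg();
  unsigned SrcReg = MI->getOperand(1).getReg();
  uint64_t CopyLen = MI->getOperand(2).getImm();
  uint64_t Alignment = MI->getOperand(3).getImm();
  unsigned ScratchReg = MI->getOperand(4).getReg();
  MachineBasicBlock *BB = MI->getParent();
  DebugLoc dl = MI->getDebugLoc();

  // Pick the widest load/store pair the alignment allows
  // (opcode names are assumptions for illustration).
  unsigned LdOpc, StOpc;
  switch (Alignment) {
  case 1:  LdOpc = BPF::LDB; StOpc = BPF::STB; break;
  case 2:  LdOpc = BPF::LDH; StOpc = BPF::STH; break;
  case 4:  LdOpc = BPF::LDW; StOpc = BPF::STW; break;
  case 8:  LdOpc = BPF::LDD; StOpc = BPF::STD; break;
  default: llvm_unreachable("unsupported memcpy alignment");
  }

  // Emit the aligned body: each load is immediately followed by its
  // store, so the kernel JIT sees back-to-back ld/st pairs.
  unsigned Iterations = CopyLen >> Log2_64(Alignment);
  for (unsigned I = 0; I != Iterations; ++I) {
    BuildMI(*BB, MI, dl, get(LdOpc), ScratchReg)
        .addReg(SrcReg).addImm(I * Alignment);
    BuildMI(*BB, MI, dl, get(StOpc))
        .addReg(ScratchReg, RegState::Kill)
        .addReg(DstReg).addImm(I * Alignment);
  }
  // The remaining CopyLen % Alignment bytes would be copied the same
  // way with progressively narrower accesses (u16, then u8), as the
  // new test's CHECK lines show.
  BB->erase(MI);
}
```

Because this runs in expandPostRAPseudo, after both scheduling and register allocation, the emitted ld/st pairs stay adjacent all the way into the final object, which is exactly the property the kernel JITs need.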
Diffstat (limited to 'llvm/test/CodeGen/BPF')
| -rw-r--r-- | llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll | 116 |
1 file changed, 116 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll b/llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll
new file mode 100644
index 00000000000..6ee31264c76
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -march=bpfel -bpf-expand-memcpy-in-order | FileCheck %s
+; RUN: llc < %s -march=bpfeb -bpf-expand-memcpy-in-order | FileCheck %s
+;
+; #define COPY_LEN 9
+;
+; void cal_align1(void *a, void *b)
+; {
+;   __builtin_memcpy(a, b, COPY_LEN);
+; }
+;
+; void cal_align2(short *a, short *b)
+; {
+;   __builtin_memcpy(a, b, COPY_LEN);
+; }
+;
+; #undef COPY_LEN
+; #define COPY_LEN 19
+; void cal_align4(int *a, int *b)
+; {
+;   __builtin_memcpy(a, b, COPY_LEN);
+; }
+;
+; #undef COPY_LEN
+; #define COPY_LEN 27
+; void cal_align8(long long *a, long long *b)
+; {
+;   __builtin_memcpy(a, b, COPY_LEN);
+; }
+
+; Function Attrs: nounwind
+define dso_local void @cal_align1(i8* nocapture %a, i8* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 9, i1 false)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u8 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u8 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 1)
+; CHECK: *(u8 *)([[DST_REG]] + 1) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 2)
+; CHECK: *(u8 *)([[DST_REG]] + 2) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 3)
+; CHECK: *(u8 *)([[DST_REG]] + 3) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 4)
+; CHECK: *(u8 *)([[DST_REG]] + 4) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 5)
+; CHECK: *(u8 *)([[DST_REG]] + 5) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 6)
+; CHECK: *(u8 *)([[DST_REG]] + 6) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 7)
+; CHECK: *(u8 *)([[DST_REG]] + 7) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 8)
+; CHECK: *(u8 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+
+; Function Attrs: nounwind
+define dso_local void @cal_align2(i16* nocapture %a, i16* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast i16* %a to i8*
+  %1 = bitcast i16* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 %0, i8* align 2 %1, i64 9, i1 false)
+  ret void
+}
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u16 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u16 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 2)
+; CHECK: *(u16 *)([[DST_REG]] + 2) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 4)
+; CHECK: *(u16 *)([[DST_REG]] + 4) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 6)
+; CHECK: *(u16 *)([[DST_REG]] + 6) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 8)
+; CHECK: *(u8 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+
+; Function Attrs: nounwind
+define dso_local void @cal_align4(i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 19, i1 false)
+  ret void
+}
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u32 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u32 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u32 *)([[SRC_REG]] + 4)
+; CHECK: *(u32 *)([[DST_REG]] + 4) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u32 *)([[SRC_REG]] + 8)
+; CHECK: *(u32 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u32 *)([[SRC_REG]] + 12)
+; CHECK: *(u32 *)([[DST_REG]] + 12) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 16)
+; CHECK: *(u16 *)([[DST_REG]] + 16) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 18)
+; CHECK: *(u8 *)([[DST_REG]] + 18) = [[SCRATCH_REG]]
+
+; Function Attrs: nounwind
+define dso_local void @cal_align8(i64* nocapture %a, i64* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast i64* %a to i8*
+  %1 = bitcast i64* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 27, i1 false)
+  ret void
+}
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u64 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u64 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u64 *)([[SRC_REG]] + 8)
+; CHECK: *(u64 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u64 *)([[SRC_REG]] + 16)
+; CHECK: *(u64 *)([[DST_REG]] + 16) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 24)
+; CHECK: *(u16 *)([[DST_REG]] + 24) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 26)
+; CHECK: *(u8 *)([[DST_REG]] + 26) = [[SCRATCH_REG]]
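To inspect the emitted sequence by hand, the test's own RUN line can be replayed outside of lit (the path assumes an LLVM source checkout):

```sh
# Replay the test's RUN line; bpfel selects little-endian BPF.
llc -march=bpfel -bpf-expand-memcpy-in-order \
    llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll -o -
```

Note how each remainder is handled with progressively narrower accesses: the 19-byte copy at 4-byte alignment becomes four u32 pairs, one u16 pair, and one u8 pair (16 + 2 + 1 = 19), and the 27-byte copy at 8-byte alignment becomes three u64 pairs plus a u16 and a u8 (24 + 2 + 1 = 27).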

