author    Yonghong Song <yhs@fb.com>    2018-07-25 22:40:02 +0000
committer Yonghong Song <yhs@fb.com>    2018-07-25 22:40:02 +0000
commit    71d81e5c8fcebb7663973d686ac1ad3309502f4b (patch)
tree      b736fbd18247bcced3522642ad47914ce594235d /llvm/test/CodeGen/BPF
parent    99ca3c0a617337902c2b2006231acc64397d9df3 (diff)
download  bcm5719-llvm-71d81e5c8fcebb7663973d686ac1ad3309502f4b.tar.gz
          bcm5719-llvm-71d81e5c8fcebb7663973d686ac1ad3309502f4b.zip
bpf: new option -bpf-expand-memcpy-in-order to expand memcpy in order
Some BPF JIT backends would like to optimize memcpy in their own architecture-specific way. At the moment, however, there is no reliable way for a JIT backend to see memcpy semantics: the LLVM BPF backend expands memcpy into load/store sequences, and the scheduler may then move those loads and stores apart from each other, so in-kernel BPF JIT backends cannot reliably recognize memcpy by peephole-matching the BPF instruction sequence.

This patch introduces a new expansion infrastructure for the memcpy intrinsic. To get a stable, in-order load/store sequence, memcpy is first lowered into a BPF::MEMCPY node, which is then expanded into an in-order load/store sequence in the expandPostRAPseudo pass, i.e. after instruction scheduling. This way, kernel JIT backends can reliably recognize memcpy by scanning the BPF instruction sequence.

The new memcpy expansion infrastructure is gated by a new option:

  -bpf-expand-memcpy-in-order

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Yonghong Song <yhs@fb.com>

llvm-svn: 337977
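For intuition, the sketch below is a minimal, standalone C model of the offset/width schedule that the CHECK lines in the new test expect. It is not the backend implementation; the chunking rule it encodes (copy with the widest access the given alignment allows, capped at 8 bytes, then finish the tail with progressively narrower accesses, all at increasing offsets) is inferred from the test's expected output, and the helper name emit_schedule is purely illustrative.

/*
 * Standalone model (not the actual BPF backend code) of the in-order
 * load/store schedule the test below checks: use the widest access the
 * alignment allows, capped at 8 bytes, and finish the odd tail bytes
 * with narrower accesses, at strictly increasing offsets.
 */
#include <stdio.h>

static void emit_schedule(unsigned align, unsigned len)
{
    unsigned width = align > 8 ? 8 : align;  /* BPF loads/stores are at most 8 bytes wide */
    unsigned off = 0;

    printf("align %u, len %u:\n", align, len);
    while (len) {
        while (width > len)                  /* shrink the access for the tail bytes */
            width >>= 1;
        printf("  copy u%u at offset %u\n", width * 8, off);
        off += width;
        len -= width;
    }
}

int main(void)
{
    emit_schedule(1, 9);   /* cal_align1 */
    emit_schedule(2, 9);   /* cal_align2 */
    emit_schedule(4, 19);  /* cal_align4 */
    emit_schedule(8, 27);  /* cal_align8 */
    return 0;
}

Running this model reproduces the sequences checked below, e.g. for align 8 / length 27: three u64 copies at offsets 0, 8 and 16, then a u16 at 24 and a u8 at 26.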
Diffstat (limited to 'llvm/test/CodeGen/BPF')
-rw-r--r--  llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll | 116
1 file changed, 116 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll b/llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll
new file mode 100644
index 00000000000..6ee31264c76
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/memcpy-expand-in-order.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -march=bpfel -bpf-expand-memcpy-in-order | FileCheck %s
+; RUN: llc < %s -march=bpfeb -bpf-expand-memcpy-in-order | FileCheck %s
+;
+; #define COPY_LEN 9
+;
+; void cal_align1(void *a, void *b)
+; {
+; __builtin_memcpy(a, b, COPY_LEN);
+; }
+;
+; void cal_align2(short *a, short *b)
+; {
+; __builtin_memcpy(a, b, COPY_LEN);
+; }
+;
+; #undef COPY_LEN
+; #define COPY_LEN 19
+; void cal_align4(int *a, int *b)
+; {
+; __builtin_memcpy(a, b, COPY_LEN);
+; }
+;
+; #undef COPY_LEN
+; #define COPY_LEN 27
+; void cal_align8(long long *a, long long *b)
+; {
+; __builtin_memcpy(a, b, COPY_LEN);
+; }
+
+; Function Attrs: nounwind
+define dso_local void @cal_align1(i8* nocapture %a, i8* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 9, i1 false)
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u8 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u8 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 1)
+; CHECK: *(u8 *)([[DST_REG]] + 1) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 2)
+; CHECK: *(u8 *)([[DST_REG]] + 2) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 3)
+; CHECK: *(u8 *)([[DST_REG]] + 3) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 4)
+; CHECK: *(u8 *)([[DST_REG]] + 4) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 5)
+; CHECK: *(u8 *)([[DST_REG]] + 5) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 6)
+; CHECK: *(u8 *)([[DST_REG]] + 6) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 7)
+; CHECK: *(u8 *)([[DST_REG]] + 7) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 8)
+; CHECK: *(u8 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+
+; Function Attrs: nounwind
+define dso_local void @cal_align2(i16* nocapture %a, i16* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+ %0 = bitcast i16* %a to i8*
+ %1 = bitcast i16* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 %0, i8* align 2 %1, i64 9, i1 false)
+ ret void
+}
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u16 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u16 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 2)
+; CHECK: *(u16 *)([[DST_REG]] + 2) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 4)
+; CHECK: *(u16 *)([[DST_REG]] + 4) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 6)
+; CHECK: *(u16 *)([[DST_REG]] + 6) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 8)
+; CHECK: *(u8 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+
+; Function Attrs: nounwind
+define dso_local void @cal_align4(i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+ %0 = bitcast i32* %a to i8*
+ %1 = bitcast i32* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 19, i1 false)
+ ret void
+}
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u32 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u32 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u32 *)([[SRC_REG]] + 4)
+; CHECK: *(u32 *)([[DST_REG]] + 4) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u32 *)([[SRC_REG]] + 8)
+; CHECK: *(u32 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u32 *)([[SRC_REG]] + 12)
+; CHECK: *(u32 *)([[DST_REG]] + 12) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 16)
+; CHECK: *(u16 *)([[DST_REG]] + 16) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 18)
+; CHECK: *(u8 *)([[DST_REG]] + 18) = [[SCRATCH_REG]]
+
+; Function Attrs: nounwind
+define dso_local void @cal_align8(i64* nocapture %a, i64* nocapture readonly %b) local_unnamed_addr #0 {
+entry:
+ %0 = bitcast i64* %a to i8*
+ %1 = bitcast i64* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 27, i1 false)
+ ret void
+}
+; CHECK: [[SCRATCH_REG:r[0-9]]] = *(u64 *)([[SRC_REG:r[0-9]]] + 0)
+; CHECK: *(u64 *)([[DST_REG:r[0-9]]] + 0) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u64 *)([[SRC_REG]] + 8)
+; CHECK: *(u64 *)([[DST_REG]] + 8) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u64 *)([[SRC_REG]] + 16)
+; CHECK: *(u64 *)([[DST_REG]] + 16) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u16 *)([[SRC_REG]] + 24)
+; CHECK: *(u16 *)([[DST_REG]] + 24) = [[SCRATCH_REG]]
+; CHECK: [[SCRATCH_REG]] = *(u8 *)([[SRC_REG]] + 26)
+; CHECK: *(u8 *)([[DST_REG]] + 26) = [[SCRATCH_REG]]