diff options
author | David Green <david.green@arm.com> | 2018-11-05 14:54:34 +0000 |
---|---|---|
committer | David Green <david.green@arm.com> | 2018-11-05 14:54:34 +0000 |
commit | ba9f245b0df90a7214293f937ae0c6748c6503f0 (patch) | |
tree | f74f6ef7566dcac60d22d32f3680f32531862d14 /llvm/test/Transforms/Inline | |
parent | 8d7c351799692a93621bb44ceec744c853c41942 (diff) | |
download | bcm5719-llvm-ba9f245b0df90a7214293f937ae0c6748c6503f0.tar.gz bcm5719-llvm-ba9f245b0df90a7214293f937ae0c6748c6503f0.zip |
[Inliner] Penalise inlining of calls with loops at Oz
We currently seem to underestimate the size of functions with loops in them,
both in terms of absolute code size and in the difficulty of dealing with
such code. (Calls, for example, can be tail merged to further reduce
code size.) As a result, at -Oz we can increase code size by inlining small
loops multiple times.
This patch attempts to penalise functions with loops at -Oz by adding a CallPenalty
for each top-level loop in the function. It uses LoopInfo (LI), and hence the
DominatorTree (DT), to count the loops. As we are dealing with minsize, the inline
threshold is small and functions at this point should be relatively small, making the
construction of these analyses cheap.
Differential Revision: https://reviews.llvm.org/D52716
llvm-svn: 346134
Diffstat (limited to 'llvm/test/Transforms/Inline')
-rw-r--r-- | llvm/test/Transforms/Inline/ARM/loop-add.ll | 95 | ||||
-rw-r--r-- | llvm/test/Transforms/Inline/ARM/loop-memcpy.ll | 87 | ||||
-rw-r--r-- | llvm/test/Transforms/Inline/ARM/loop-noinline.ll | 49 |
3 files changed, 231 insertions, 0 deletions
diff --git a/llvm/test/Transforms/Inline/ARM/loop-add.ll b/llvm/test/Transforms/Inline/ARM/loop-add.ll new file mode 100644 index 00000000000..a4717bc95b7 --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/loop-add.ll @@ -0,0 +1,95 @@ +; RUN: opt -inline %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; CHECK-LABEL: void @doCalls +define void @doCalls(i8* nocapture %p1, i8* nocapture %p2, i32 %n) #0 { +entry: + %div = lshr i32 %n, 1 +; CHECK: call void @LoopCall + tail call void @LoopCall(i8* %p1, i8* %p2, i32 %div) #0 + + %div2 = lshr i32 %n, 2 +; CHECK: call void @LoopCall + tail call void @LoopCall(i8* %p1, i8* %p2, i32 %div2) #0 + +; CHECK-NOT: call void @LoopCall + tail call void @LoopCall(i8* %p2, i8* %p1, i32 0) #0 + +; CHECK-NOT: call void @LoopCall_internal + tail call void @LoopCall_internal(i8* %p1, i8* %p2, i32 %div2) #0 + + %div3 = lshr i32 %n, 4 +; CHECK-NOT: call void @SimpleCall + tail call void @SimpleCall(i8* %p2, i8* %p1, i32 %div3) #0 + ret void +} + +; CHECK-LABEL: define void @LoopCall +define void @LoopCall(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + %c = icmp ne i32 %num, 0 + br i1 %c, label %while.cond, label %while.end + +while.cond: ; preds = %while.body, %entry + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %while.body ] + %p_dest.0 = phi i8* [ %dest, %entry ], [ %incdec.ptr2, %while.body ] + %p_source.0 = phi i8* [ %source, %entry ], [ %incdec.ptr, %while.body ] + %cmp = icmp eq i32 %num.addr.0, 0 + br i1 %cmp, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %incdec.ptr = getelementptr inbounds i8, i8* %p_source.0, i32 1 + %0 = load i8, i8* %p_source.0, align 1 + %1 = trunc i32 %num.addr.0 to i8 + %conv1 = add i8 %0, %1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %p_dest.0, i32 1 + store i8 %conv1, i8* %p_dest.0, align 1 + %dec = add i32 %num.addr.0, -1 + br label %while.cond + 
+while.end: ; preds = %while.cond + ret void +} + +; CHECK-NOT: define internal void @LoopCall_internal +define internal void @LoopCall_internal(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + %c = icmp ne i32 %num, 0 + br i1 %c, label %while.cond, label %while.end + +while.cond: ; preds = %while.body, %entry + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %while.body ] + %p_dest.0 = phi i8* [ %dest, %entry ], [ %incdec.ptr2, %while.body ] + %p_source.0 = phi i8* [ %source, %entry ], [ %incdec.ptr, %while.body ] + %cmp = icmp eq i32 %num.addr.0, 0 + br i1 %cmp, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %incdec.ptr = getelementptr inbounds i8, i8* %p_source.0, i32 1 + %0 = load i8, i8* %p_source.0, align 1 + %1 = trunc i32 %num.addr.0 to i8 + %conv1 = add i8 %0, %1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %p_dest.0, i32 1 + store i8 %conv1, i8* %p_dest.0, align 1 + %dec = add i32 %num.addr.0, -1 + br label %while.cond + +while.end: ; preds = %while.cond + ret void +} + +; CHECK-LABEL: define void @SimpleCall +define void @SimpleCall(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + %arrayidx = getelementptr inbounds i8, i8* %source, i32 %num + %0 = load i8, i8* %arrayidx, align 1 + %1 = xor i8 %0, 127 + %arrayidx2 = getelementptr inbounds i8, i8* %dest, i32 %num + store i8 %1, i8* %arrayidx2, align 1 + ret void +} + +attributes #0 = { minsize optsize } + diff --git a/llvm/test/Transforms/Inline/ARM/loop-memcpy.ll b/llvm/test/Transforms/Inline/ARM/loop-memcpy.ll new file mode 100644 index 00000000000..3b3625c6027 --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/loop-memcpy.ll @@ -0,0 +1,87 @@ +; RUN: opt -inline %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; CHECK-LABEL: define void @matcpy +define void @matcpy(i8* %dest, i8* %source, i32 %num) #0 { +entry: + %0 = ptrtoint i8* 
%dest to i32 + %1 = ptrtoint i8* %source to i32 + %2 = xor i32 %0, %1 + %3 = and i32 %2, 3 + %cmp = icmp eq i32 %3, 0 + br i1 %cmp, label %if.then, label %if.else20 + +if.then: ; preds = %entry + %sub = sub i32 0, %0 + %and2 = and i32 %sub, 3 + %add = or i32 %and2, 4 + %cmp3 = icmp ugt i32 %add, %num + br i1 %cmp3, label %if.else, label %if.then4 + +if.then4: ; preds = %if.then + %sub5 = sub i32 %num, %and2 + %shr = and i32 %sub5, -4 + %sub7 = sub i32 %sub5, %shr + %tobool = icmp eq i32 %and2, 0 + br i1 %tobool, label %if.end, label %if.then8 + +if.then8: ; preds = %if.then4 +; CHECK: call fastcc void @memcpy + call fastcc void @memcpy(i8* %dest, i8* %source, i32 %and2) #0 + %add.ptr = getelementptr inbounds i8, i8* %dest, i32 %and2 + %add.ptr9 = getelementptr inbounds i8, i8* %source, i32 %and2 + br label %if.end + +if.end: ; preds = %if.then4, %if.then8 + %p_dest.0 = phi i8* [ %add.ptr, %if.then8 ], [ %dest, %if.then4 ] + %p_source.0 = phi i8* [ %add.ptr9, %if.then8 ], [ %source, %if.then4 ] + %tobool14 = icmp eq i32 %sub7, 0 + br i1 %tobool14, label %if.end22, label %if.then15 + +if.then15: ; preds = %if.end + %add.ptr13 = getelementptr inbounds i8, i8* %p_source.0, i32 %shr + %add.ptr11 = getelementptr inbounds i8, i8* %p_dest.0, i32 %shr +; CHECK: call fastcc void @memcpy + call fastcc void @memcpy(i8* %add.ptr11, i8* %add.ptr13, i32 %sub7) #0 + br label %if.end22 + +if.else: ; preds = %if.then + call fastcc void @memcpy(i8* %dest, i8* %source, i32 %num) #0 + br label %if.end22 + +if.else20: ; preds = %entry + call fastcc void @memcpy(i8* %dest, i8* %source, i32 %num) #0 + br label %if.end22 + +if.end22: ; preds = %if.then15, %if.end, %if.else, %if.else20 + ret void +} + +; CHECK-LABEL: define internal void @memcpy +define internal void @memcpy(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %while.body ] + %p_dest.0 
= phi i8* [ %dest, %entry ], [ %incdec.ptr1, %while.body ] + %p_source.0 = phi i8* [ %source, %entry ], [ %incdec.ptr, %while.body ] + %cmp = icmp eq i32 %num.addr.0, 0 + br i1 %cmp, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %incdec.ptr = getelementptr inbounds i8, i8* %p_source.0, i32 1 + %0 = load i8, i8* %p_source.0, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %p_dest.0, i32 1 + store i8 %0, i8* %p_dest.0, align 1 + %dec = add i32 %num.addr.0, -1 + br label %while.cond + +while.end: ; preds = %while.cond + ret void +} + +attributes #0 = { minsize optsize } + diff --git a/llvm/test/Transforms/Inline/ARM/loop-noinline.ll b/llvm/test/Transforms/Inline/ARM/loop-noinline.ll new file mode 100644 index 00000000000..8438d16b03e --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/loop-noinline.ll @@ -0,0 +1,49 @@ +; RUN: opt -inline %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; Check we don't inline loops at -Oz. They tend to be larger than we +; expect. 
+ +; CHECK: define i8* @H +@digits = constant [16 x i8] c"0123456789ABCDEF", align 1 +define i8* @H(i8* %p, i32 %val, i32 %num) #0 { +entry: + br label %do.body + +do.body: ; preds = %do.body, %entry + %p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr, %do.body ] + %val.addr.0 = phi i32 [ %val, %entry ], [ %shl, %do.body ] + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %do.body ] + %shr = lshr i32 %val.addr.0, 28 + %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* @digits, i32 0, i32 %shr + %0 = load i8, i8* %arrayidx, align 1 + %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i32 1 + store i8 %0, i8* %p.addr.0, align 1 + %shl = shl i32 %val.addr.0, 4 + %dec = add i32 %num.addr.0, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + %scevgep = getelementptr i8, i8* %p, i32 %num + ret i8* %scevgep +} + +define nonnull i8* @call1(i8* %p, i32 %val, i32 %num) #0 { +entry: +; CHECK: tail call i8* @H + %call = tail call i8* @H(i8* %p, i32 %val, i32 %num) #0 + ret i8* %call +} + +define nonnull i8* @call2(i8* %p, i32 %val) #0 { +entry: +; CHECK: tail call i8* @H + %call = tail call i8* @H(i8* %p, i32 %val, i32 32) #0 + ret i8* %call +} + +attributes #0 = { minsize optsize } + |