diff options
| author | Wei Mi <wmi@google.com> | 2016-04-06 15:41:07 +0000 |
|---|---|---|
| committer | Wei Mi <wmi@google.com> | 2016-04-06 15:41:07 +0000 |
| commit | 18293bef4e4efaafff57da67cef87ea47dc26cae (patch) | |
| tree | 979c8be87f939b1a7c5c65feeb71ed537bb3424c /llvm/test/CodeGen/X86 | |
| parent | 506f295a109918ae7449688e5d6eb0c024f895d0 (diff) | |
| download | bcm5719-llvm-18293bef4e4efaafff57da67cef87ea47dc26cae.tar.gz bcm5719-llvm-18293bef4e4efaafff57da67cef87ea47dc26cae.zip | |
Recommit r265309 after fixing an invalid memory reference bug that happened
when DenseMap grew and moved memory. I verified that it fixes the bootstrap
problem on x86_64-linux-gnu, but I cannot verify whether it fixes
the bootstrap error on clang-ppc64be-linux. I will watch the build-bot
results closely.
Replace analyzeSiblingValues with a new algorithm to fix its compile
time issue. The patch is to solve PR17409 and its duplicates.
analyzeSiblingValues is an N x N complexity algorithm where N is
the number of siblings generated by reg splitting. Although it
causes significant compile time issues when N is large, it is also
important for performance since it removes redundant spills and
enables rematerialization.
To solve the compile time issue, the patch removes analyzeSiblingValues
and replaces it with lower cost alternatives containing two parts. The
first part creates a new spill hoisting method in postOptimization of
register allocation. It does spill hoisting at once after all the spills
are generated instead of inside every instance of selectOrSplit. The
second part queries the defining expression of the original register for
rematerialization and keeps it always available during register allocation
even if it is already dead. It deletes those dead instructions only in
postOptimization. With the two parts in the patch, it can remove
analyzeSiblingValues without sacrificing performance.
Differential Revision: http://reviews.llvm.org/D15302
llvm-svn: 265547
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/fp128-compare.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/hoist-spill.ll | 121 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/new-remat.ll | 70 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 10 |
4 files changed, 199 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/X86/fp128-compare.ll b/llvm/test/CodeGen/X86/fp128-compare.ll index b5d4fbe1b74..d9a48c5c13e 100644 --- a/llvm/test/CodeGen/X86/fp128-compare.ll +++ b/llvm/test/CodeGen/X86/fp128-compare.ll @@ -86,8 +86,8 @@ entry: %cond = select i1 %cmp, fp128 %x, fp128 %y ret fp128 %cond ; CHECK-LABEL: TestMax: -; CHECK: movaps %xmm1 ; CHECK: movaps %xmm0 +; CHECK: movaps %xmm1 ; CHECK: callq __gttf2 ; CHECK: movaps {{.*}}, %xmm0 ; CHECK: testl %eax, %eax diff --git a/llvm/test/CodeGen/X86/hoist-spill.ll b/llvm/test/CodeGen/X86/hoist-spill.ll new file mode 100644 index 00000000000..db9c4105a02 --- /dev/null +++ b/llvm/test/CodeGen/X86/hoist-spill.ll @@ -0,0 +1,121 @@ +; RUN: llc < %s | FileCheck %s + +; grep 'Spill' |sed 's%.*\(-[0-9]\+(\%rsp)\).*%\1%g' |sort |uniq -d |awk '{if (/rsp/); exit -1}' +; Check no spills to the same stack slot after hoisting. +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp) +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp) +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp) +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp) +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp) +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp) + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = external global i32*, align 8 +@b = external global i32, align 4 +@d = external global i32*, align 8 + +; Function Attrs: norecurse noreturn nounwind uwtable +define void @fn1(i32 %p1) { +entry: + %tmp = load i32*, i32** @d, align 8 + %tmp1 = load i32*, i32** @a, align 8 + %tmp2 = sext i32 %p1 to i64 + br label %for.cond + +for.cond: ; preds = %for.inc14, %entry + %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ] + %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ] + %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ] + %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ] + %tmp3 = icmp sgt i32 undef, 0 + %smax52 = select 
i1 %tmp3, i32 undef, i32 0 + %tmp4 = zext i32 %smax52 to i64 + %tmp5 = icmp sgt i64 undef, %tmp4 + %smax53 = select i1 %tmp5, i64 undef, i64 %tmp4 + %tmp6 = add nsw i64 %smax53, 1 + %tmp7 = sub nsw i64 %tmp6, %tmp4 + %tmp8 = add nsw i64 %tmp7, -8 + %tmp9 = sub i32 undef, %indvar + %tmp10 = icmp sgt i64 %tmp2, 0 + %smax40 = select i1 %tmp10, i64 %tmp2, i64 0 + %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40 + %indvars.iv30 = add i32 %indvars.iv30.in, -1 + %tmp11 = icmp sgt i32 %indvars.iv30, 0 + %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0 + %tmp12 = zext i32 %smax to i64 + %sub = sub nsw i32 %p1, %c.0 + %cmp = icmp sgt i32 %sub, 0 + %sub. = select i1 %cmp, i32 %sub, i32 0 + %cmp326 = icmp sgt i32 %k.0, %p1 + br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader + +for.body.preheader: ; preds = %for.cond + br label %for.body + +for.cond4.preheader: ; preds = %for.body, %for.cond + %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ] + %cmp528 = icmp sgt i32 %sub., %p1 + br i1 %cmp528, label %for.inc14, label %for.body6.preheader + +for.body6.preheader: ; preds = %for.cond4.preheader + br i1 undef, label %for.body6, label %min.iters.checked + +min.iters.checked: ; preds = %for.body6.preheader + br i1 undef, label %for.body6, label %vector.memcheck + +vector.memcheck: ; preds = %min.iters.checked + %bound1 = icmp ule i32* undef, %scevgep41 + %memcheck.conflict = and i1 undef, %bound1 + br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader + +vector.body.preheader: ; preds = %vector.memcheck + %lcmp.mod = icmp eq i64 undef, 0 + br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol + +vector.body.prol: ; preds = %vector.body.prol, %vector.body.preheader + %prol.iter.cmp = icmp eq i64 undef, 0 + br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol + +vector.body.preheader.split: ; preds = %vector.body.prol, %vector.body.preheader + %tmp13 = icmp ult i64 %tmp8, 24 
+ br i1 %tmp13, label %middle.block, label %vector.body + +vector.body: ; preds = %vector.body, %vector.body.preheader.split + %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ] + %index.next = add i64 %index, 8 + %offset.idx.1 = add i64 %tmp12, %index.next + %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1 + %tmp15 = bitcast i32* %tmp14 to <4 x i32>* + %wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4 + %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1 + %tmp17 = bitcast i32* %tmp16 to <4 x i32>* + store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4 + %index.next.3 = add i64 %index, 32 + br i1 undef, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body, %vector.body.preheader.split + br i1 undef, label %for.inc14, label %for.body6 + +for.body: ; preds = %for.body, %for.body.preheader + %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ] + %add = add nsw i32 %k.127, 1 + %tmp18 = load i32, i32* undef, align 4 + store i32 %tmp18, i32* @b, align 4 + br i1 undef, label %for.body, label %for.cond4.preheader + +for.body6: ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader + %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ] + %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32 + %tmp19 = load i32, i32* %arrayidx8, align 4 + %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32 + store i32 %tmp19, i32* %arrayidx10, align 4 + %cmp5 = icmp slt i64 %indvars.iv32, undef + br i1 %cmp5, label %for.body6, label %for.inc14 + +for.inc14: ; preds = %for.body6, %middle.block, %for.cond4.preheader + %inc15 = add nuw nsw i32 %c.0, 1 + %indvar.next = add i32 %indvar, 1 + br label %for.cond +} diff --git a/llvm/test/CodeGen/X86/new-remat.ll 
b/llvm/test/CodeGen/X86/new-remat.ll new file mode 100644 index 00000000000..d06ac72e3ec --- /dev/null +++ b/llvm/test/CodeGen/X86/new-remat.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s | FileCheck %s +; Check all spills are rematerialized. +; CHECK-NOT: Spill + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@b = common global double 0.000000e+00, align 8 +@a = common global i32 0, align 4 + +; Function Attrs: nounwind uwtable +define i32 @uniform_testdata(i32 %p1) { +entry: + %cmp3 = icmp sgt i32 %p1, 0 + br i1 %cmp3, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %tmp = add i32 %p1, -1 + %xtraiter = and i32 %p1, 7 + %lcmp.mod = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader + +for.body.prol.preheader: ; preds = %for.body.preheader + br label %for.body.prol + +for.body.prol: ; preds = %for.body.prol, %for.body.prol.preheader + %i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ] + %prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ] + %tmp1 = load double, double* @b, align 8 + %call.prol = tail call double @pow(double %tmp1, double 2.500000e-01) + %inc.prol = add nuw nsw i32 %i.04.prol, 1 + %prol.iter.sub = add i32 %prol.iter, -1 + %prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0 + br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol + +for.body.preheader.split.loopexit: ; preds = %for.body.prol + %inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ] + br label %for.body.preheader.split + +for.body.preheader.split: ; preds = %for.body.preheader.split.loopexit, %for.body.preheader + %i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ] + %tmp2 = icmp ult i32 %tmp, 7 + br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split + 
+for.body.preheader.split.split: ; preds = %for.body.preheader.split + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader.split.split + %i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ] + %tmp3 = load double, double* @b, align 8 + %call = tail call double @pow(double %tmp3, double 2.500000e-01) + %tmp4 = load double, double* @b, align 8 + %call.1 = tail call double @pow(double %tmp4, double 2.500000e-01) + %inc.7 = add nsw i32 %i.04, 8 + %exitcond.7 = icmp eq i32 %inc.7, %p1 + br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body + +for.end.loopexit.unr-lcssa: ; preds = %for.body + br label %for.end.loopexit + +for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %tmp5 = load i32, i32* @a, align 4 + ret i32 %tmp5 +} + +; Function Attrs: nounwind +declare double @pow(double, double) diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index 46b65bd24fc..1d6b4f94731 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s ; This testing case is reduced from 254.gap SyFgets function. -; We make sure a spill is not hoisted to a hotter outer loop. +; We make sure a spill is hoisted to a cold BB inside the hotter outer loop. %struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] } %struct.TMP.2 = type { i8*, i32, i32, i16, i16, %struct.TMP.3, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.TMP.3, %struct.TMP.4*, i32, [3 x i8], [1 x i8], %struct.TMP.3, i32, i64 } @@ -181,6 +181,10 @@ sw.bb474: br i1 %cmp476, label %if.end517, label %do.body479.preheader do.body479.preheader: + ; CHECK: do.body479.preheader + ; spill is hoisted here. 
Although loop depth1 is even hotter than loop depth2, do.body479.preheader is cold. + ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) + ; CHECK: land.rhs485 %cmp4833314 = icmp eq i8 undef, 0 br i1 %cmp4833314, label %if.end517, label %land.rhs485 @@ -200,8 +204,8 @@ land.lhs.true490: lor.rhs500: ; CHECK: lor.rhs500 - ; Make sure that we don't hoist the spill to outer loops. - ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) + ; Make sure spill is hoisted to a cold preheader in outside loop. + ; CHECK-NOT: movq %r{{.*}}, {{[0-9]+}}(%rsp) ; CHECK: callq {{.*}}maskrune %call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256) br i1 undef, label %land.lhs.true504, label %do.body479.backedge |

