diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/fp128-compare.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/hoist-spill.ll | 121 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/new-remat.ll | 71 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 10 |
4 files changed, 200 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/X86/fp128-compare.ll b/llvm/test/CodeGen/X86/fp128-compare.ll index b5d4fbe1b74..d9a48c5c13e 100644 --- a/llvm/test/CodeGen/X86/fp128-compare.ll +++ b/llvm/test/CodeGen/X86/fp128-compare.ll @@ -86,8 +86,8 @@ entry: %cond = select i1 %cmp, fp128 %x, fp128 %y ret fp128 %cond ; CHECK-LABEL: TestMax: -; CHECK: movaps %xmm1 ; CHECK: movaps %xmm0 +; CHECK: movaps %xmm1 ; CHECK: callq __gttf2 ; CHECK: movaps {{.*}}, %xmm0 ; CHECK: testl %eax, %eax diff --git a/llvm/test/CodeGen/X86/hoist-spill.ll b/llvm/test/CodeGen/X86/hoist-spill.ll new file mode 100644 index 00000000000..db9c4105a02 --- /dev/null +++ b/llvm/test/CodeGen/X86/hoist-spill.ll @@ -0,0 +1,121 @@ +; RUN: llc < %s | FileCheck %s + +; grep 'Spill' |sed 's%.*\(-[0-9]\+(\%rsp)\).*%\1%g' |sort |uniq -d |awk '{if (/rsp/); exit -1}' +; Check no spills to the same stack slot after hoisting. +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp) +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp) +; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp) +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp) +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp) +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp) + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = external global i32*, align 8 +@b = external global i32, align 4 +@d = external global i32*, align 8 + +; Function Attrs: norecurse noreturn nounwind uwtable +define void @fn1(i32 %p1) { +entry: + %tmp = load i32*, i32** @d, align 8 + %tmp1 = load i32*, i32** @a, align 8 + %tmp2 = sext i32 %p1 to i64 + br label %for.cond + +for.cond: ; preds = %for.inc14, %entry + %indvar = phi i32 [ %indvar.next, %for.inc14 ], [ 0, %entry ] + %indvars.iv30.in = phi i32 [ %indvars.iv30, %for.inc14 ], [ %p1, %entry ] + %c.0 = phi i32 [ %inc15, %for.inc14 ], [ 1, %entry ] + %k.0 = phi i32 [ %k.1.lcssa, %for.inc14 ], [ undef, %entry ] + %tmp3 = icmp sgt i32 undef, 0 + %smax52 = select 
i1 %tmp3, i32 undef, i32 0 + %tmp4 = zext i32 %smax52 to i64 + %tmp5 = icmp sgt i64 undef, %tmp4 + %smax53 = select i1 %tmp5, i64 undef, i64 %tmp4 + %tmp6 = add nsw i64 %smax53, 1 + %tmp7 = sub nsw i64 %tmp6, %tmp4 + %tmp8 = add nsw i64 %tmp7, -8 + %tmp9 = sub i32 undef, %indvar + %tmp10 = icmp sgt i64 %tmp2, 0 + %smax40 = select i1 %tmp10, i64 %tmp2, i64 0 + %scevgep41 = getelementptr i32, i32* %tmp1, i64 %smax40 + %indvars.iv30 = add i32 %indvars.iv30.in, -1 + %tmp11 = icmp sgt i32 %indvars.iv30, 0 + %smax = select i1 %tmp11, i32 %indvars.iv30, i32 0 + %tmp12 = zext i32 %smax to i64 + %sub = sub nsw i32 %p1, %c.0 + %cmp = icmp sgt i32 %sub, 0 + %sub. = select i1 %cmp, i32 %sub, i32 0 + %cmp326 = icmp sgt i32 %k.0, %p1 + br i1 %cmp326, label %for.cond4.preheader, label %for.body.preheader + +for.body.preheader: ; preds = %for.cond + br label %for.body + +for.cond4.preheader: ; preds = %for.body, %for.cond + %k.1.lcssa = phi i32 [ %k.0, %for.cond ], [ %add, %for.body ] + %cmp528 = icmp sgt i32 %sub., %p1 + br i1 %cmp528, label %for.inc14, label %for.body6.preheader + +for.body6.preheader: ; preds = %for.cond4.preheader + br i1 undef, label %for.body6, label %min.iters.checked + +min.iters.checked: ; preds = %for.body6.preheader + br i1 undef, label %for.body6, label %vector.memcheck + +vector.memcheck: ; preds = %min.iters.checked + %bound1 = icmp ule i32* undef, %scevgep41 + %memcheck.conflict = and i1 undef, %bound1 + br i1 %memcheck.conflict, label %for.body6, label %vector.body.preheader + +vector.body.preheader: ; preds = %vector.memcheck + %lcmp.mod = icmp eq i64 undef, 0 + br i1 %lcmp.mod, label %vector.body.preheader.split, label %vector.body.prol + +vector.body.prol: ; preds = %vector.body.prol, %vector.body.preheader + %prol.iter.cmp = icmp eq i64 undef, 0 + br i1 %prol.iter.cmp, label %vector.body.preheader.split, label %vector.body.prol + +vector.body.preheader.split: ; preds = %vector.body.prol, %vector.body.preheader + %tmp13 = icmp ult i64 %tmp8, 24 
+ br i1 %tmp13, label %middle.block, label %vector.body + +vector.body: ; preds = %vector.body, %vector.body.preheader.split + %index = phi i64 [ %index.next.3, %vector.body ], [ 0, %vector.body.preheader.split ] + %index.next = add i64 %index, 8 + %offset.idx.1 = add i64 %tmp12, %index.next + %tmp14 = getelementptr inbounds i32, i32* %tmp, i64 %offset.idx.1 + %tmp15 = bitcast i32* %tmp14 to <4 x i32>* + %wide.load.1 = load <4 x i32>, <4 x i32>* %tmp15, align 4 + %tmp16 = getelementptr inbounds i32, i32* %tmp1, i64 %offset.idx.1 + %tmp17 = bitcast i32* %tmp16 to <4 x i32>* + store <4 x i32> %wide.load.1, <4 x i32>* %tmp17, align 4 + %index.next.3 = add i64 %index, 32 + br i1 undef, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body, %vector.body.preheader.split + br i1 undef, label %for.inc14, label %for.body6 + +for.body: ; preds = %for.body, %for.body.preheader + %k.127 = phi i32 [ %k.0, %for.body.preheader ], [ %add, %for.body ] + %add = add nsw i32 %k.127, 1 + %tmp18 = load i32, i32* undef, align 4 + store i32 %tmp18, i32* @b, align 4 + br i1 undef, label %for.body, label %for.cond4.preheader + +for.body6: ; preds = %for.body6, %middle.block, %vector.memcheck, %min.iters.checked, %for.body6.preheader + %indvars.iv32 = phi i64 [ undef, %for.body6 ], [ %tmp12, %vector.memcheck ], [ %tmp12, %min.iters.checked ], [ %tmp12, %for.body6.preheader ], [ undef, %middle.block ] + %arrayidx8 = getelementptr inbounds i32, i32* %tmp, i64 %indvars.iv32 + %tmp19 = load i32, i32* %arrayidx8, align 4 + %arrayidx10 = getelementptr inbounds i32, i32* %tmp1, i64 %indvars.iv32 + store i32 %tmp19, i32* %arrayidx10, align 4 + %cmp5 = icmp slt i64 %indvars.iv32, undef + br i1 %cmp5, label %for.body6, label %for.inc14 + +for.inc14: ; preds = %for.body6, %middle.block, %for.cond4.preheader + %inc15 = add nuw nsw i32 %c.0, 1 + %indvar.next = add i32 %indvar, 1 + br label %for.cond +} diff --git a/llvm/test/CodeGen/X86/new-remat.ll 
b/llvm/test/CodeGen/X86/new-remat.ll new file mode 100644 index 00000000000..4d311aae622 --- /dev/null +++ b/llvm/test/CodeGen/X86/new-remat.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s | FileCheck %s +; Check all spills are rematerialized. +; CHECK-NOT: Spill + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@b = common global double 0.000000e+00, align 8 +@a = common global i32 0, align 4 + +; Function Attrs: nounwind uwtable +define i32 @uniform_testdata(i32 %p1) { +entry: + %cmp3 = icmp sgt i32 %p1, 0 + br i1 %cmp3, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %tmp = add i32 %p1, -1 + %xtraiter = and i32 %p1, 7 + %lcmp.mod = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod, label %for.body.preheader.split, label %for.body.prol.preheader + +for.body.prol.preheader: ; preds = %for.body.preheader + br label %for.body.prol + +for.body.prol: ; preds = %for.body.prol, %for.body.prol.preheader + %i.04.prol = phi i32 [ %inc.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ] + %prol.iter = phi i32 [ %prol.iter.sub, %for.body.prol ], [ %xtraiter, %for.body.prol.preheader ] + %tmp1 = load double, double* @b, align 8 + %call.prol = tail call double @pow(double %tmp1, double 2.500000e-01) + %inc.prol = add nuw nsw i32 %i.04.prol, 1 + %prol.iter.sub = add i32 %prol.iter, -1 + %prol.iter.cmp = icmp eq i32 %prol.iter.sub, 0 + br i1 %prol.iter.cmp, label %for.body.preheader.split.loopexit, label %for.body.prol + +for.body.preheader.split.loopexit: ; preds = %for.body.prol + %inc.prol.lcssa = phi i32 [ %inc.prol, %for.body.prol ] + br label %for.body.preheader.split + +for.body.preheader.split: ; preds = %for.body.preheader.split.loopexit, %for.body.preheader + %i.04.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.prol.lcssa, %for.body.preheader.split.loopexit ] + %tmp2 = icmp ult i32 %tmp, 7 + br i1 %tmp2, label %for.end.loopexit, label %for.body.preheader.split.split + 
+for.body.preheader.split.split: ; preds = %for.body.preheader.split + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader.split.split + %i.04 = phi i32 [ %i.04.unr, %for.body.preheader.split.split ], [ %inc.7, %for.body ] + %tmp3 = load double, double* @b, align 8 + %call = tail call double @pow(double %tmp3, double 2.500000e-01) + %tmp4 = load double, double* @b, align 8 + %call.1 = tail call double @pow(double %tmp4, double 2.500000e-01) + %inc.7 = add nsw i32 %i.04, 8 + %exitcond.7 = icmp eq i32 %inc.7, %p1 + br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body + +for.end.loopexit.unr-lcssa: ; preds = %for.body + br label %for.end.loopexit + +for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.preheader.split + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %tmp5 = load i32, i32* @a, align 4 + ret i32 %tmp5 +} + +; Function Attrs: nounwind +declare double @pow(double, double) + diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index 46b65bd24fc..1d6b4f94731 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s ; This testing case is reduced from 254.gap SyFgets function. -; We make sure a spill is not hoisted to a hotter outer loop. +; We make sure a spill is hoisted to a cold BB inside the hotter outer loop. %struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] } %struct.TMP.2 = type { i8*, i32, i32, i16, i16, %struct.TMP.3, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.TMP.3, %struct.TMP.4*, i32, [3 x i8], [1 x i8], %struct.TMP.3, i32, i64 } @@ -181,6 +181,10 @@ sw.bb474: br i1 %cmp476, label %if.end517, label %do.body479.preheader do.body479.preheader: + ; CHECK: do.body479.preheader + ; spill is hoisted here. 
Although loop depth1 is even hotter than loop depth2, do.body479.preheader is cold. + ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) + ; CHECK: land.rhs485 %cmp4833314 = icmp eq i8 undef, 0 br i1 %cmp4833314, label %if.end517, label %land.rhs485 @@ -200,8 +204,8 @@ land.lhs.true490: lor.rhs500: ; CHECK: lor.rhs500 - ; Make sure that we don't hoist the spill to outer loops. - ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp) + ; Make sure the spill is hoisted to a cold preheader in the outer loop. + ; CHECK-NOT: movq %r{{.*}}, {{[0-9]+}}(%rsp) ; CHECK: callq {{.*}}maskrune %call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256) br i1 undef, label %land.lhs.true504, label %do.body479.backedge |