diff options
Diffstat (limited to 'llvm/test/Transforms/LoopStrengthReduce/X86')
25 files changed, 0 insertions, 2622 deletions
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll deleted file mode 100644 index da14e631f51..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll +++ /dev/null @@ -1,215 +0,0 @@ -; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s - -; Provide legal integer types. -target datalayout = "n8:16:32:64" - - -define void @foobar(i32 %n) nounwind { - -; CHECK-LABEL: foobar( -; CHECK: phi double - -entry: - %cond = icmp eq i32 %n, 0 ; <i1>:0 [#uses=2] - br i1 %cond, label %return, label %bb.nph - -bb.nph: ; preds = %entry - %umax = select i1 %cond, i32 1, i32 %n ; <i32> [#uses=1] - br label %bb - -bb: ; preds = %bb, %bb.nph - %i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3] - tail call void @bar( i32 %i.03 ) nounwind - %tmp1 = uitofp i32 %i.03 to double ; <double>:1 [#uses=1] - tail call void @foo( double %tmp1 ) nounwind - %indvar.next = add nsw nuw i32 %i.03, 1 ; <i32> [#uses=2] - %exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1] - br i1 %exitcond, label %return, label %bb - -return: ; preds = %bb, %entry - ret void -} - -; Unable to eliminate cast because the mantissa bits for double are not enough -; to hold all of i64 IV bits. -define void @foobar2(i64 %n) nounwind { - -; CHECK-LABEL: foobar2( -; CHECK-NOT: phi double -; CHECK-NOT: phi float - -entry: - %cond = icmp eq i64 %n, 0 ; <i1>:0 [#uses=2] - br i1 %cond, label %return, label %bb.nph - -bb.nph: ; preds = %entry - %umax = select i1 %cond, i64 1, i64 %n ; <i64> [#uses=1] - br label %bb - -bb: ; preds = %bb, %bb.nph - %i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3] - %tmp1 = trunc i64 %i.03 to i32 ; <i32>:1 [#uses=1] - tail call void @bar( i32 %tmp1 ) nounwind - %tmp2 = uitofp i64 %i.03 to double ; <double>:2 [#uses=1] - tail call void @foo( double %tmp2 ) nounwind - %indvar.next = add nsw nuw i64 %i.03, 1 ; <i64> [#uses=2] - %exitcond = icmp eq i64 %indvar.next, %umax ; <i1> [#uses=1] - br i1 %exitcond, label %return, label %bb - -return: ; preds = %bb, %entry - ret void -} - -; Unable to eliminate cast due to potentional overflow. -define void @foobar3() nounwind { - -; CHECK-LABEL: foobar3( -; CHECK-NOT: phi double -; CHECK-NOT: phi float - -entry: - %tmp0 = tail call i32 (...) @nn( ) nounwind ; <i32>:0 [#uses=1] - %cond = icmp eq i32 %tmp0, 0 ; <i1>:1 [#uses=1] - br i1 %cond, label %return, label %bb - -bb: ; preds = %bb, %entry - %i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3] - tail call void @bar( i32 %i.03 ) nounwind - %tmp2 = uitofp i32 %i.03 to double ; <double>:2 [#uses=1] - tail call void @foo( double %tmp2 ) nounwind - %indvar.next = add nuw nsw i32 %i.03, 1 ; <i32>:3 [#uses=2] - %tmp4 = tail call i32 (...) @nn( ) nounwind ; <i32>:4 [#uses=1] - %exitcond = icmp ugt i32 %tmp4, %indvar.next ; <i1>:5 [#uses=1] - br i1 %exitcond, label %bb, label %return - -return: ; preds = %bb, %entry - ret void -} - -; Unable to eliminate cast due to overflow. -define void @foobar4() nounwind { - -; CHECK-LABEL: foobar4( -; CHECK-NOT: phi double -; CHECK-NOT: phi float - -entry: - br label %bb.nph - -bb.nph: ; preds = %entry - br label %bb - -bb: ; preds = %bb, %bb.nph - %i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3] - %tmp2 = sext i8 %i.03 to i32 ; <i32>:0 [#uses=1] - tail call void @bar( i32 %tmp2 ) nounwind - %tmp3 = uitofp i8 %i.03 to double ; <double>:1 [#uses=1] - tail call void @foo( double %tmp3 ) nounwind - %indvar.next = add nsw nuw i8 %i.03, 1 ; <i32> [#uses=2] - %tmp = sext i8 %indvar.next to i32 - %exitcond = icmp eq i32 %tmp, 32767 ; <i1> [#uses=1] - br i1 %exitcond, label %return, label %bb - -return: ; preds = %bb, %entry - ret void -} - -; Unable to eliminate cast because the integer IV overflows (accum exceeds -; SINT_MAX). - -define i32 @foobar5() { -; CHECK-LABEL: foobar5( -; CHECK-NOT: phi double -; CHECK-NOT: phi float -entry: - br label %loop - -loop: - %accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ] - %iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ] - %tmp1 = sitofp i32 %accum to double - tail call void @foo( double %tmp1 ) nounwind - %accum.next = add i32 %accum, 9597741 - %iv.next = add nuw nsw i32 %iv, 1 - %exitcond = icmp ugt i32 %iv, 235 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %accum.next -} - -; Can eliminate if we set nsw and, thus, think that we don't overflow SINT_MAX. - -define i32 @foobar6() { -; CHECK-LABEL: foobar6( -; CHECK: phi double - -entry: - br label %loop - -loop: - %accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ] - %iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ] - %tmp1 = sitofp i32 %accum to double - tail call void @foo( double %tmp1 ) nounwind - %accum.next = add nsw i32 %accum, 9597741 - %iv.next = add nuw nsw i32 %iv, 1 - %exitcond = icmp ugt i32 %iv, 235 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %accum.next -} - -; Unable to eliminate cast because the integer IV overflows (accum exceeds -; UINT_MAX). - -define i32 @foobar7() { -; CHECK-LABEL: foobar7( -; CHECK-NOT: phi double -; CHECK-NOT: phi float -entry: - br label %loop - -loop: - %accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ] - %iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ] - %tmp1 = uitofp i32 %accum to double - tail call void @foo( double %tmp1 ) nounwind - %accum.next = add i32 %accum, 9597741 - %iv.next = add nuw nsw i32 %iv, 1 - %exitcond = icmp ugt i32 %iv, 235 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %accum.next -} - -; Can eliminate if we set nuw and, thus, think that we don't overflow UINT_MAX. - -define i32 @foobar8() { -; CHECK-LABEL: foobar8( -; CHECK: phi double - -entry: - br label %loop - -loop: - %accum = phi i32 [ -3220, %entry ], [ %accum.next, %loop ] - %iv = phi i32 [ 12, %entry ], [ %iv.next, %loop ] - %tmp1 = uitofp i32 %accum to double - tail call void @foo( double %tmp1 ) nounwind - %accum.next = add nuw i32 %accum, 9597741 - %iv.next = add nuw nsw i32 %iv, 1 - %exitcond = icmp ugt i32 %iv, 235 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %accum.next -} - -declare void @bar(i32) - -declare void @foo(double) - -declare i32 @nn(...) diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll deleted file mode 100644 index 4032a599e8d..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2009-11-10-LSRCrash.ll +++ /dev/null @@ -1,130 +0,0 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin11 - -define void @_ZN4llvm20SelectionDAGLowering14visitInlineAsmENS_8CallSiteE() nounwind ssp align 2 { -entry: - br i1 undef, label %bb3.i, label %bb4.i - -bb3.i: ; preds = %entry - unreachable - -bb4.i: ; preds = %entry - br i1 undef, label %bb.i.i, label %_ZNK4llvm8CallSite14getCalledValueEv.exit - -bb.i.i: ; preds = %bb4.i - unreachable - -_ZNK4llvm8CallSite14getCalledValueEv.exit: ; preds = %bb4.i - br i1 undef, label %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit, label %bb6.i - -bb6.i: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit - unreachable - -_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %_ZNK4llvm8CallSite14getCalledValueEv.exit - br i1 undef, label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit, label %bb.i - -bb.i: ; preds = %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit - br label %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit - -_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit: ; preds = %bb.i, %_ZN4llvm4castINS_9InlineAsmEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit - br i1 undef, label %bb50, label %bb27 - -bb27: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit - br i1 undef, label %bb1.i727, label %bb.i.i726 - -bb.i.i726: ; preds = %bb27 - unreachable - -bb1.i727: ; preds = %bb27 - unreachable - -bb50: ; preds = %_ZL25hasInlineAsmMemConstraintRSt6vectorIN4llvm9InlineAsm14ConstraintInfoESaIS2_EERKNS0_14TargetLoweringE.exit - br label %bb107 - -bb51: ; preds = %bb107 - br i1 undef, label %bb105, label %bb106 - -bb105: ; preds = %bb51 - unreachable - -bb106: ; preds = %bb51 - br label %bb107 - -bb107: ; preds = %bb106, %bb50 - br i1 undef, label %bb108, label %bb51 - -bb108: ; preds = %bb107 - br i1 undef, label %bb242, label %bb114 - -bb114: ; preds = %bb108 - br i1 undef, label %bb141, label %bb116 - -bb116: ; preds = %bb114 - br i1 undef, label %bb120, label %bb121 - -bb120: ; preds = %bb116 - unreachable - -bb121: ; preds = %bb116 - unreachable - -bb141: ; preds = %bb114 - br i1 undef, label %bb182, label %bb143 - -bb143: ; preds = %bb141 - br label %bb157 - -bb144: ; preds = %bb.i.i.i843 - switch i32 undef, label %bb155 [ - i32 2, label %bb153 - i32 6, label %bb153 - i32 4, label %bb153 - ] - -bb153: ; preds = %bb144, %bb144, %bb144 - %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] - br label %bb157 - -bb155: ; preds = %bb144 - unreachable - -bb157: ; preds = %bb153, %bb143 - %indvar = phi i32 [ %indvar.next, %bb153 ], [ 0, %bb143 ] ; <i32> [#uses=2] - %0 = icmp eq i32 undef, %indvar ; <i1> [#uses=1] - switch i16 undef, label %bb6.i841 [ - i16 9, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit - i16 26, label %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit - ] - -bb6.i841: ; preds = %bb157 - unreachable - -_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit: ; preds = %bb157, %bb157 - br i1 undef, label %bb.i.i.i843, label %bb1.i.i.i844 - -bb.i.i.i843: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit - br i1 %0, label %bb158, label %bb144 - -bb1.i.i.i844: ; preds = %_ZN4llvm4castINS_14ConstantSDNodeENS_7SDValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS5_.exit - unreachable - -bb158: ; preds = %bb.i.i.i843 - br i1 undef, label %bb177, label %bb176 - -bb176: ; preds = %bb158 - unreachable - -bb177: ; preds = %bb158 - br i1 undef, label %bb179, label %bb178 - -bb178: ; preds = %bb177 - unreachable - -bb179: ; preds = %bb177 - unreachable - -bb182: ; preds = %bb141 - unreachable - -bb242: ; preds = %bb108 - unreachable -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll deleted file mode 100644 index 0fc928ca9b2..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s -; -; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a -; nonzero initial value. -; rdar://9786536 - -; Provide legal integer types. -target datalayout = "n8:16:32:64" - - -; First, make sure LSR doesn't crash on an empty IVUsers list. -; CHECK-LABEL: @dummyIV( -; CHECK-NOT: phi -; CHECK-NOT: sitofp -; CHECK: br -define void @dummyIV() nounwind { -entry: - br label %loop - -loop: - %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ] - %conv = sitofp i32 %i.01 to double - %inc = add nsw i32 %i.01, 1 - br i1 undef, label %loop, label %for.end - -for.end: - unreachable -} - -; Now check that the computed double constant is correct. -; CHECK-LABEL: @doubleIV( -; CHECK: phi double [ -3.900000e+01, %entry ] -; CHECK: br -define void @doubleIV() nounwind { -entry: - br label %loop - -loop: - %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ] - %conv = sitofp i32 %i.01 to double - %div = fdiv double %conv, 4.000000e+01 - %inc = add nsw i32 %i.01, 1 - br i1 undef, label %loop, label %for.end - -for.end: - unreachable -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll deleted file mode 100644 index 8053940df13..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: llc < %s | FileCheck %s -; -; PR11431: handle a phi operand that is replaced by a postinc user. -; LSR first expands %t3 to %t2 in %phi -; LSR then expands %t2 in %phi into two decrements, one on each loop exit. - -target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -declare i1 @check() nounwind - -; Check that LSR did something close to the behavior at the time of the bug. -; CHECK: @sqlite3DropTriggerPtr -; CHECK: incq %r{{[a-d]}}x -; CHECK: jne -; CHECK: decq %r{{[a-d]}}x -; CHECK: ret -define i64 @sqlite3DropTriggerPtr() nounwind { -bb: - %cmp = call zeroext i1 @check() - br label %bb1 - -bb1: ; preds = %bb4, %bb - %t0 = phi i64 [ 0, %bb ], [ %t3, %bb4 ] - %t2 = phi i64 [ 1, %bb ], [ %t5, %bb4 ] - %t3 = add nsw i64 %t0, 1 - br i1 %cmp, label %bb4, label %bb8 - -bb4: ; preds = %bb1 - %t5 = add nsw i64 %t2, 1 - br i1 %cmp, label %bb1, label %bb8 - -bb8: ; preds = %bb8, %bb4 - %phi = phi i64 [ %t3, %bb1 ], [ %t2, %bb4 ] - ret i64 %phi -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll deleted file mode 100644 index 862fff29cef..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll +++ /dev/null @@ -1,93 +0,0 @@ -; RUN: opt < %s -loop-reduce -S | FileCheck %s -; -; Test LSR's ability to prune formulae that refer to nonexistent -; AddRecs in other loops. -; -; Unable to reduce this case further because it requires LSR to exceed -; ComplexityLimit. -; -; We really just want to ensure that LSR can process this loop without -; finding an unsatisfactory solution and bailing out. I've added -; dummyout, an obvious candidate for postinc replacement so we can -; verify that LSR removes it. - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-darwin" - -; CHECK-LABEL: @test( -; CHECK: for.body: -; CHECK: %lsr.iv -; CHECK-NOT: %dummyout -; CHECK: ret -define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp { -entry: - %cmp34 = icmp eq i64 %count, 0 - br i1 %cmp34, label %for.end29, label %for.body - -for.body: ; preds = %entry, %for.body - %dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ] - %indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ] - %dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ] - %p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ] - %incdec.ptr = getelementptr inbounds float, float* %p.035, i64 1 - %0 = load float, float* %incdec.ptr, align 4 - %incdec.ptr2 = getelementptr inbounds float, float* %p.035, i64 2 - %1 = load float, float* %incdec.ptr2, align 4 - %incdec.ptr3 = getelementptr inbounds float, float* %p.035, i64 3 - %2 = load float, float* %incdec.ptr3, align 4 - %incdec.ptr4 = getelementptr inbounds float, float* %p.035, i64 4 - %3 = load float, float* %incdec.ptr4, align 4 - %4 = load i32, i32* %dp.036, align 4 - %conv5 = fptoui float %0 to i32 - %or = or i32 %4, %conv5 - %arrayidx6 = getelementptr inbounds i32, i32* %dp.036, i64 1 - %5 = load i32, i32* %arrayidx6, align 4 - %conv7 = fptoui float %1 to i32 - %or8 = or i32 %5, %conv7 - %arrayidx9 = getelementptr inbounds i32, i32* %dp.036, i64 2 - %6 = load i32, i32* %arrayidx9, align 4 - %conv10 = fptoui float %2 to i32 - %or11 = or i32 %6, %conv10 - %arrayidx12 = getelementptr inbounds i32, i32* %dp.036, i64 3 - %7 = load i32, i32* %arrayidx12, align 4 - %conv13 = fptoui float %3 to i32 - %or14 = or i32 %7, %conv13 - store i32 %or, i32* %dp.036, align 4 - store i32 %or8, i32* %arrayidx6, align 4 - store i32 %or11, i32* %arrayidx9, align 4 - store i32 %or14, i32* %arrayidx12, align 4 - %add.ptr = getelementptr inbounds i32, i32* %dp.036, i64 4 - %indvars.iv.next40 = add i64 %indvars.iv39, 4 - %dummycnt = add i64 %dummyiv, 1 - %cmp = icmp ult i64 %indvars.iv.next40, %count - br i1 %cmp, label %for.body, label %for.cond19.preheader - -for.cond19.preheader: ; preds = %for.body - %dummyout = add i64 %dummyiv, 1 - %rem = and i64 %count, 3 - %cmp2130 = icmp eq i64 %rem, 0 - br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph - -for.body23.lr.ph: ; preds = %for.cond19.preheader - %8 = and i64 %count, 3 - br label %for.body23 - -for.body23: ; preds = %for.body23, %for.body23.lr.ph - %indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ] - %dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ] - %p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ] - %incdec.ptr24 = getelementptr inbounds float, float* %p.131, i64 1 - %9 = load float, float* %incdec.ptr24, align 4 - %10 = load i32, i32* %dp.132, align 4 - %conv25 = fptoui float %9 to i32 - %or26 = or i32 %10, %conv25 - store i32 %or26, i32* %dp.132, align 4 - %indvars.iv.next = add i64 %indvars.iv, 1 - %incdec.ptr28 = getelementptr inbounds i32, i32* %dp.132, i64 1 - %exitcond = icmp eq i64 %indvars.iv.next, %8 - br i1 %exitcond, label %for.end29, label %for.body23 - -for.end29: ; preds = %entry, %for.body23, %for.cond19.preheader - %result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ] - ret i64 %result -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll deleted file mode 100644 index 2e32d916fe3..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ /dev/null @@ -1,148 +0,0 @@ -; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s - -declare i1 @check() nounwind -declare i1 @foo(i8*, i8*, i8*) nounwind - -; Check that redundant phi elimination ran -; CHECK: @test -; CHECK: %while.body.i -; CHECK: movs -; CHECK-NOT: movs -; CHECK: %for.end.i -define i32 @test(i8* %base) nounwind uwtable ssp { -entry: - br label %while.body.lr.ph.i - -while.body.lr.ph.i: ; preds = %cond.true.i - br label %while.body.i - -while.body.i: ; preds = %cond.true29.i, %while.body.lr.ph.i - %indvars.iv7.i = phi i64 [ 16, %while.body.lr.ph.i ], [ %indvars.iv.next8.i, %cond.true29.i ] - %i.05.i = phi i64 [ 0, %while.body.lr.ph.i ], [ %indvars.iv7.i, %cond.true29.i ] - %sext.i = shl i64 %i.05.i, 32 - %idx.ext.i = ashr exact i64 %sext.i, 32 - %add.ptr.sum.i = add i64 %idx.ext.i, 16 - br label %for.body.i - -for.body.i: ; preds = %for.body.i, %while.body.i - %indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ] - %add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i - %arrayidx22.i = getelementptr inbounds i8, i8* %base, i64 %add.ptr.sum - %0 = load i8, i8* %arrayidx22.i, align 1 - %indvars.iv.next.i = add i64 %indvars.iv.i, 1 - %cmp = call i1 @check() nounwind - br i1 %cmp, label %for.end.i, label %for.body.i - -for.end.i: ; preds = %for.body.i - %add.ptr.i144 = getelementptr inbounds i8, i8* %base, i64 %add.ptr.sum.i - %cmp2 = tail call i1 @foo(i8* %add.ptr.i144, i8* %add.ptr.i144, i8* undef) nounwind - br i1 %cmp2, label %cond.true29.i, label %cond.false35.i - -cond.true29.i: ; preds = %for.end.i - %indvars.iv.next8.i = add i64 %indvars.iv7.i, 16 - br i1 false, label %exit, label %while.body.i - -cond.false35.i: ; preds = %for.end.i - unreachable - -exit: ; preds = %cond.true29.i, %cond.true.i - ret i32 0 -} - -%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 } - -@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16 - -; PR11782: SCEVExpander assert -; -; Test phi reuse after LSR that requires SCEVExpander to hoist an -; interesting GEP. -; -; CHECK: @test2 -; CHECK: %entry -; CHECK-NOT: mov -; CHECK: je -define void @test2(i32 %n) nounwind uwtable { -entry: - br i1 undef, label %while.end, label %for.cond468 - -for.cond468: ; preds = %if.then477, %entry - %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ] - %k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ] - %k.0 = load i32, i32* %k.0.in, align 4 - %0 = trunc i64 %indvars.iv1163 to i32 - %cmp469 = icmp slt i32 %0, %n - br i1 %cmp469, label %for.body471, label %for.inc498 - -for.body471: ; preds = %for.cond468 - %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1 - %1 = load i32, i32* %first, align 4 - br i1 undef, label %if.then477, label %for.inc498 - -if.then477: ; preds = %for.body471 - %last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2 - %indvars.iv.next1164 = add i64 %indvars.iv1163, 1 - br label %for.cond468 - -for.inc498: ; preds = %for.inc498, %for.body471, %for.cond468 - br label %for.inc498 - -while.end: ; preds = %entry - ret void -} - -; PR12898: SCEVExpander crash -; Test redundant phi elimination when the deleted phi's increment is -; itself a phi. -; -; CHECK: @test3 -; CHECK: %meshBB1 -; CHECK: %meshBB -; CHECK-NEXT: Parent Loop -; CHECK-NEXT: Inner Loop -; CHECK-NEXT: incq -; CHECK: testb -; CHECK: je -; CHECK: jmp -define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp { -entry: - br i1 undef, label %meshBB1, label %meshBB5 - -for.inc8.us.i: ; preds = %for.body3.us.i - br i1 undef, label %meshBB1, label %meshBB - -for.body3.us.i: ; preds = %meshBB, %for.body3.lr.ph.us.i - %indvars.iv.i.SV.phi = phi i64 [ %indvars.iv.next.i, %meshBB ], [ 0, %for.body3.lr.ph.us.i ] - %storemerge13.us.i.SV.phi = phi i32 [ 0, %meshBB ], [ 0, %for.body3.lr.ph.us.i ] - %Opq.sa.calc12 = sub i32 undef, 227 - %0 = add nsw i64 %indvars.iv.i.SV.phi, %indvars.iv8.i.SV.phi26 - %1 = trunc i64 %0 to i32 - %mul.i.us.i = mul nsw i32 0, %1 - %arrayidx5.us.i = getelementptr inbounds double, double* %u, i64 %indvars.iv.i.SV.phi - %2 = load double, double* %arrayidx5.us.i, align 8 - %indvars.iv.next.i = add i64 %indvars.iv.i.SV.phi, 1 - br i1 undef, label %for.inc8.us.i, label %meshBB - -for.body3.lr.ph.us.i: ; preds = %meshBB1, %meshBB - %indvars.iv8.i.SV.phi26 = phi i64 [ undef, %meshBB1 ], [ %indvars.iv8.i.SV.phi24, %meshBB ] - %arrayidx.us.i = getelementptr inbounds double, double* undef, i64 %indvars.iv8.i.SV.phi26 - %3 = add i64 %indvars.iv8.i.SV.phi26, 1 - br label %for.body3.us.i - -for.inc8.us.i2: ; preds = %meshBB5 - unreachable - -eval_At_times_u.exit: ; preds = %meshBB5 - ret void - -meshBB: ; preds = %for.body3.us.i, %for.inc8.us.i - %indvars.iv8.i.SV.phi24 = phi i64 [ undef, %for.body3.us.i ], [ %3, %for.inc8.us.i ] - %meshStackVariable.phi = phi i32 [ %Opq.sa.calc12, %for.body3.us.i ], [ undef, %for.inc8.us.i ] - br i1 undef, label %for.body3.lr.ph.us.i, label %for.body3.us.i - -meshBB1: ; preds = %for.inc8.us.i, %entry - br label %for.body3.lr.ph.us.i - -meshBB5: ; preds = %entry - br i1 undef, label %eval_At_times_u.exit, label %for.inc8.us.i2 -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/bin_power.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/bin_power.ll deleted file mode 100644 index c9781241c96..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/bin_power.ll +++ /dev/null @@ -1,264 +0,0 @@ -; RUN: opt < %s -scalar-evolution-huge-expr-threshold=1000000 -loop-reduce -S | FileCheck %s - -target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Show that the b^2 is expanded correctly. -define i32 @test_01(i32 %a) { -; CHECK-LABEL: @test_01 -; CHECK: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ] -; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80 -; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop -; CHECK: exit: -; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1 -; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B2]], -1 -; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]] -; CHECK-NEXT: ret i32 [[R2]] - -entry: - br label %loop - -loop: ; preds = %loop, %entry - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %b = add i32 %a, 1 - %b.pow.2 = mul i32 %b, %b - %result = add i32 %b.pow.2, %indvars.iv - %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 - %exitcond = icmp eq i32 %indvars.iv.next, 80 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %result -} - -; Show that b^8 is expanded correctly. -define i32 @test_02(i32 %a) { -; CHECK-LABEL: @test_02 -; CHECK: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ] -; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80 -; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop -; CHECK: exit: -; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1 -; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]] -; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]] -; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B8]], -1 -; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]] -; CHECK-NEXT: ret i32 [[R2]] -entry: - br label %loop - -loop: ; preds = %loop, %entry - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %b = add i32 %a, 1 - %b.pow.2 = mul i32 %b, %b - %b.pow.4 = mul i32 %b.pow.2, %b.pow.2 - %b.pow.8 = mul i32 %b.pow.4, %b.pow.4 - %result = add i32 %b.pow.8, %indvars.iv - %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 - %exitcond = icmp eq i32 %indvars.iv.next, 80 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %result -} - -; Show that b^27 (27 = 1 + 2 + 8 + 16) is expanded correctly. -define i32 @test_03(i32 %a) { -; CHECK-LABEL: @test_03 -; CHECK: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ] -; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80 -; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop -; CHECK: exit: -; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1 -; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[B3:[^ ]+]] = mul i32 [[B]], [[B2]] -; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]] -; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]] -; CHECK-NEXT: [[B11:[^ ]+]] = mul i32 [[B3]], [[B8]] -; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]] -; CHECK-NEXT: [[B27:[^ ]+]] = mul i32 [[B11]], [[B16]] -; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B27]], -1 -; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]] -; CHECK-NEXT: ret i32 [[R2]] -entry: - br label %loop - -loop: ; preds = %loop, %entry - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %b = add i32 %a, 1 - %b.pow.2 = mul i32 %b, %b - %b.pow.4 = mul i32 %b.pow.2, %b.pow.2 - %b.pow.8 = mul i32 %b.pow.4, %b.pow.4 - %b.pow.16 = mul i32 %b.pow.8, %b.pow.8 - %b.pow.24 = mul i32 %b.pow.16, %b.pow.8 - %b.pow.25 = mul i32 %b.pow.24, %b - %b.pow.26 = mul i32 %b.pow.25, %b - %b.pow.27 = mul i32 %b.pow.26, %b - %result = add i32 %b.pow.27, %indvars.iv - %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 - %exitcond = icmp eq i32 %indvars.iv.next, 80 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %result -} - -; Show how linear calculation of b^16 is turned into logarithmic. -define i32 @test_04(i32 %a) { -; CHECK-LABEL: @test_04 -; CHECK: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ] -; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80 -; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop -; CHECK: exit: -; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1 -; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]] -; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]] -; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]] -; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B16]], -1 -; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]] -; CHECK-NEXT: ret i32 [[R2]] -entry: - br label %loop - -loop: ; preds = %loop, %entry - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %b = add i32 %a, 1 - %b.pow.2 = mul i32 %b, %b - %b.pow.3 = mul i32 %b.pow.2, %b - %b.pow.4 = mul i32 %b.pow.3, %b - %b.pow.5 = mul i32 %b.pow.4, %b - %b.pow.6 = mul i32 %b.pow.5, %b - %b.pow.7 = mul i32 %b.pow.6, %b - %b.pow.8 = mul i32 %b.pow.7, %b - %b.pow.9 = mul i32 %b.pow.8, %b - %b.pow.10 = mul i32 %b.pow.9, %b - %b.pow.11 = mul i32 %b.pow.10, %b - %b.pow.12 = mul i32 %b.pow.11, %b - %b.pow.13 = mul i32 %b.pow.12, %b - %b.pow.14 = mul i32 %b.pow.13, %b - %b.pow.15 = mul i32 %b.pow.14, %b - %b.pow.16 = mul i32 %b.pow.15, %b - %result = add i32 %b.pow.16, %indvars.iv - %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 - %exitcond = icmp eq i32 %indvars.iv.next, 80 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %result -} - -; The output here is reasonably big, we just check that the amount of expanded -; instructions is sane. -define i32 @test_05(i32 %a) { -; CHECK-LABEL: @test_05 -; CHECK: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ] -; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80 -; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop -; CHECK: exit: -; CHECK: %100 -; CHECK-NOT: %150 - -entry: - br label %loop - -loop: ; preds = %loop, %entry - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %tmp3 = add i32 %a, 1 - %tmp4 = mul i32 %tmp3, %tmp3 - %tmp5 = mul i32 %tmp4, %tmp4 - %tmp6 = mul i32 %tmp5, %tmp5 - %tmp7 = mul i32 %tmp6, %tmp6 - %tmp8 = mul i32 %tmp7, %tmp7 - %tmp9 = mul i32 %tmp8, %tmp8 - %tmp10 = mul i32 %tmp9, %tmp9 - %tmp11 = mul i32 %tmp10, %tmp10 - %tmp12 = mul i32 %tmp11, %tmp11 - %tmp13 = mul i32 %tmp12, %tmp12 - %tmp14 = mul i32 %tmp13, %tmp13 - %tmp15 = mul i32 %tmp14, %tmp14 - %tmp16 = mul i32 %tmp15, %tmp15 - %tmp17 = mul i32 %tmp16, %tmp16 - %tmp18 = mul i32 %tmp17, %tmp17 - %tmp19 = mul i32 %tmp18, %tmp18 - %tmp20 = mul i32 %tmp19, %tmp19 - %tmp22 = add i32 %tmp20, %indvars.iv - %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 - %exitcond = icmp eq i32 %indvars.iv.next, 80 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %tmp22 -} - -; Show that the transformation works even if the calculation involves different -; values inside. -define i32 @test_06(i32 %a, i32 %c) { -; CHECK-LABEL: @test_06 -; CHECK: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ] -; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1 -; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80 -; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop -; CHECK: exit: -; CHECK: [[B:[^ ]+]] = add i32 %a, 1 -; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]] -; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]] -; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]] -; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]] -entry: - br label %loop - -loop: ; preds = %loop, %entry - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ] - %b = add i32 %a, 1 - %b.pow.2.tmp = mul i32 %b, %b - %b.pow.2 = mul i32 %b.pow.2.tmp, %c - %b.pow.3 = mul i32 %b.pow.2, %b - %b.pow.4 = mul i32 %b.pow.3, %b - %b.pow.5 = mul i32 %b.pow.4, %b - %b.pow.6.tmp = mul i32 %b.pow.5, %b - %b.pow.6 = mul i32 %b.pow.6.tmp, %c - %b.pow.7 = mul i32 %b.pow.6, %b - %b.pow.8 = mul i32 %b.pow.7, %b - %b.pow.9 = mul i32 %b.pow.8, %b - %b.pow.10 = mul i32 %b.pow.9, %b - %b.pow.11 = mul i32 %b.pow.10, %b - %b.pow.12.tmp = mul i32 %b.pow.11, %b - %b.pow.12 = mul i32 %c, %b.pow.12.tmp - %b.pow.13 = mul i32 %b.pow.12, %b - %b.pow.14 = mul i32 %b.pow.13, %b - %b.pow.15 = mul i32 %b.pow.14, %b - %b.pow.16 = mul i32 %b.pow.15, %b - %result = add i32 %b.pow.16, %indvars.iv - %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 - %exitcond = icmp eq i32 %indvars.iv.next, 80 - br i1 %exitcond, label %exit, label %loop - -exit: ; preds = %loop - ret i32 %result -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/canonical-2.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/canonical-2.ll deleted file mode 100644 index 69bae3a5115..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/canonical-2.ll +++ /dev/null @@ -1,36 +0,0 @@ -; REQUIRES: asserts -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s -; PR33077. Check the LSR Use formula to be inserted is already canonicalized and -; will not trigger assertion. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Function Attrs: uwtable -define void @foo() { -cHeapLvb.exit: - br label %not_zero48.us - -not_zero48.us: ; preds = %not_zero48.us, %cHeapLvb.exit - %indvars.iv.us = phi i64 [ %indvars.iv.next.us.7, %not_zero48.us ], [ undef, %cHeapLvb.exit ] - %0 = phi i32 [ %13, %not_zero48.us ], [ undef, %cHeapLvb.exit ] - %indvars.iv.next.us = add nuw nsw i64 %indvars.iv.us, 1 - %1 = add i32 %0, 2 - %2 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %indvars.iv.next.us - %3 = load i32, i32 addrspace(1)* %2, align 4 - %4 = add i32 %0, 3 - %5 = load i32, i32 addrspace(1)* undef, align 4 - %6 = sub i32 undef, %5 - %factor.us.2 = shl i32 %6, 1 - %7 = add i32 %factor.us.2, %1 - %8 = load i32, i32 addrspace(1)* undef, align 4 - %9 = sub i32 %7, %8 - %factor.us.3 = shl i32 %9, 1 - %10 = add i32 %factor.us.3, %4 - %11 = load i32, i32 addrspace(1)* undef, align 4 - %12 = sub i32 %10, %11 - %factor.us.4 = shl i32 %12, 1 - %13 = add i32 %0, 8 - %indvars.iv.next.us.7 = add nsw i64 %indvars.iv.us, 8 - br label %not_zero48.us -} - diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/canonical.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/canonical.ll deleted file mode 100644 index 6b6acb86874..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/canonical.ll +++ /dev/null @@ -1,65 +0,0 @@ -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s -; Check LSR formula canonicalization will put loop invariant regs before -; induction variable of current loop, so exprs involving loop invariant regs -; can be promoted outside of current loop. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(i32 %size, i32 %nsteps, i8* nocapture %maxarray, i8* nocapture readnone %buffer, i32 %init) local_unnamed_addr #0 { -entry: - %cmp25 = icmp sgt i32 %nsteps, 0 - br i1 %cmp25, label %for.cond1.preheader.lr.ph, label %for.end12 - -for.cond1.preheader.lr.ph: ; preds = %entry - %cmp223 = icmp sgt i32 %size, 1 - %t0 = sext i32 %init to i64 - %wide.trip.count = zext i32 %size to i64 - %wide.trip.count31 = zext i32 %nsteps to i64 - br label %for.cond1.preheader - -for.cond1.preheader: ; preds = %for.inc10, %for.cond1.preheader.lr.ph - %indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.inc10 ] - br i1 %cmp223, label %for.body3.lr.ph, label %for.inc10 - -for.body3.lr.ph: ; preds = %for.cond1.preheader - %t1 = add nsw i64 %indvars.iv28, %t0 - %t2 = trunc i64 %indvars.iv28 to i8 - br label %for.body3 - -; Make sure loop invariant items are grouped together so that load address can -; be represented in one getelementptr. -; CHECK-LABEL: for.body3: -; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ 1, %for.body3.lr.ph ], [ {{.*}}, %for.body3 ] -; CHECK-NOT: = phi i64 -; CHECK-NEXT: [[LOADADDR:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]] -; CHECK-NEXT: = load i8, i8* [[LOADADDR]], align 1 -; CHECK: br i1 %exitcond, label %for.inc10.loopexit, label %for.body3 - -for.body3: ; preds = %for.body3, %for.body3.lr.ph - %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] - %t5 = trunc i64 %indvars.iv to i8 - %t3 = add nsw i64 %t1, %indvars.iv - %arrayidx = getelementptr inbounds i8, i8* %maxarray, i64 %t3 - %t4 = load i8, i8* %arrayidx, align 1 - %add5 = add i8 %t4, %t5 - %add6 = add i8 %add5, %t2 - %arrayidx9 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv - store i8 %add6, i8* %arrayidx9, align 1 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count - br i1 %exitcond, label %for.inc10.loopexit, label %for.body3 - -for.inc10.loopexit: ; preds = %for.body3 - br label %for.inc10 - -for.inc10: ; preds = %for.inc10.loopexit, %for.cond1.preheader - %indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1 - %exitcond32 = icmp eq i64 %indvars.iv.next29, %wide.trip.count31 - br i1 %exitcond32, label %for.end12.loopexit, label %for.cond1.preheader - -for.end12.loopexit: ; preds = %for.inc10 - br label %for.end12 - -for.end12: ; preds = %for.end12.loopexit, %entry - ret void -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll deleted file mode 100644 index 00c3222b005..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: opt -S -loop-reduce < %s | FileCheck %s - -target triple = "x86_64-unknown-unknown" -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - -define void @incorrect_offset_scaling(i64, i64*) { -top: - br label %L - -L: ; preds = %idxend.10, %idxend, %L2, %top - br i1 undef, label %L, label %L1 - -L1: ; preds = %L1.preheader, %L2 - %r13 = phi i64 [ %r1, %L2 ], [ 1, %L ] -; CHECK: %lsr.iv = phi i64 [ 0, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ] -; CHECK-NOT: %lsr.iv = phi i64 [ -1, %L{{[^ ]+}} ], [ %lsr.iv.next, %L2 ] -; CHECK: br - %r0 = add i64 %r13, -1 - br label %idxend.8 - -L2: ; preds = %idxend.8 - %r1 = add i64 %r13, 1 - br i1 undef, label %L, label %L1 - -if6: ; preds = %idxend.8 - %r2 = add i64 %0, -1 - %r3 = load i64, i64* %1, align 8 -; CHECK: %r2 = add i64 %0, -1 -; CHECK: %r3 = load i64 - br label %ib - -idxend.8: ; preds = %L1 - br i1 undef, label %if6, label %L2 - -ib: ; preds = %if6 - %r4 = mul i64 %r3, %r0 - %r5 = add i64 %r2, %r4 - %r6 = icmp ult i64 %r5, undef -; CHECK: %r4 = mul i64 %r3, %lsr.iv -; CHECK: %r5 = add i64 %r2, %r4 -; CHECK: %r6 = icmp ult i64 %r5, undef -; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5 - %r7 = getelementptr i64, i64* undef, i64 %r5 - store i64 1, i64* %r7, align 8 - br label %L -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll deleted file mode 100644 index 0be39d3814a..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ /dev/null @@ -1,576 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O3 -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -O3 -mtriple=i686-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X32 - -; @simple is the most basic chain of address induction variables. Chaining -; saves at least one register and avoids complex addressing and setup -; code. -; -; %x * 4 -; no other address computation in the preheader -; no complex address modes -; -; no expensive address computation in the preheader -; no complex address modes - -define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind { -; X64-LABEL: simple: -; X64: # %bb.0: # %entry -; X64-NEXT: movslq %edx, %rcx -; X64-NEXT: shlq $2, %rcx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB0_1: # %loop -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: addl (%rdi), %eax -; X64-NEXT: leaq (%rdi,%rcx), %r8 -; X64-NEXT: addl (%rdi,%rcx), %eax -; X64-NEXT: leaq (%r8,%rcx), %rdx -; X64-NEXT: addl (%rcx,%r8), %eax -; X64-NEXT: addl (%rcx,%rdx), %eax -; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: movq %rdx, %rdi -; X64-NEXT: cmpq %rsi, %rdx -; X64-NEXT: jne .LBB0_1 -; X64-NEXT: # %bb.2: # %exit -; X64-NEXT: retq -; -; X32-LABEL: simple: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shll $2, %edx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB0_1: # %loop -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: addl (%esi), %eax -; X32-NEXT: leal (%esi,%edx), %edi -; X32-NEXT: addl (%esi,%edx), %eax -; X32-NEXT: leal (%edi,%edx), %ebx -; X32-NEXT: addl (%edx,%edi), %eax -; X32-NEXT: addl (%edx,%ebx), %eax -; X32-NEXT: addl %edx, %ebx -; X32-NEXT: addl %edx, %ebx -; X32-NEXT: movl %ebx, %esi -; X32-NEXT: cmpl %ecx, %ebx -; X32-NEXT: jne .LBB0_1 -; X32-NEXT: # %bb.2: # %exit -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: retl -entry: - br label %loop -loop: - %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ] - %s = phi i32 [ 0, %entry ], [ %s4, %loop ] - %v = load i32, i32* %iv - %iv1 = getelementptr inbounds i32, i32* %iv, i32 %x - %v1 = load i32, i32* %iv1 - %iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x - %v2 = load i32, i32* %iv2 - %iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x - %v3 = load i32, i32* %iv3 - %s1 = add i32 %s, %v - %s2 = add i32 %s1, %v1 - %s3 = add i32 %s2, %v2 - %s4 = add i32 %s3, %v3 - %iv4 = getelementptr inbounds i32, i32* %iv3, i32 %x - %cmp = icmp eq i32* %iv4, %b - br i1 %cmp, label %exit, label %loop -exit: - ret i32 %s4 -} - -; @user is not currently chained because the IV is live across memory ops. -; -; expensive address computation in the preheader -; complex address modes -define i32 @user(i32* %a, i32* %b, i32 %x) nounwind { -; X64-LABEL: user: -; X64: # %bb.0: # %entry -; X64-NEXT: movslq %edx, %rcx -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: shlq $4, %rdx -; X64-NEXT: leaq (,%rcx,4), %rax -; X64-NEXT: leaq (%rax,%rax,2), %r8 -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB1_1: # %loop -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: addl (%rdi), %eax -; X64-NEXT: addl (%rdi,%rcx,4), %eax -; X64-NEXT: addl (%rdi,%rcx,8), %eax -; X64-NEXT: addl (%rdi,%r8), %eax -; X64-NEXT: movl %eax, (%rdi) -; X64-NEXT: addq %rdx, %rdi -; X64-NEXT: cmpq %rdi, %rsi -; X64-NEXT: jne .LBB1_1 -; X64-NEXT: # %bb.2: # %exit -; X64-NEXT: retq -; -; X32-LABEL: user: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %ecx, %edi -; X32-NEXT: shll $4, %edi -; X32-NEXT: leal (,%ecx,4), %eax -; X32-NEXT: leal (%eax,%eax,2), %ebx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB1_1: # %loop -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: addl (%esi), %eax -; X32-NEXT: addl (%esi,%ecx,4), %eax -; X32-NEXT: addl (%esi,%ecx,8), %eax -; X32-NEXT: addl (%esi,%ebx), %eax -; X32-NEXT: movl %eax, (%esi) -; X32-NEXT: addl %edi, %esi -; X32-NEXT: cmpl %esi, %edx -; X32-NEXT: jne .LBB1_1 -; X32-NEXT: # %bb.2: # %exit -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: retl -entry: - br label %loop -loop: - %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ] - %s = phi i32 [ 0, %entry ], [ %s4, %loop ] - %v = load i32, i32* %iv - %iv1 = getelementptr inbounds i32, i32* %iv, i32 %x - %v1 = load i32, i32* %iv1 - %iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x - %v2 = load i32, i32* %iv2 - %iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x - %v3 = load i32, i32* %iv3 - %s1 = add i32 %s, %v - %s2 = add i32 %s1, %v1 - %s3 = add i32 %s2, %v2 - %s4 = add i32 %s3, %v3 - %iv4 = getelementptr inbounds i32, i32* %iv3, i32 %x - store i32 %s4, i32* %iv - %cmp = icmp eq i32* %iv4, %b - br i1 %cmp, label %exit, label %loop -exit: - ret i32 %s4 -} - -; @extrastride is a slightly more interesting case of a single -; complete chain with multiple strides. The test case IR is what LSR -; used to do, and exactly what we don't want to do. LSR's new IV -; chaining feature should now undo the damage. -; -; We currently don't handle this on X64 because the sexts cause -; strange increment expressions like this: -; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) -; -; For x32, no spills in the preheader, no complex address modes, no reloads. - -define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind { -; X64-LABEL: extrastride: -; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rbp -; X64-NEXT: pushq %r14 -; X64-NEXT: pushq %rbx -; X64-NEXT: # kill: def $ecx killed $ecx def $rcx -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: testl %r9d, %r9d -; X64-NEXT: je .LBB2_3 -; X64-NEXT: # %bb.1: # %for.body.lr.ph -; X64-NEXT: leal (%rsi,%rsi), %r14d -; X64-NEXT: leal (%rsi,%rsi,2), %ebx -; X64-NEXT: addl %esi, %ecx -; X64-NEXT: leal (,%rsi,4), %eax -; X64-NEXT: leal (%rcx,%rsi,4), %ebp -; X64-NEXT: movslq %eax, %r10 -; X64-NEXT: movslq %ebx, %r11 -; X64-NEXT: movslq %r14d, %rbx -; X64-NEXT: movslq %esi, %rsi -; X64-NEXT: movslq %r8d, %rcx -; X64-NEXT: shlq $2, %rcx -; X64-NEXT: movslq %ebp, %rax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB2_2: # %for.body -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl (%rdi,%rsi), %ebp -; X64-NEXT: addl (%rdi), %ebp -; X64-NEXT: addl (%rdi,%rbx), %ebp -; X64-NEXT: addl (%rdi,%r11), %ebp -; X64-NEXT: addl (%rdi,%r10), %ebp -; X64-NEXT: movl %ebp, (%rdx) -; X64-NEXT: addq %rax, %rdi -; X64-NEXT: addq %rcx, %rdx -; X64-NEXT: decl %r9d -; X64-NEXT: jne .LBB2_2 -; X64-NEXT: .LBB2_3: # %for.end -; X64-NEXT: popq %rbx -; X64-NEXT: popq %r14 -; X64-NEXT: popq %rbp -; X64-NEXT: retq -; -; X32-LABEL: extrastride: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: testl %eax, %eax -; X32-NEXT: je .LBB2_3 -; X32-NEXT: # %bb.1: # %for.body.lr.ph -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: addl %esi, %edi -; X32-NEXT: shll $2, %ecx -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB2_2: # %for.body -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl (%ebx,%esi), %ebp -; X32-NEXT: addl (%ebx), %ebp -; X32-NEXT: leal (%ebx,%esi), %ebx -; X32-NEXT: addl (%esi,%ebx), %ebp -; X32-NEXT: leal (%ebx,%esi), %ebx -; X32-NEXT: addl (%esi,%ebx), %ebp -; X32-NEXT: leal (%ebx,%esi), %ebx -; X32-NEXT: addl (%esi,%ebx), %ebp -; X32-NEXT: movl %ebp, (%edx) -; X32-NEXT: leal (%ebx,%esi), %ebx -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: addl %ecx, %edx -; X32-NEXT: decl %eax -; X32-NEXT: jne .LBB2_2 -; X32-NEXT: .LBB2_3: # %for.end -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: retl -entry: - %cmp8 = icmp eq i32 %z, 0 - br i1 %cmp8, label %for.end, label %for.body.lr.ph - -for.body.lr.ph: ; preds = %entry - %add.ptr.sum = shl i32 %main_stride, 1 ; s*2 - %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3 - %add.ptr2.sum = add i32 %x, %main_stride ; s + x - %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4 - %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x - br label %for.body - -for.body: ; preds = %for.body.lr.ph, %for.body - %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ] - %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] - %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ] - %0 = bitcast i8* %main.addr.011 to i32* - %1 = load i32, i32* %0, align 4 - %add.ptr = getelementptr inbounds i8, i8* %main.addr.011, i32 %main_stride - %2 = bitcast i8* %add.ptr to i32* - %3 = load i32, i32* %2, align 4 - %add.ptr1 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr.sum - %4 = bitcast i8* %add.ptr1 to i32* - %5 = load i32, i32* %4, align 4 - %add.ptr2 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr1.sum - %6 = bitcast i8* %add.ptr2 to i32* - %7 = load i32, i32* %6, align 4 - %add.ptr3 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr4.sum - %8 = bitcast i8* %add.ptr3 to i32* - %9 = load i32, i32* %8, align 4 - %add = add i32 %3, %1 - %add4 = add i32 %add, %5 - %add5 = add i32 %add4, %7 - %add6 = add i32 %add5, %9 - store i32 %add6, i32* %res.addr.09, align 4 - %add.ptr6 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr3.sum - %add.ptr7 = getelementptr inbounds i32, i32* %res.addr.09, i32 %y - %inc = add i32 %i.010, 1 - %cmp = icmp eq i32 %inc, %z - br i1 %cmp, label %for.end, label %for.body - -for.end: ; preds = %for.body, %entry - ret void -} - -; @foldedidx is an unrolled variant of this loop: -; for (unsigned long i = 0; i < len; i += s) { -; c[i] = a[i] + b[i]; -; } -; where 's' can be folded into the addressing mode. -; Consequently, we should *not* form any chains. - -define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp { -; X64-LABEL: foldedidx: -; X64: # %bb.0: # %entry -; X64-NEXT: movl $3, %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_1: # %for.body -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movzbl -3(%rdi,%rax), %r8d -; X64-NEXT: movzbl -3(%rsi,%rax), %ecx -; X64-NEXT: addl %r8d, %ecx -; X64-NEXT: movb %cl, -3(%rdx,%rax) -; X64-NEXT: movzbl -2(%rdi,%rax), %r8d -; X64-NEXT: movzbl -2(%rsi,%rax), %ecx -; X64-NEXT: addl %r8d, %ecx -; X64-NEXT: movb %cl, -2(%rdx,%rax) -; X64-NEXT: movzbl -1(%rdi,%rax), %r8d -; X64-NEXT: movzbl -1(%rsi,%rax), %ecx -; X64-NEXT: addl %r8d, %ecx -; X64-NEXT: movb %cl, -1(%rdx,%rax) -; X64-NEXT: movzbl (%rdi,%rax), %r8d -; X64-NEXT: movzbl (%rsi,%rax), %ecx -; X64-NEXT: addl %r8d, %ecx -; X64-NEXT: movb %cl, (%rdx,%rax) -; X64-NEXT: addq $4, %rax -; X64-NEXT: cmpl $403, %eax # imm = 0x193 -; X64-NEXT: jne .LBB3_1 -; X64-NEXT: # %bb.2: # %for.end -; X64-NEXT: retq -; -; X32-LABEL: foldedidx: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: movl $3, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB3_1: # %for.body -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movzbl -3(%esi,%eax), %edi -; X32-NEXT: movzbl -3(%edx,%eax), %ebx -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, -3(%ecx,%eax) -; X32-NEXT: movzbl -2(%esi,%eax), %edi -; X32-NEXT: movzbl -2(%edx,%eax), %ebx -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, -2(%ecx,%eax) -; X32-NEXT: movzbl -1(%esi,%eax), %edi -; X32-NEXT: movzbl -1(%edx,%eax), %ebx -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, -1(%ecx,%eax) -; X32-NEXT: movzbl (%esi,%eax), %edi -; X32-NEXT: movzbl (%edx,%eax), %ebx -; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, (%ecx,%eax) -; X32-NEXT: addl $4, %eax -; X32-NEXT: cmpl $403, %eax # imm = 0x193 -; X32-NEXT: jne .LBB3_1 -; X32-NEXT: # %bb.2: # %for.end -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: retl -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.07 - %0 = load i8, i8* %arrayidx, align 1 - %conv5 = zext i8 %0 to i32 - %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.07 - %1 = load i8, i8* %arrayidx1, align 1 - %conv26 = zext i8 %1 to i32 - %add = add nsw i32 %conv26, %conv5 - %conv3 = trunc i32 %add to i8 - %arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.07 - store i8 %conv3, i8* %arrayidx4, align 1 - %inc1 = or i32 %i.07, 1 - %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc1 - %2 = load i8, i8* %arrayidx.1, align 1 - %conv5.1 = zext i8 %2 to i32 - %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc1 - %3 = load i8, i8* %arrayidx1.1, align 1 - %conv26.1 = zext i8 %3 to i32 - %add.1 = add nsw i32 %conv26.1, %conv5.1 - %conv3.1 = trunc i32 %add.1 to i8 - %arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %inc1 - store i8 %conv3.1, i8* %arrayidx4.1, align 1 - %inc.12 = or i32 %i.07, 2 - %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.12 - %4 = load i8, i8* %arrayidx.2, align 1 - %conv5.2 = zext i8 %4 to i32 - %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.12 - %5 = load i8, i8* %arrayidx1.2, align 1 - %conv26.2 = zext i8 %5 to i32 - %add.2 = add nsw i32 %conv26.2, %conv5.2 - %conv3.2 = trunc i32 %add.2 to i8 - %arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %inc.12 - store i8 %conv3.2, i8* %arrayidx4.2, align 1 - %inc.23 = or i32 %i.07, 3 - %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.23 - %6 = load i8, i8* %arrayidx.3, align 1 - %conv5.3 = zext i8 %6 to i32 - %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.23 - %7 = load i8, i8* %arrayidx1.3, align 1 - %conv26.3 = zext i8 %7 to i32 - %add.3 = add nsw i32 %conv26.3, %conv5.3 - %conv3.3 = trunc i32 %add.3 to i8 - %arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %inc.23 - store i8 %conv3.3, i8* %arrayidx4.3, align 1 - %inc.3 = add nsw i32 %i.07, 4 - %exitcond.3 = icmp eq i32 %inc.3, 400 - br i1 %exitcond.3, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret void -} - -; @multioper tests instructions with multiple IV user operands. We -; should be able to chain them independent of each other. - -define void @multioper(i32* %a, i32 %n) nounwind { -; X64-LABEL: multioper: -; X64: # %bb.0: # %entry -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB4_1: # %for.body -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, (%rdi,%rax,4) -; X64-NEXT: leal 1(%rax), %ecx -; X64-NEXT: movl %ecx, 4(%rdi,%rax,4) -; X64-NEXT: leal 2(%rax), %ecx -; X64-NEXT: movl %ecx, 8(%rdi,%rax,4) -; X64-NEXT: leal 3(%rax), %ecx -; X64-NEXT: movl %ecx, 12(%rdi,%rax,4) -; X64-NEXT: addq $4, %rax -; X64-NEXT: cmpl %esi, %eax -; X64-NEXT: jl .LBB4_1 -; X64-NEXT: # %bb.2: # %exit -; X64-NEXT: retq -; -; X32-LABEL: multioper: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB4_1: # %for.body -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl %eax, (%edx,%eax,4) -; X32-NEXT: leal 1(%eax), %esi -; X32-NEXT: movl %esi, 4(%edx,%eax,4) -; X32-NEXT: leal 2(%eax), %esi -; X32-NEXT: movl %esi, 8(%edx,%eax,4) -; X32-NEXT: leal 3(%eax), %esi -; X32-NEXT: movl %esi, 12(%edx,%eax,4) -; X32-NEXT: addl $4, %eax -; X32-NEXT: cmpl %ecx, %eax -; X32-NEXT: jl .LBB4_1 -; X32-NEXT: # %bb.2: # %exit -; X32-NEXT: popl %esi -; X32-NEXT: retl -entry: - br label %for.body - -for.body: - %p = phi i32* [ %p.next, %for.body ], [ %a, %entry ] - %i = phi i32 [ %inc4, %for.body ], [ 0, %entry ] - store i32 %i, i32* %p, align 4 - %inc1 = or i32 %i, 1 - %add.ptr.i1 = getelementptr inbounds i32, i32* %p, i32 1 - store i32 %inc1, i32* %add.ptr.i1, align 4 - %inc2 = add nsw i32 %i, 2 - %add.ptr.i2 = getelementptr inbounds i32, i32* %p, i32 2 - store i32 %inc2, i32* %add.ptr.i2, align 4 - %inc3 = add nsw i32 %i, 3 - %add.ptr.i3 = getelementptr inbounds i32, i32* %p, i32 3 - store i32 %inc3, i32* %add.ptr.i3, align 4 - %p.next = getelementptr inbounds i32, i32* %p, i32 4 - %inc4 = add nsw i32 %i, 4 - %cmp = icmp slt i32 %inc4, %n - br i1 %cmp, label %for.body, label %exit - -exit: - ret void -} - -; @testCmpZero has a ICmpZero LSR use that should not be hidden from -; LSR. Profitable chains should have more than one nonzero increment -; anyway. - -define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp { -; X64-LABEL: testCmpZero: -; X64: # %bb.0: # %entry -; X64-NEXT: movslq %edx, %rdx -; X64-NEXT: addq %rdx, %rdi -; X64-NEXT: movslq %ecx, %r9 -; X64-NEXT: addq %rsi, %r9 -; X64-NEXT: addl %edx, %r8d -; X64-NEXT: movslq %r8d, %rcx -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB5_1: # %for.body82.us -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movzbl (%r9,%rdx,4), %eax -; X64-NEXT: movb %al, (%rdi,%rdx) -; X64-NEXT: incq %rdx -; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: jne .LBB5_1 -; X64-NEXT: # %bb.2: # %return -; X64-NEXT: retq -; -; X32-LABEL: testCmpZero: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: addl {{[0-9]+}}(%esp), %edx -; X32-NEXT: xorl %esi, %esi -; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB5_1: # %for.body82.us -; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movzbl (%edx,%esi,4), %ebx -; X32-NEXT: movb %bl, (%ecx,%esi) -; X32-NEXT: incl %esi -; X32-NEXT: cmpl %esi, %eax -; X32-NEXT: jne .LBB5_1 -; X32-NEXT: # %bb.2: # %return -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx -; X32-NEXT: retl -entry: - %dest0 = getelementptr inbounds i8, i8* %src, i32 %srcidx - %source0 = getelementptr inbounds i8, i8* %dst, i32 %dstidx - %add.ptr79.us.sum = add i32 %srcidx, %len - %lftr.limit = getelementptr i8, i8* %src, i32 %add.ptr79.us.sum - br label %for.body82.us - -for.body82.us: - %dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ] - %source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ] - %0 = bitcast i8* %source to i32* - %1 = load i32, i32* %0, align 4 - %trunc = trunc i32 %1 to i8 - %add.ptr83.us = getelementptr inbounds i8, i8* %source, i32 4 - %incdec.ptr91.us = getelementptr inbounds i8, i8* %dest, i32 1 - store i8 %trunc, i8* %dest, align 1 - %exitcond = icmp eq i8* %incdec.ptr91.us, %lftr.limit - br i1 %exitcond, label %return, label %for.body82.us - -return: - ret void -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll deleted file mode 100644 index 7925bf01020..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll +++ /dev/null @@ -1,96 +0,0 @@ -; REQUIRES: asserts -; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32 - -; @sharedidx is an unrolled variant of this loop: -; for (unsigned long i = 0; i < len; i += s) { -; c[i] = a[i] + b[i]; -; } -; where 's' cannot be folded into the addressing mode. -; -; This is not quite profitable to chain. But with -stress-ivchain, we -; can form three address chains in place of the shared induction -; variable. - -; X64: sharedidx: -; X64: %for.body.preheader -; X64-NOT: leal ({{.*}},4) -; X64: %for.body.1 - -; X32: sharedidx: -; X32: %for.body.2 -; X32: add -; X32: add -; X32: add -; X32: add -; X32: add -; X32: %for.body.3 -define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { -entry: - %cmp8 = icmp eq i32 %len, 0 - br i1 %cmp8, label %for.end, label %for.body - -for.body: ; preds = %entry, %for.body.3 - %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09 - %0 = load i8, i8* %arrayidx, align 1 - %conv6 = zext i8 %0 to i32 - %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09 - %1 = load i8, i8* %arrayidx1, align 1 - %conv27 = zext i8 %1 to i32 - %add = add nsw i32 %conv27, %conv6 - %conv3 = trunc i32 %add to i8 - %arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.09 - store i8 %conv3, i8* %arrayidx4, align 1 - %add5 = add i32 %i.09, %s - %cmp = icmp ult i32 %add5, %len - br i1 %cmp, label %for.body.1, label %for.end - -for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry - ret void - -for.body.1: ; preds = %for.body - %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5 - %2 = load i8, i8* %arrayidx.1, align 1 - %conv6.1 = zext i8 %2 to i32 - %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5 - %3 = load i8, i8* %arrayidx1.1, align 1 - %conv27.1 = zext i8 %3 to i32 - %add.1 = add nsw i32 %conv27.1, %conv6.1 - %conv3.1 = trunc i32 %add.1 to i8 - %arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %add5 - store i8 %conv3.1, i8* %arrayidx4.1, align 1 - %add5.1 = add i32 %add5, %s - %cmp.1 = icmp ult i32 %add5.1, %len - br i1 %cmp.1, label %for.body.2, label %for.end - -for.body.2: ; preds = %for.body.1 - %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1 - %4 = load i8, i8* %arrayidx.2, align 1 - %conv6.2 = zext i8 %4 to i32 - %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1 - %5 = load i8, i8* %arrayidx1.2, align 1 - %conv27.2 = zext i8 %5 to i32 - %add.2 = add nsw i32 %conv27.2, %conv6.2 - %conv3.2 = trunc i32 %add.2 to i8 - %arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %add5.1 - store i8 %conv3.2, i8* %arrayidx4.2, align 1 - %add5.2 = add i32 %add5.1, %s - %cmp.2 = icmp ult i32 %add5.2, %len - br i1 %cmp.2, label %for.body.3, label %for.end - -for.body.3: ; preds = %for.body.2 - %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2 - %6 = load i8, i8* %arrayidx.3, align 1 - %conv6.3 = zext i8 %6 to i32 - %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2 - %7 = load i8, i8* %arrayidx1.3, align 1 - %conv27.3 = zext i8 %7 to i32 - %add.3 = add nsw i32 %conv27.3, %conv6.3 - %conv3.3 = trunc i32 %add.3 to i8 - %arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %add5.2 - store i8 %conv3.3, i8* %arrayidx4.3, align 1 - %add5.3 = add i32 %add5.2, %s - %cmp.3 = icmp ult i32 %add5.3, %len - br i1 %cmp.3, label %for.body, label %for.end -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/llvm/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg deleted file mode 100644 index e71f3cc4c41..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'X86' in config.root.targets: - config.unsupported = True - diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll deleted file mode 100644 index deca954fea7..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll +++ /dev/null @@ -1,58 +0,0 @@ -; REQUIRES: x86-registered-target -; RUN: opt -loop-reduce -S < %s | FileCheck %s - -; Strength reduction analysis here relies on IV Users analysis, that -; only finds users among instructions with types that are treated as -; legal by the data layout. When running this test on pure non-x86 -; configs (for example, ARM 64), it gets confused with the target -; triple and uses a default data layout instead. This default layout -; does not have any legal types (even i32), so the transformation -; does not happen. - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx" - -; PR15470: LSR miscompile. The test2 function should return '1'. -; -; SCEV expander cannot expand quadratic recurrences outside of the -; loop. This recurrence depends on %sub.us, so can't be expanded. -; We cannot fold SCEVUnknown (sub.us) with recurrences since it is -; declared after the loop. -; -; CHECK-LABEL: @test2 -; CHECK-LABEL: test2.loop: -; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ] -; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ] -; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -1 -; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216 -; -; CHECK-LABEL: for.end: -; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0 -; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0 -; CHECK: %0 = sub i32 0, %sub.us -; CHECK: %1 = sub i32 %0, %lsr.iv.next -; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1 -; CHECK: %f = ashr i32 %sext.us, 24 -; CHECK: ret i32 %f -define i32 @test2() { -entry: - br label %test2.loop - -test2.loop: - %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] - %inc11.us = add nsw i32 %inc1115.us, 1 - %cmp.us = icmp slt i32 %inc11.us, 2 - br i1 %cmp.us, label %test2.loop, label %for.end - -for.end: - %tobool.us = icmp eq i32 %inc1115.us, 0 - %sub.us = select i1 %tobool.us, i32 0, i32 0 - %mul.us = shl i32 %inc1115.us, 24 - %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us - %sext.us = mul i32 %mul.us, %sub.cond.us - %f = ashr i32 %sext.us, 24 - br label %exit - -exit: - ret i32 %f -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll deleted file mode 100644 index 4ce6f1a79fb..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt < %s -loop-reduce -lsr-filter-same-scaled-reg=true -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -%struct.ham = type { i8, i8, [5 x i32], i64, i64, i64 } - -@global = external local_unnamed_addr global %struct.ham, align 8 - -define void @foo() local_unnamed_addr { -bb: - %tmp = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 3), align 8 - %tmp1 = and i64 %tmp, 1792 - %tmp2 = load i64, i64* getelementptr inbounds (%struct.ham, %struct.ham* @global, i64 0, i32 4), align 8 - %tmp3 = add i64 %tmp1, %tmp2 - %tmp4 = load i8*, i8** null, align 8 - %tmp5 = getelementptr inbounds i8, i8* %tmp4, i64 0 - %tmp6 = sub i64 0, %tmp3 - %tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp6 - %tmp8 = inttoptr i64 0 to i8* - br label %bb9 - -; Without filtering non-optimal formulae with the same ScaledReg and Scale, the strategy -; to narrow LSR search space by picking winner reg will generate only one lsr.iv and -; unoptimal result. -; CHECK-LABEL: @foo( -; CHECK: bb9: -; CHECK-NEXT: = phi i8* -; CHECK-NEXT: = phi i8* - -bb9: ; preds = %bb12, %bb - %tmp10 = phi i8* [ %tmp7, %bb ], [ %tmp16, %bb12 ] - %tmp11 = phi i8* [ %tmp8, %bb ], [ %tmp17, %bb12 ] - br i1 false, label %bb18, label %bb12 - -bb12: ; preds = %bb9 - %tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 8 - %tmp14 = bitcast i8* %tmp13 to i64* - %tmp15 = load i64, i64* %tmp14, align 1 - %tmp16 = getelementptr inbounds i8, i8* %tmp10, i64 16 - %tmp17 = getelementptr inbounds i8, i8* %tmp11, i64 16 - br label %bb9 - -bb18: ; preds = %bb9 - %tmp19 = icmp ugt i8* %tmp11, null - %tmp20 = getelementptr inbounds i8, i8* %tmp10, i64 8 - %tmp21 = getelementptr inbounds i8, i8* %tmp11, i64 8 - %tmp22 = select i1 %tmp19, i8* %tmp10, i8* %tmp20 - %tmp23 = select i1 %tmp19, i8* %tmp11, i8* %tmp21 - br label %bb24 - -bb24: ; preds = %bb24, %bb18 - %tmp25 = phi i8* [ %tmp27, %bb24 ], [ %tmp22, %bb18 ] - %tmp26 = phi i8* [ %tmp29, %bb24 ], [ %tmp23, %bb18 ] - %tmp27 = getelementptr inbounds i8, i8* %tmp25, i64 1 - %tmp28 = load i8, i8* %tmp25, align 1 - %tmp29 = getelementptr inbounds i8, i8* %tmp26, i64 1 - store i8 %tmp28, i8* %tmp26, align 1 - %tmp30 = icmp eq i8* %tmp29, %tmp5 - br label %bb24 -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll deleted file mode 100644 index b96dc62a29f..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll +++ /dev/null @@ -1,101 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN -; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS -; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -lsr-insns-cost | FileCheck %s - -; OPT test checks that LSR optimize compare for static counter to compare with 0. - -; LLC test checks that LSR optimize compare for static counter. -; That means that instead of creating the following: -; movl %ecx, (%rdx,%rax,4) -; incq %rax -; cmpq $1024, %rax -; LSR should optimize out cmp: -; movl %ecx, 4096(%rdx,%rax) -; addq $4, %rax -; or -; movl %ecx, 4096(%rdx,%rax,4) -; incq %rax - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q) { -; INSN-LABEL: @foo( -; INSN-NEXT: entry: -; INSN-NEXT: [[Q1:%.*]] = bitcast i32* [[Q:%.*]] to i8* -; INSN-NEXT: [[Y3:%.*]] = bitcast i32* [[Y:%.*]] to i8* -; INSN-NEXT: [[X7:%.*]] = bitcast i32* [[X:%.*]] to i8* -; INSN-NEXT: br label [[FOR_BODY:%.*]] -; INSN: for.cond.cleanup: -; INSN-NEXT: ret void -; INSN: for.body: -; INSN-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -4096, [[ENTRY:%.*]] ] -; INSN-NEXT: [[UGLYGEP8:%.*]] = getelementptr i8, i8* [[X7]], i64 [[LSR_IV]] -; INSN-NEXT: [[UGLYGEP89:%.*]] = bitcast i8* [[UGLYGEP8]] to i32* -; INSN-NEXT: [[SCEVGEP10:%.*]] = getelementptr i32, i32* [[UGLYGEP89]], i64 1024 -; INSN-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP10]], align 4 -; INSN-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[Y3]], i64 [[LSR_IV]] -; INSN-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to i32* -; INSN-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[UGLYGEP45]], i64 1024 -; INSN-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP6]], align 4 -; INSN-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]] -; INSN-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Q1]], i64 [[LSR_IV]] -; INSN-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i32* -; INSN-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[UGLYGEP2]], i64 1024 -; INSN-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4 -; INSN-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4 -; INSN-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 -; INSN-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; -; REGS-LABEL: @foo( -; REGS-NEXT: entry: -; REGS-NEXT: br label [[FOR_BODY:%.*]] -; REGS: for.cond.cleanup: -; REGS-NEXT: ret void -; REGS: for.body: -; REGS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; REGS-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[X:%.*]], i64 [[INDVARS_IV]] -; REGS-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP2]], align 4 -; REGS-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[Y:%.*]], i64 [[INDVARS_IV]] -; REGS-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP1]], align 4 -; REGS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]] -; REGS-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDVARS_IV]] -; REGS-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4 -; REGS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; REGS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 -; REGS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; -; CHECK-LABEL: foo: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_1: # %for.body -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl 4096(%rsi,%rax), %ecx -; CHECK-NEXT: addl 4096(%rdi,%rax), %ecx -; CHECK-NEXT: movl %ecx, 4096(%rdx,%rax) -; CHECK-NEXT: addq $4, %rax -; CHECK-NEXT: jne .LBB0_1 -; CHECK-NEXT: # %bb.2: # %for.cond.cleanup -; CHECK-NEXT: retq -entry: - br label %for.body - -for.cond.cleanup: ; preds = %for.body - ret void - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv - %tmp = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv - %tmp1 = load i32, i32* %arrayidx2, align 4 - %add = add nsw i32 %tmp1, %tmp - %arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv - store i32 %add, i32* %arrayidx4, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1024 - br i1 %exitcond, label %for.cond.cleanup, label %for.body -} - diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll deleted file mode 100644 index 239cc023350..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN -; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS -; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s - -; OPT checks that LSR prefers less instructions to less registers. -; For x86 LSR should prefer complicated address to new lsr induction -; variables. - -; BOTH: for.body: -; INSN: getelementptr i32, i32* %x, i64 %indvars.iv -; INSN: getelementptr i32, i32* %y, i64 %indvars.iv -; INSN: getelementptr i32, i32* %q, i64 %indvars.iv -; REGS %lsr.iv4 = phi -; REGS %lsr.iv2 = phi -; REGS %lsr.iv1 = phi -; REGS: getelementptr i32, i32* %lsr.iv1, i64 1 -; REGS: getelementptr i32, i32* %lsr.iv2, i64 1 -; REGS: getelementptr i32, i32* %lsr.iv4, i64 1 - -; LLC checks that LSR prefers less instructions to less registers. -; LSR should prefer complicated address to additonal add instructions. - -; CHECK: LBB0_2: -; CHECK-NEXT: movl (%r{{.+}}, -; CHECK-NEXT: addl (%r{{.+}}, -; CHECK-NEXT: movl %e{{.+}}, (%r{{.+}}, - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Function Attrs: norecurse nounwind uwtable -define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q, i32 %n) { -entry: - %cmp10 = icmp sgt i32 %n, 0 - br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup - -for.body.preheader: ; preds = %entry - %wide.trip.count = zext i32 %n to i64 - br label %for.body - -for.cond.cleanup.loopexit: ; preds = %for.body - br label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret void - -for.body: ; preds = %for.body, %for.body.preheader - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv - %tmp = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %y, i64 %indvars.iv - %tmp1 = load i32, i32* %arrayidx2, align 4 - %add = add nsw i32 %tmp1, %tmp - %arrayidx4 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv - store i32 %add, i32* %arrayidx4, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count - br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-overflow.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-overflow.ll deleted file mode 100644 index 0b71d92bf2a..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-overflow.ll +++ /dev/null @@ -1,38 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -lsr-complexity-limit=50 -loop-reduce -S %s | FileCheck %s - -target triple = "x86_64-apple-macosx10.14.0" -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - -define void @overflow1(i64 %a) { -; CHECK-LABEL: @overflow1( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A:%.*]], -1 -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB1]] ], [ [[TMP0]], [[BB:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV]], -9223372036854775808 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], true -; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 1 -; CHECK-NEXT: br i1 [[TMP5]], label [[BB1]], label [[BB7:%.*]] -; CHECK: bb7: -; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[LSR_IV_NEXT]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 0 -; CHECK-NEXT: unreachable -; -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp = phi i64 [ %a, %bb ], [ %tmp6, %bb1 ] - %tmp4 = icmp ne i64 %tmp, -9223372036854775808 - %tmp5 = and i1 %tmp4, 1 - %tmp6 = add i64 %tmp, 1 - br i1 %tmp5, label %bb1, label %bb7 - -bb7: ; preds = %bb1 - %tmp9 = and i64 %tmp, 1 - %tmp10 = icmp eq i64 %tmp9, 0 - unreachable -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll deleted file mode 100644 index 10a725a7ef2..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll +++ /dev/null @@ -1,141 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: opt < %s -loop-reduce -mcpu=btver2 -S | FileCheck %s --check-prefix=JAG -; RUN: opt < %s -loop-reduce -mcpu=bdver2 -S | FileCheck %s --check-prefix=BUL -; RUN: opt < %s -loop-reduce -mcpu=haswell -S | FileCheck %s --check-prefix=HSW - -; RUN: llc < %s | FileCheck %s --check-prefix=BASE -; RUN: llc < %s -mattr=macrofusion | FileCheck %s --check-prefix=FUSE -; RUN: llc < %s -mattr=branchfusion | FileCheck %s --check-prefix=FUSE - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-unknown" - -; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681 -; FIXME: If a CPU can macro-fuse a compare and branch, then we discount that -; cost in LSR and avoid generating large offsets in each memory access. -; This reduces code size and may improve decode throughput. - -define void @maxArray(double* noalias nocapture %x, double* noalias nocapture readonly %y) { -; JAG-LABEL: @maxArray( -; JAG-NEXT: entry: -; JAG-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8* -; JAG-NEXT: [[X3:%.*]] = bitcast double* [[X:%.*]] to i8* -; JAG-NEXT: br label [[VECTOR_BODY:%.*]] -; JAG: vector.body: -; JAG-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ] -; JAG-NEXT: [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]] -; JAG-NEXT: [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>* -; JAG-NEXT: [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768 -; JAG-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]] -; JAG-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>* -; JAG-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768 -; JAG-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8 -; JAG-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8 -; JAG-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]] -; JAG-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]] -; JAG-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]] -; JAG-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>* -; JAG-NEXT: [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768 -; JAG-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8 -; JAG-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16 -; JAG-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 -; JAG-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]] -; JAG: exit: -; JAG-NEXT: ret void -; -; BUL-LABEL: @maxArray( -; BUL-NEXT: entry: -; BUL-NEXT: br label [[VECTOR_BODY:%.*]] -; BUL: vector.body: -; BUL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; BUL-NEXT: [[SCEVGEP4:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[INDEX]] -; BUL-NEXT: [[SCEVGEP45:%.*]] = bitcast double* [[SCEVGEP4]] to <2 x double>* -; BUL-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[INDEX]] -; BUL-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to <2 x double>* -; BUL-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP45]], align 8 -; BUL-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP1]], align 8 -; BUL-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]] -; BUL-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]] -; BUL-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[X]], i64 [[INDEX]] -; BUL-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to <2 x double>* -; BUL-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP23]], align 8 -; BUL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; BUL-NEXT: [[DONE:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536 -; BUL-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]] -; BUL: exit: -; BUL-NEXT: ret void -; -; HSW-LABEL: @maxArray( -; HSW-NEXT: entry: -; HSW-NEXT: br label [[VECTOR_BODY:%.*]] -; HSW: vector.body: -; HSW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; HSW-NEXT: [[SCEVGEP4:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[INDEX]] -; HSW-NEXT: [[SCEVGEP45:%.*]] = bitcast double* [[SCEVGEP4]] to <2 x double>* -; HSW-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[INDEX]] -; HSW-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to <2 x double>* -; HSW-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP45]], align 8 -; HSW-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP1]], align 8 -; HSW-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]] -; HSW-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]] -; HSW-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[X]], i64 [[INDEX]] -; HSW-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to <2 x double>* -; HSW-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP23]], align 8 -; HSW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; HSW-NEXT: [[DONE:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536 -; HSW-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]] -; HSW: exit: -; HSW-NEXT: ret void -; -; BASE-LABEL: maxArray: -; BASE: # %bb.0: # %entry -; BASE-NEXT: movq $-524288, %rax # imm = 0xFFF80000 -; BASE-NEXT: .p2align 4, 0x90 -; BASE-NEXT: .LBB0_1: # %vector.body -; BASE-NEXT: # =>This Inner Loop Header: Depth=1 -; BASE-NEXT: movupd 524288(%rdi,%rax), %xmm0 -; BASE-NEXT: movupd 524288(%rsi,%rax), %xmm1 -; BASE-NEXT: maxpd %xmm0, %xmm1 -; BASE-NEXT: movupd %xmm1, 524288(%rdi,%rax) -; BASE-NEXT: addq $16, %rax -; BASE-NEXT: jne .LBB0_1 -; BASE-NEXT: # %bb.2: # %exit -; BASE-NEXT: retq -; FUSE-LABEL: maxArray: -; FUSE: # %bb.0: # %entry -; FUSE-NEXT: xorl %eax, %eax -; FUSE-NEXT: .p2align 4, 0x90 -; FUSE-NEXT: .LBB0_1: # %vector.body -; FUSE-NEXT: # =>This Inner Loop Header: Depth=1 -; FUSE-NEXT: movupd (%rdi,%rax,8), %xmm0 -; FUSE-NEXT: movupd (%rsi,%rax,8), %xmm1 -; FUSE-NEXT: maxpd %xmm0, %xmm1 -; FUSE-NEXT: movupd %xmm1, (%rdi,%rax,8) -; FUSE-NEXT: addq $2, %rax -; FUSE-NEXT: cmpq $65536, %rax # imm = 0x10000 -; FUSE-NEXT: jne .LBB0_1 -; FUSE-NEXT: # %bb.2: # %exit -; FUSE-NEXT: retq -entry: - br label %vector.body - -vector.body: - %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] - %gepx = getelementptr inbounds double, double* %x, i64 %index - %gepy = getelementptr inbounds double, double* %y, i64 %index - %xptr = bitcast double* %gepx to <2 x double>* - %yptr = bitcast double* %gepy to <2 x double>* - %xval = load <2 x double>, <2 x double>* %xptr, align 8 - %yval = load <2 x double>, <2 x double>* %yptr, align 8 - %cmp = fcmp ogt <2 x double> %yval, %xval - %max = select <2 x i1> %cmp, <2 x double> %yval, <2 x double> %xval - %xptr_again = bitcast double* %gepx to <2 x double>* - store <2 x double> %max, <2 x double>* %xptr_again, align 8 - %index.next = add i64 %index, 2 - %done = icmp eq i64 %index.next, 65536 - br i1 %done, label %exit, label %vector.body - -exit: - ret void -} - diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll deleted file mode 100644 index b9af5a0c68a..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll +++ /dev/null @@ -1,93 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -loop-reduce -S < %s | FileCheck %s - -; Check when we use an outerloop induction variable inside of an innerloop -; induction value expr, LSR can still choose to use single induction variable -; for the innerloop and share it in multiple induction value exprs. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray) { -; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1 -; CHECK-NEXT: [[T0:%.*]] = zext i32 [[SIZE]] to i64 -; CHECK-NEXT: [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[T0]], -1 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]] -; CHECK: for.body2.preheader: -; CHECK-NEXT: br label [[FOR_BODY2:%.*]] -; CHECK: for.body2: -; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ] -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ] -; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 1 -; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP6]], align 1 -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[TMP0]] -; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP5]], align 1 -; CHECK-NEXT: [[TMPV:%.*]] = xor i8 [[V1]], [[V2]] -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[LSR_IV1]] -; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP4]], align 1 -; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV3]], i64 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]] -; CHECK: for.inc.loopexit: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], [[T0]] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT3]], [[T1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: ret void -; -entry: - %cmp215 = icmp sgt i32 %size, 1 - %t0 = zext i32 %size to i64 - %t1 = sext i32 %nsteps to i64 - %sub2 = sub i64 %t0, 2 - br label %for.body - -for.body: ; preds = %for.inc, %entry - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc ], [ 0, %entry ] - %t2 = mul nsw i64 %indvars.iv2, %t0 - br i1 %cmp215, label %for.body2.preheader, label %for.inc - -for.body2.preheader: ; preds = %for.body - br label %for.body2 - -; Check LSR only generates two induction variables for for.body2 one for compare and -; one to shared by multiple array accesses. - -for.body2: ; preds = %for.body2.preheader, %for.body2 - %indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ] - %arrayidx1 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv - %v1 = load i8, i8* %arrayidx1, align 1 - %idx2 = add nsw i64 %indvars.iv, %sub2 - %arrayidx2 = getelementptr inbounds i8, i8* %maxarray, i64 %idx2 - %v2 = load i8, i8* %arrayidx2, align 1 - %tmpv = xor i8 %v1, %v2 - %t4 = add nsw i64 %t2, %indvars.iv - %add.ptr = getelementptr inbounds i8, i8* %maxarray, i64 %t4 - store i8 %tmpv, i8* %add.ptr, align 1 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %wide.trip.count = zext i32 %size to i64 - %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count - br i1 %exitcond, label %for.body2, label %for.inc.loopexit - -for.inc.loopexit: ; preds = %for.body2 - br label %for.inc - -for.inc: ; preds = %for.inc.loopexit, %for.body - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - %cmp = icmp slt i64 %indvars.iv.next3, %t1 - br i1 %cmp, label %for.body, label %for.end.loopexit - -for.end.loopexit: ; preds = %for.inc - ret void -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll deleted file mode 100644 index a6613c53d78..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll +++ /dev/null @@ -1,50 +0,0 @@ -; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - -define void @indvar_expansion(i8* nocapture readonly %rowsptr) { -entry: - br label %for.cond - -; SCEVExpander used to create induction variables in the loop %for.cond while -; expanding the recurrence start value of loop strength reduced values from -; %vector.body. - -; CHECK-LABEL: indvar_expansion -; CHECK: for.cond: -; CHECK-NOT: phi i3 -; CHECK: br i1 {{.+}}, label %for.cond - -for.cond: - %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ] - %cmp = icmp eq i8 undef, 0 - %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1 - br i1 %cmp, label %for.cond, label %for.cond2 - -for.cond2: - br i1 undef, label %for.cond2, label %for.body14.lr.ph - -for.body14.lr.ph: - %sext = shl i64 %indvars.iv44, 32 - %0 = ashr exact i64 %sext, 32 - %1 = sub i64 undef, %indvars.iv44 - %2 = and i64 %1, 4294967295 - %3 = add i64 %2, 1 - %fold = add i64 %1, 1 - %n.mod.vf = and i64 %fold, 7 - %n.vec = sub i64 %3, %n.mod.vf - %end.idx.rnd.down = add i64 %n.vec, %0 - br label %vector.body - -vector.body: - %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ] - %4 = getelementptr inbounds i8, i8* %rowsptr, i64 %index - %5 = bitcast i8* %4 to <4 x i8>* - %wide.load = load <4 x i8>, <4 x i8>* %5, align 1 - %index.next = add i64 %index, 8 - %6 = icmp eq i64 %index.next, %end.idx.rnd.down - br i1 %6, label %for.end24, label %vector.body - -for.end24: - ret void -} diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr17473.ll deleted file mode 100644 index 5b7bb884604..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr17473.ll +++ /dev/null @@ -1,67 +0,0 @@ -; RUN: opt < %s -loop-reduce -S | FileCheck %s - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.9.0" - -; LSR shouldn't normalize IV if it can't be denormalized to the original -; expression. In this testcase, the normalized expression was denormalized to -; an expression different from the original, and we were losing sign extension. - -; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8 -; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32 - -@j = common global i32 0, align 4 -@c = common global i32 0, align 4 -@g = common global i32 0, align 4 -@h = common global i8 0, align 1 -@d = common global i32 0, align 4 -@i = common global i32 0, align 4 -@e = common global i32 0, align 4 -@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1 -@a = common global i32 0, align 4 -@b = common global i16 0, align 2 - -; Function Attrs: nounwind optsize ssp uwtable -define i32 @main() #0 { -entry: - store i8 0, i8* @h, align 1 - %0 = load i32, i32* @j, align 4 - %tobool.i = icmp eq i32 %0, 0 - %1 = load i32, i32* @d, align 4 - %cmp3 = icmp sgt i32 %1, -1 - %.lobit = lshr i32 %1, 31 - %.lobit.not = xor i32 %.lobit, 1 - br label %for.body - -for.body: ; preds = %entry, %fn3.exit - %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ] - %conv = sext i8 %inc9 to i32 - br i1 %tobool.i, label %fn3.exit, label %land.rhs.i - -land.rhs.i: ; preds = %for.body - store i32 0, i32* @c, align 4 - br label %fn3.exit - -fn3.exit: ; preds = %for.body, %land.rhs.i - %inc = add i8 %inc9, 1 - %cmp = icmp sgt i8 %inc, -1 - br i1 %cmp, label %for.body, label %for.end - -for.end: ; preds = %fn3.exit - %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0 - store i32 %conv, i32* @g, align 4 - store i32 %.lobit.not., i32* @i, align 4 - store i8 %inc, i8* @h, align 1 - %conv7 = sext i8 %inc to i32 - %add = add nsw i32 %conv7, %conv - store i32 %add, i32* @e, align 4 - %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %add) #2 - ret i32 0 -} - -; Function Attrs: nounwind optsize -declare i32 @printf(i8* nocapture readonly, ...) #1 - -attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind optsize } diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll deleted file mode 100644 index 0e74ff20073..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: opt < %s -loop-reduce -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@a = global i32 0, align 4 -@b = global i8 0, align 1 -@c = global [4 x i8] zeroinitializer, align 1 - -; Just make sure we don't generate code with uses not dominated by defs. -; CHECK-LABEL: @main( -define i32 @main() { -entry: - %a0 = load i32, i32* @a, align 4 - %cmpa = icmp slt i32 %a0, 4 - br i1 %cmpa, label %preheader, label %for.end - -preheader: - %b0 = load i8, i8* @b, align 1 - %b0sext = sext i8 %b0 to i64 - br label %for.body - -for.body: - %iv = phi i64 [ 0, %preheader ], [ %iv.next, %lor.false ] - %mul = mul nsw i64 %b0sext, %iv - %multrunc = trunc i64 %mul to i32 - %cmp = icmp eq i32 %multrunc, 0 - br i1 %cmp, label %lor.false, label %if.then - -lor.false: - %cgep = getelementptr inbounds [4 x i8], [4 x i8]* @c, i64 0, i64 %iv - %ci = load i8, i8* %cgep, align 1 - %cisext = sext i8 %ci to i32 - %ivtrunc = trunc i64 %iv to i32 - %cmp2 = icmp eq i32 %cisext, %ivtrunc - %iv.next = add i64 %iv, 1 - br i1 %cmp2, label %for.body, label %if.then - -if.then: - tail call void @abort() - unreachable - -for.end: - ret i32 0 -} - -declare void @abort() diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll deleted file mode 100644 index 4ffcfd8549e..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll +++ /dev/null @@ -1,57 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loop-reduce -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @pluto(i32 %arg) #0 { -; CHECK-LABEL: @pluto( -; CHECK-NEXT: bb: -; CHECK-NEXT: br label [[BB10:%.*]] -; CHECK: bb1: -; CHECK-NEXT: store i64 [[LSR_IV_NEXT2:%.*]], i64 addrspace(1)* undef, align 8 -; CHECK-NEXT: ret i32 [[LSR_IV_NEXT:%.*]] -; CHECK: bb10: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2]], [[BB10]] ], [ 9, [[BB:%.*]] ] -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT]], [[BB10]] ], [ undef, [[BB]] ] -; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1 -; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], 1 -; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB10]] -; - -bb: - br label %bb10 - -bb1: ; preds = %bb10 - %tmp = and i64 %tmp24, 4294967295 - %tmp2 = shl i64 %tmp23, 33 - %tmp3 = ashr exact i64 %tmp2, 32 - %tmp4 = add i64 undef, %tmp - %tmp5 = add i64 %tmp4, %tmp3 - %tmp6 = add i64 %tmp5, undef - %tmp7 = add i64 %tmp6, undef - %tmp8 = add i64 undef, %tmp7 - store i64 %tmp8, i64 addrspace(1)* undef, align 8 - %tmp9 = trunc i64 %tmp7 to i32 - ret i32 %tmp9 - -bb10: ; preds = %bb10, %bb - %tmp11 = phi i64 [ 9, %bb ], [ %tmp24, %bb10 ] - %tmp12 = shl i64 undef, 1 - %tmp13 = mul i64 %tmp12, %tmp12 - %tmp14 = shl i64 %tmp13, 1 - %tmp15 = mul i64 %tmp14, %tmp14 - %tmp16 = shl i64 %tmp15, 1 - %tmp17 = mul i64 %tmp16, %tmp16 - %tmp18 = shl i64 %tmp17, 1 - %tmp19 = mul i64 %tmp18, %tmp18 - %tmp20 = shl i64 %tmp19, 1 - %tmp21 = mul i64 %tmp20, %tmp20 - %tmp22 = shl i64 %tmp21, 1 - %tmp23 = mul i64 %tmp22, %tmp22 - %tmp24 = add nuw nsw i64 %tmp11, 1 - br i1 undef, label %bb1, label %bb10 -} - - -attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll deleted file mode 100644 index a69d6adc0f0..00000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: opt -loop-reduce -S < %s | FileCheck %s -; We find it is very bad to allow LSR formula containing SCEVAddRecExpr Reg -; from siblings of current loop. When one loop is LSR optimized, it can -; insert lsr.iv for other sibling loops, which sometimes leads to many extra -; lsr.iv inserted for loops. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -@cond = common local_unnamed_addr global i64 0, align 8 - -; Check there is no extra lsr.iv generated in foo. -; CHECK-LABEL: @foo( -; CHECK-NOT: lsr.iv{{[0-9]+}} = -; -define void @foo(i64 %N) local_unnamed_addr { -entry: - br label %do.body - -do.body: ; preds = %do.body, %entry - %i.0 = phi i64 [ 0, %entry ], [ %inc, %do.body ] - tail call void @goo(i64 %i.0, i64 %i.0) - %inc = add nuw nsw i64 %i.0, 1 - %t0 = load i64, i64* @cond, align 8 - %tobool = icmp eq i64 %t0, 0 - br i1 %tobool, label %do.body2.preheader, label %do.body - -do.body2.preheader: ; preds = %do.body - br label %do.body2 - -do.body2: ; preds = %do.body2.preheader, %do.body2 - %i.1 = phi i64 [ %inc3, %do.body2 ], [ 0, %do.body2.preheader ] - %j.1 = phi i64 [ %inc4, %do.body2 ], [ %inc, %do.body2.preheader ] - tail call void @goo(i64 %i.1, i64 %j.1) - %inc3 = add nuw nsw i64 %i.1, 1 - %inc4 = add nsw i64 %j.1, 1 - %t1 = load i64, i64* @cond, align 8 - %tobool6 = icmp eq i64 %t1, 0 - br i1 %tobool6, label %do.body8.preheader, label %do.body2 - -do.body8.preheader: ; preds = %do.body2 - br label %do.body8 - -do.body8: ; preds = %do.body8.preheader, %do.body8 - %i.2 = phi i64 [ %inc9, %do.body8 ], [ 0, %do.body8.preheader ] - %j.2 = phi i64 [ %inc10, %do.body8 ], [ %inc4, %do.body8.preheader ] - tail call void @goo(i64 %i.2, i64 %j.2) - %inc9 = add nuw nsw i64 %i.2, 1 - %inc10 = add nsw i64 %j.2, 1 - %t2 = load i64, i64* @cond, align 8 - %tobool12 = icmp eq i64 %t2, 0 - br i1 %tobool12, label %do.body14.preheader, label %do.body8 - -do.body14.preheader: ; preds = %do.body8 - br label %do.body14 - -do.body14: ; preds = %do.body14.preheader, %do.body14 - %i.3 = phi i64 [ %inc15, %do.body14 ], [ 0, %do.body14.preheader ] - %j.3 = phi i64 [ %inc16, %do.body14 ], [ %inc10, %do.body14.preheader ] - tail call void @goo(i64 %i.3, i64 %j.3) - %inc15 = add nuw nsw i64 %i.3, 1 - %inc16 = add nsw i64 %j.3, 1 - %t3 = load i64, i64* @cond, align 8 - %tobool18 = icmp eq i64 %t3, 0 - br i1 %tobool18, label %do.body20.preheader, label %do.body14 - -do.body20.preheader: ; preds = %do.body14 - br label %do.body20 - -do.body20: ; preds = %do.body20.preheader, %do.body20 - %i.4 = phi i64 [ %inc21, %do.body20 ], [ 0, %do.body20.preheader ] - %j.4 = phi i64 [ %inc22, %do.body20 ], [ %inc16, %do.body20.preheader ] - tail call void @goo(i64 %i.4, i64 %j.4) - %inc21 = add nuw nsw i64 %i.4, 1 - %inc22 = add nsw i64 %j.4, 1 - %t4 = load i64, i64* @cond, align 8 - %tobool24 = icmp eq i64 %t4, 0 - br i1 %tobool24, label %do.body26.preheader, label %do.body20 - -do.body26.preheader: ; preds = %do.body20 - br label %do.body26 - -do.body26: ; preds = %do.body26.preheader, %do.body26 - %i.5 = phi i64 [ %inc27, %do.body26 ], [ 0, %do.body26.preheader ] - %j.5 = phi i64 [ %inc28, %do.body26 ], [ %inc22, %do.body26.preheader ] - tail call void @goo(i64 %i.5, i64 %j.5) - %inc27 = add nuw nsw i64 %i.5, 1 - %inc28 = add nsw i64 %j.5, 1 - %t5 = load i64, i64* @cond, align 8 - %tobool30 = icmp eq i64 %t5, 0 - br i1 %tobool30, label %do.end31, label %do.body26 - -do.end31: ; preds = %do.body26 - ret void -} - -declare void @goo(i64, i64) local_unnamed_addr - |