diff options
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopPredication.cpp | 18 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopPredication/basic.ll | 6 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopPredication/widened.ll | 6 |
3 files changed, 22 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index 8487278d3d6..3b27364cf5e 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -264,6 +264,12 @@ class LoopPredication { Optional<LoopICmp> parseLoopLatchICmp(); + /// Return an insertion point suitable for inserting a safe to speculate + /// instruction whose only user will be 'User' which has operands 'Ops'. A + /// trivial result would be at the User itself, but we try to return a + /// loop invariant location if possible. + Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops); + bool CanExpand(const SCEV* S); Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, ICmpInst::Predicate Pred, const SCEV *LHS, @@ -438,6 +444,14 @@ bool LoopPredication::isSupportedStep(const SCEV* Step) { return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop); } +Instruction *LoopPredication::findInsertPt(Instruction *Use, + ArrayRef<Value*> Ops) { + for (Value *Op : Ops) + if (!L->isLoopInvariant(Op)) + return Use; + return Preheader->getTerminator(); +} + bool LoopPredication::CanExpand(const SCEV* S) { return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); } @@ -652,7 +666,7 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, TotalWidened += NumWidened; // Emit the new guard condition - Builder.SetInsertPoint(Guard); + Builder.SetInsertPoint(findInsertPt(Guard, Checks)); Value *LastCheck = nullptr; for (auto *Check : Checks) if (!LastCheck) @@ -684,7 +698,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions( TotalWidened += NumWidened; // Emit the new guard condition - Builder.SetInsertPoint(BI); + Builder.SetInsertPoint(findInsertPt(BI, Checks)); Value *LastCheck = nullptr; for (auto *Check : Checks) if (!LastCheck) diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll index 
afb92f263cb..287c6d20be7 100644 --- a/llvm/test/Transforms/LoopPredication/basic.ll +++ b/llvm/test/Transforms/LoopPredication/basic.ll @@ -976,11 +976,11 @@ define i32 @two_range_checks(i32* %array.1, i32 %length.1, ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i32 [[N]], [[LENGTH_1:%.*]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 0, [[LENGTH_1]] ; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[TMP5]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[TMP5]] ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP6]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] @@ -1048,12 +1048,12 @@ define i32 @three_range_checks(i32* %array.1, i32 %length.1, ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i32 [[N]], [[LENGTH_1:%.*]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 0, [[LENGTH_1]] ; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[TMP8]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[TMP8]] ; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP10]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] diff --git a/llvm/test/Transforms/LoopPredication/widened.ll b/llvm/test/Transforms/LoopPredication/widened.ll index 569daff9081..38325cbec50 100644 --- a/llvm/test/Transforms/LoopPredication/widened.ll +++ b/llvm/test/Transforms/LoopPredication/widened.ll @@ -20,6 +20,7 @@ define i64 @iv_wider_type_rc_two_narrow_types(i32 %offA, i16 %offB, i8* %arrA, i ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i32 16, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]] ; CHECK-NEXT: [[TMP7:%.*]] = and i1 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -27,7 +28,6 @@ define i64 @iv_wider_type_rc_two_narrow_types(i32 %offA, i16 %offB, i8* %arrA, i ; CHECK-NEXT: [[IV_TRUNC_16:%.*]] = trunc i64 [[IV]] to i16 ; CHECK-NEXT: [[INDEXA:%.*]] = add i32 [[IV_TRUNC_32]], [[OFFA]] ; CHECK-NEXT: [[INDEXB:%.*]] = add i16 [[IV_TRUNC_16]], [[OFFB]] -; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]] ; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP8]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64 ; CHECK-NEXT: [[ADDRA:%.*]] = getelementptr inbounds i8, i8* [[ARRA]], i64 [[INDEXA_EXT]] @@ -93,14 +93,14 @@ define i64 @iv_rc_different_types(i32 %offA, i32 %offB, i8* %arrA, i8* %arrB, i6 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule i32 15, [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]] ; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-NEXT: [[INDEXA:%.*]] = add i32 [[IV_TRUNC]], [[OFFA]] ; CHECK-NEXT: [[INDEXB:%.*]] = add i32 [[IV_TRUNC]], [[OFFB]] -; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]] ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP15]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64 ; CHECK-NEXT: [[ADDRA:%.*]] = getelementptr inbounds i8, i8* [[ARRA]], i64 [[INDEXA_EXT]] |