summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/Utils/LoopUtils.cpp 7
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 6
-rw-r--r-- llvm/test/Transforms/LoopVectorize/minmax_reduction.ll 104
3 files changed, 63 insertions, 54 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index a8d20c65c15..56b9084a916 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -594,6 +594,13 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
break;
}
+ // We only match FP sequences with unsafe algebra, so we can unconditionally
+ // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF;
+ FMF.setUnsafeAlgebra();
+ Builder.SetFastMathFlags(FMF);
+
Value *Cmp;
if (RK == MRK_FloatMin || RK == MRK_FloatMax)
Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 19dcccc6eec..41cfd52c51b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3654,10 +3654,12 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
Value *C = nullptr;
- if (FCmp)
+ if (FCmp) {
C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
- else
+ cast<FCmpInst>(C)->copyFastMathFlags(it);
+ } else {
C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
+ }
Entry[Part] = C;
}
diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index 5a0356fe11a..19a401213fd 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -412,10 +412,10 @@ for.end:
; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @max_red_float(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @max_red_float(float %max) #0 {
@@ -427,7 +427,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %max.red.08
+ %cmp3 = fcmp fast ogt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -438,10 +438,10 @@ for.end:
}
; CHECK-LABEL: @max_red_float_ge(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @max_red_float_ge(float %max) #0 {
@@ -453,7 +453,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp oge float %0, %max.red.08
+ %cmp3 = fcmp fast oge float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -464,10 +464,10 @@ for.end:
}
; CHECK-LABEL: @inverted_max_red_float(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_max_red_float(float %max) #0 {
@@ -479,7 +479,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp olt float %0, %max.red.08
+ %cmp3 = fcmp fast olt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -490,10 +490,10 @@ for.end:
}
; CHECK-LABEL: @inverted_max_red_float_le(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_max_red_float_le(float %max) #0 {
@@ -505,7 +505,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ole float %0, %max.red.08
+ %cmp3 = fcmp fast ole float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -516,10 +516,10 @@ for.end:
}
; CHECK-LABEL: @unordered_max_red_float(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @unordered_max_red_float(float %max) #0 {
@@ -531,7 +531,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ugt float %0, %max.red.08
+ %cmp3 = fcmp fast ugt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -542,10 +542,10 @@ for.end:
}
; CHECK-LABEL: @unordered_max_red_float_ge(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @unordered_max_red_float_ge(float %max) #0 {
@@ -557,7 +557,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp uge float %0, %max.red.08
+ %cmp3 = fcmp fast uge float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -568,10 +568,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_max_red_float(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_unordered_max_red_float(float %max) #0 {
@@ -583,7 +583,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ult float %0, %max.red.08
+ %cmp3 = fcmp fast ult float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -594,10 +594,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_max_red_float_le(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_unordered_max_red_float_le(float %max) #0 {
@@ -609,7 +609,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ule float %0, %max.red.08
+ %cmp3 = fcmp fast ule float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -623,10 +623,10 @@ for.end:
; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @min_red_float(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @min_red_float(float %min) #0 {
@@ -638,7 +638,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp olt float %0, %min.red.08
+ %cmp3 = fcmp fast olt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -649,10 +649,10 @@ for.end:
}
; CHECK-LABEL: @min_red_float_le(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @min_red_float_le(float %min) #0 {
@@ -664,7 +664,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ole float %0, %min.red.08
+ %cmp3 = fcmp fast ole float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -675,10 +675,10 @@ for.end:
}
; CHECK-LABEL: @inverted_min_red_float(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_min_red_float(float %min) #0 {
@@ -690,7 +690,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %min.red.08
+ %cmp3 = fcmp fast ogt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -701,10 +701,10 @@ for.end:
}
; CHECK-LABEL: @inverted_min_red_float_ge(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_min_red_float_ge(float %min) #0 {
@@ -716,7 +716,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp oge float %0, %min.red.08
+ %cmp3 = fcmp fast oge float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -727,10 +727,10 @@ for.end:
}
; CHECK-LABEL: @unordered_min_red_float(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @unordered_min_red_float(float %min) #0 {
@@ -742,7 +742,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ult float %0, %min.red.08
+ %cmp3 = fcmp fast ult float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -753,10 +753,10 @@ for.end:
}
; CHECK-LABEL: @unordered_min_red_float_le(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @unordered_min_red_float_le(float %min) #0 {
@@ -768,7 +768,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ule float %0, %min.red.08
+ %cmp3 = fcmp fast ule float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -779,10 +779,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_min_red_float(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_unordered_min_red_float(float %min) #0 {
@@ -794,7 +794,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ugt float %0, %min.red.08
+ %cmp3 = fcmp fast ugt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -805,10 +805,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_unordered_min_red_float_ge(float %min) #0 {
@@ -820,7 +820,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp uge float %0, %min.red.08
+ %cmp3 = fcmp fast uge float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -832,10 +832,10 @@ for.end:
; Make sure we handle doubles, too.
; CHECK-LABEL: @min_red_double(
-; CHECK: fcmp olt <2 x double>
+; CHECK: fcmp fast olt <2 x double>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x double>
+; CHECK: fcmp fast olt <2 x double>
; CHECK: select i1
define double @min_red_double(double %min) #0 {
@@ -847,7 +847,7 @@ for.body:
%min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
%0 = load double, double* %arrayidx, align 4
- %cmp3 = fcmp olt double %0, %min.red.08
+ %cmp3 = fcmp fast olt double %0, %min.red.08
%min.red.0 = select i1 %cmp3, double %0, double %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -871,7 +871,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %max.red.08
+ %cmp3 = fcmp fast ogt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
OpenPOWER on IntegriCloud