diff options
author | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2019-11-15 11:01:13 +0000 |
---|---|---|
committer | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2019-11-15 11:01:13 +0000 |
commit | 71327707b056c1de28fb0b2c2046740ce1e5cb0d (patch) | |
tree | 55b916debb5f323e2fd725e6eea3a5e4e39e1d73 | |
parent | 1f559353a7821769c94f03b00cc9c2f65f982d42 (diff) | |
download | bcm5719-llvm-71327707b056c1de28fb0b2c2046740ce1e5cb0d.tar.gz bcm5719-llvm-71327707b056c1de28fb0b2c2046740ce1e5cb0d.zip |
[ARM][MVE] tail-predication
This is a follow-up to d90804d, to also flag fcmp instructions as instructions
that we do not support in tail-predicated vector loops.
Differential Revision: https://reviews.llvm.org/D70295
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 3 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll | 26 |
2 files changed, 29 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index df3057d62c7..cc5fae4a869 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1008,6 +1008,9 @@ static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) { if (isa<ICmpInst>(&I) && ++ICmpCount > 1) return false; + if (isa<FCmpInst>(&I)) + return false; + // We could allow extending/narrowing FP loads/stores, but codegen is // too inefficient so reject this for now. if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I)) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll index fda374189c8..2b8d9314ba8 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll @@ -322,6 +322,32 @@ for.end: ret i32 0 } +@ftab = common global [32 x float] zeroinitializer, align 1 + +define float @fcmp_not_allowed() #0 { +; CHECK-LABEL: fcmp_not_allowed( +; PREFER-FOLDING: vector.body: +; PREFER-FOLDING-NOT: llvm.masked.load +; PREFER-FOLDING-NOT: llvm.masked.store +; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body +entry: + br label %for.body + +for.body: + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [32 x float], [32 x float]* @ftab, i32 0, i32 %i.08 + %0 = load float, float* %arrayidx, align 4 + %cmp1 = fcmp oeq float %0, 0.000000e+00 + %. 
= select i1 %cmp1, float 2.000000e+00, float 1.000000e+00 + store float %., float* %arrayidx, align 4 + %inc = add nsw i32 %i.08, 1 + %exitcond = icmp slt i32 %inc, 999 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret float 0.000000e+00 +} + define void @pragma_vect_predicate_disable(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 { ; CHECK-LABEL: pragma_vect_predicate_disable( ; PREFER-FOLDING: vector.body: |