summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
author Nikita Popov <nikita.ppv@gmail.com> 2020-02-02 18:44:18 +0100
committer Hans Wennborg <hans@chromium.org> 2020-02-05 13:52:41 +0100
commit 99c6a4ea9201f09e8107bb83675f1e7235456b6d (patch)
tree 499e8cf52975392d143a97aacd1641f9a203e135 /llvm
parent 0b8a540dff86662fc9426bb4dd8797c547db5000 (diff)
download bcm5719-llvm-99c6a4ea9201f09e8107bb83675f1e7235456b6d.tar.gz
bcm5719-llvm-99c6a4ea9201f09e8107bb83675f1e7235456b6d.zip
[ARM] Expand vector reduction intrinsics on soft float
Follow-up to D73135. If the target doesn't have hard float (the default for ARM), an assertion fires when trying to soften the result of vector reduction intrinsics. This patch marks these intrinsics for expansion as well. (It is a bit odd to use vectors on a target without hard float, but that's where you end up if you expose target-independent vector types.) Differential Revision: https://reviews.llvm.org/D73854 (cherry picked from commit 1cc4f8d17247cd9be88addd75d060f9321b6f8b0)
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.h 9
-rw-r--r-- llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll 63
2 files changed, 71 insertions, 1 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 4a9a8f688ab..b860df62b78 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -175,7 +175,14 @@ public:
case Intrinsic::experimental_vector_reduce_v2_fadd:
case Intrinsic::experimental_vector_reduce_v2_fmul:
// We don't have legalization support for ordered FP reductions.
- return !II->getFastMathFlags().allowReassoc();
+ if (!II->getFastMathFlags().allowReassoc())
+ return true;
+ LLVM_FALLTHROUGH;
+
+ case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ // Can't legalize reductions with soft floats.
+ return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
default:
// Don't expand anything else, let legalization deal with it.
diff --git a/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
new file mode 100644
index 00000000000..f3eeb11a17f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
+
+declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
+declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
+declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
+
+define float @test_v4f32(<4 x float> %a) nounwind {
+; CHECK-LABEL: test_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: mov r1, r2
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: bl __aeabi_fadd
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: bl __aeabi_fadd
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: bl __aeabi_fadd
+; CHECK-NEXT: pop {r4, r5, r6, lr}
+; CHECK-NEXT: mov pc, lr
+ %b = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a)
+ ret float %b
+}
+
+define double @test_v2f64(<2 x double> %a) nounwind {
+; CHECK-LABEL: test_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl __aeabi_dadd
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: mov pc, lr
+ %b = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a)
+ ret double %b
+}
+
+define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
+; CHECK-LABEL: test_v2f128:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: ldr r12, [sp, #36]
+; CHECK-NEXT: str r12, [sp, #12]
+; CHECK-NEXT: ldr r12, [sp, #32]
+; CHECK-NEXT: str r12, [sp, #8]
+; CHECK-NEXT: ldr r12, [sp, #28]
+; CHECK-NEXT: str r12, [sp, #4]
+; CHECK-NEXT: ldr r12, [sp, #24]
+; CHECK-NEXT: str r12, [sp]
+; CHECK-NEXT: bl __addtf3
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: mov pc, lr
+ %b = call fast fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
+ ret fp128 %b
+}
OpenPOWER on IntegriCloud