diff options
author | Sander de Smalen <sander.desmalen@arm.com> | 2019-06-11 08:22:10 +0000 |
---|---|---|
committer | Sander de Smalen <sander.desmalen@arm.com> | 2019-06-11 08:22:10 +0000 |
commit | cbeb563cfb1752044fb8771586ae9bbd89d2a07b (patch) | |
tree | dd9dec7d2ce2d7f949c97d9624df5ea1bbbf551d /llvm/test/Bitcode | |
parent | e2acbeb94cf28cf6a8c82e09073df79aa1e846be (diff) | |
download | bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.tar.gz bcm5719-llvm-cbeb563cfb1752044fb8771586ae9bbd89d2a07b.zip |
Change semantics of fadd/fmul vector reductions.
This patch changes how LLVM handles the accumulator/start value
in a reduction: the value is now never ignored, regardless of whether
fast-math flags are present on the call site. This change introduces the following
new intrinsics to replace the existing ones:
llvm.experimental.vector.reduce.fadd -> llvm.experimental.vector.reduce.v2.fadd
llvm.experimental.vector.reduce.fmul -> llvm.experimental.vector.reduce.v2.fmul
and adds functionality to auto-upgrade existing LLVM IR and bitcode.
Reviewers: RKSimon, greened, dmgreen, nikic, simoll, aemerson
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D60261
llvm-svn: 363035
Diffstat (limited to 'llvm/test/Bitcode')
-rw-r--r-- | llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll | 64 | ||||
-rw-r--r-- | llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll.bc | bin | 0 -> 1860 bytes |
2 files changed, 64 insertions, 0 deletions
diff --git a/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll b/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll new file mode 100644 index 00000000000..c91c7bc3953 --- /dev/null +++ b/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll @@ -0,0 +1,64 @@ +; RUN: opt -S < %s | FileCheck %s +; RUN: llvm-dis < %s.bc | FileCheck %s + +define float @fadd_acc(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fadd_acc +; CHECK: %res = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %acc, <4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %in) + ret float %res +} + +define float @fadd_undef(<4 x float> %in) { +; CHECK-LABEL: @fadd_undef +; CHECK: %res = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %in) + ret float %res +} + +define float @fadd_fast_acc(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fadd_fast_acc +; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %in) + %res = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %in) + ret float %res +} + +define float @fadd_fast_undef(<4 x float> %in) { +; CHECK-LABEL: @fadd_fast_undef +; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %in) + %res = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %in) + ret float %res +} + +define float @fmul_acc(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fmul_acc +; CHECK: %res = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %acc, <4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %acc, <4 x float> %in) + ret float %res +} + +define float @fmul_undef(<4 x float> %in) { +; CHECK-LABEL: @fmul_undef +; CHECK: 
%res = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %in) + %res = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %in) + ret float %res +} + +define float @fmul_fast_acc(<4 x float> %in, float %acc) { +; CHECK-LABEL: @fmul_fast_acc +; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %in) + %res = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %acc, <4 x float> %in) + ret float %res +} + +define float @fmul_fast_undef(<4 x float> %in) { +; CHECK-LABEL: @fmul_fast_undef +; CHECK: %res = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> %in) + %res = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %in) + ret float %res +} + +declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>) +; CHECK: declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) + +declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>) +; CHECK: declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>) diff --git a/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll.bc b/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll.bc Binary files differ new file mode 100644 index 00000000000..18f44f7acd9 --- /dev/null +++ b/llvm/test/Bitcode/upgrade-vecreduce-intrinsics.ll.bc |