diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-09 15:44:20 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-09 15:44:20 +0000 |
| commit | 23c2182c2bfcd145b2c33cf5ca831a7348e6adce (patch) | |
| tree | 539213fa47e89fc0f31e62c647aceda93c205cb5 /llvm/test | |
| parent | bec8a66454882285db4c98311ba430adec2c8dac (diff) | |
| download | bcm5719-llvm-23c2182c2bfcd145b2c33cf5ca831a7348e6adce.tar.gz bcm5719-llvm-23c2182c2bfcd145b2c33cf5ca831a7348e6adce.zip | |
Support generic expansion of ordered vector reduction (PR36732)
Without fast-math flags, the llvm.experimental.vector.reduce.fadd/fmul intrinsics must be expanded in order, because floating-point addition and multiplication are not associative and reordering could change the result.
This patch scalarizes the reduction, applying the accumulator at the start of the sequence: (...(((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ...) + Scl[NumElts-1]
Differential Revision: https://reviews.llvm.org/D45366
llvm-svn: 329585
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/Generic/expand-experimental-reductions.ll | 44 |
1 files changed, 36 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll index 472e66ce1dd..05fa6e324ac 100644 --- a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll +++ b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll @@ -117,8 +117,15 @@ entry: define float @fadd_f32_strict(<4 x float> %vec) { ; CHECK-LABEL: @fadd_f32_strict( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[R:%.*]] = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> [[VEC:%.*]]) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0 +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1 +; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2 +; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3 +; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]] +; CHECK-NEXT: ret float [[BIN_RDX3]] ; entry: %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec) @@ -128,8 +135,15 @@ entry: define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) { ; CHECK-LABEL: @fadd_f32_strict_accum( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[R:%.*]] = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float [[ACCUM:%.*]], <4 x float> [[VEC:%.*]]) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0 +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1 +; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2 +; CHECK-NEXT: 
[[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3 +; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]] +; CHECK-NEXT: ret float [[BIN_RDX3]] ; entry: %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec) @@ -169,8 +183,15 @@ entry: define float @fmul_f32_strict(<4 x float> %vec) { ; CHECK-LABEL: @fmul_f32_strict( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[R:%.*]] = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> [[VEC:%.*]]) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0 +; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1 +; CHECK-NEXT: [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2 +; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3 +; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]] +; CHECK-NEXT: ret float [[BIN_RDX3]] ; entry: %r = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec) @@ -180,8 +201,15 @@ entry: define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) { ; CHECK-LABEL: @fmul_f32_strict_accum( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[R:%.*]] = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float [[ACCUM:%.*]], <4 x float> [[VEC:%.*]]) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0 +; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1 +; CHECK-NEXT: [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], 
i32 2 +; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3 +; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]] +; CHECK-NEXT: ret float [[BIN_RDX3]] ; entry: %r = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec) |

