diff options
author | Eric Christopher <echristo@gmail.com> | 2019-02-20 04:42:07 +0000 |
---|---|---|
committer | Eric Christopher <echristo@gmail.com> | 2019-02-20 04:42:07 +0000 |
commit | 2534592b9fe436004fa7fd7da73cbb3a00902b9c (patch) | |
tree | 18fa4ff46b46043b2859cf03446f62c35a3792eb /llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll | |
parent | d990c2a9e23f1b8d6d7ee17c2d1f46202d70b88e (diff) | |
download | bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.tar.gz bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.zip |
Temporarily Revert "[X86][SLP] Enable SLP vectorization for 128-bit horizontal X86 instructions (add, sub)"
As this has broken the lto bootstrap build for 3 days and is
showing a significant regression on the Dither_benchmark results (from
the LLVM benchmark suite) -- specifically, on the
BENCHMARK_FLOYD_DITHER_128, BENCHMARK_FLOYD_DITHER_256, and
BENCHMARK_FLOYD_DITHER_512; the others are unchanged. These have
regressed by about 28% on Skylake, 34% on Haswell, and over 40% on
Sandybridge.
This reverts commit r353923.
llvm-svn: 354434
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll index 95b7d6bf55e..19f06805ff5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -172,13 +172,13 @@ define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, fl ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 1 ; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 2 +; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>* -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX6]], align 4 ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX5]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP3]], <2 x float>* [[TMP4]], align 4 +; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX7]], align 4 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 [[I_023]] ; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 [[I_023]] ; CHECK-NEXT: [[INC]] = add i64 [[I_023]], 1 |