diff options
author | Eric Christopher <echristo@gmail.com> | 2019-02-20 04:42:07 +0000 |
---|---|---|
committer | Eric Christopher <echristo@gmail.com> | 2019-02-20 04:42:07 +0000 |
commit | 2534592b9fe436004fa7fd7da73cbb3a00902b9c (patch) | |
tree | 18fa4ff46b46043b2859cf03446f62c35a3792eb /llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll | |
parent | d990c2a9e23f1b8d6d7ee17c2d1f46202d70b88e (diff) | |
download | bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.tar.gz bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.zip |
Temporarily Revert "[X86][SLP] Enable SLP vectorization for 128-bit horizontal X86 instructions (add, sub)"
This has broken the LTO bootstrap build for 3 days and is
showing a significant regression on the Dither_benchmark results (from
the LLVM benchmark suite) -- specifically on
BENCHMARK_FLOYD_DITHER_128, BENCHMARK_FLOYD_DITHER_256, and
BENCHMARK_FLOYD_DITHER_512; the others are unchanged. These have
regressed by about 28% on Skylake, 34% on Haswell, and over 40% on
Sandy Bridge.
This reverts commit r353923.
llvm-svn: 354434
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll | 12 |
1 file changed, 8 insertions, 4 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll index 60b2aa5ba73..3abde37048f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll @@ -14,10 +14,14 @@ define i32 @slp_schedule_bundle() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], <i32 31, i32 31, i32 31, i32 31> ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP1]] ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([1 x i32]* @a to <4 x i32>*), align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 4, i64 0) to <2 x i32>*), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], <i32 31, i32 31> -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> <i32 1, i32 1>, [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* bitcast (i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 4, i64 0) to <2 x i32>*), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 4, i64 0), align 4 +; CHECK-NEXT: [[DOTLOBIT_4:%.*]] = lshr i32 [[TMP3]], 31 +; CHECK-NEXT: [[DOTLOBIT_NOT_4:%.*]] = xor i32 [[DOTLOBIT_4]], 1 +; CHECK-NEXT: store i32 [[DOTLOBIT_NOT_4]], i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 4, i64 0), align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 5, i64 0), align 4 +; CHECK-NEXT: [[DOTLOBIT_5:%.*]] = lshr i32 [[TMP4]], 31 +; CHECK-NEXT: [[DOTLOBIT_NOT_5:%.*]] = xor i32 [[DOTLOBIT_5]], 1 +; CHECK-NEXT: store i32 [[DOTLOBIT_NOT_5]], i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 5, i64 0), align 4 ; CHECK-NEXT: ret i32 undef ; entry: |