diff options
author | Eric Christopher <echristo@gmail.com> | 2019-02-20 04:42:07 +0000 |
---|---|---|
committer | Eric Christopher <echristo@gmail.com> | 2019-02-20 04:42:07 +0000 |
commit | 2534592b9fe436004fa7fd7da73cbb3a00902b9c (patch) | |
tree | 18fa4ff46b46043b2859cf03446f62c35a3792eb /llvm/test/Transforms/SLPVectorizer/X86/resched.ll | |
parent | d990c2a9e23f1b8d6d7ee17c2d1f46202d70b88e (diff) | |
download | bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.tar.gz bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.zip |
Temporarily Revert "[X86][SLP] Enable SLP vectorization for 128-bit horizontal X86 instructions (add, sub)"
As this has broken the lto bootstrap build for 3 days and is
showing a significant regression on the Dither_benchmark results (from
the LLVM benchmark suite) -- specifically, on the
BENCHMARK_FLOYD_DITHER_128, BENCHMARK_FLOYD_DITHER_256, and
BENCHMARK_FLOYD_DITHER_512; the others are unchanged. These have
regressed by about 28% on Skylake, 34% on Haswell, and over 40% on
Sandybridge.
This reverts commit r353923.
llvm-svn: 354434
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/resched.ll')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/resched.ll | 71 |
1 files changed, 34 insertions, 37 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 848b70c4d86..b8b1ff00db4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -38,47 +38,44 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3 ; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12> ; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12 +; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13 ; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[CONV31_I]], i32 1 -; CHECK-NEXT: [[TMP17:%.*]] = lshr <2 x i32> [[TMP16]], <i32 13, i32 14> +; CHECK-NEXT: [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14 ; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14 ; CHECK-NEXT: [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[TMP19]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[TMP21]], i32 2 -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[TMP23]], i32 3 -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <16 x i32> [[TMP24]], i32 [[TMP25]], i32 4 -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x i32> [[TMP26]], i32 [[TMP27]], i32 5 -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <16 x i32> [[TMP28]], i32 [[TMP29]], i32 6 -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <16 x i32> [[TMP30]], i32 [[TMP31]], i32 7 -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x i32> [[TMP32]], i32 [[TMP33]], i32 8 -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i32> [[TMP34]], i32 [[TMP35]], i32 9 -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <16 x i32> [[TMP36]], i32 [[TMP37]], i32 10 -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i32> [[TMP38]], i32 [[TMP39]], i32 11 -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[TMP41]], i32 12 -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP44:%.*]] = insertelement <16 x i32> [[TMP42]], i32 [[TMP43]], i32 13 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i32> [[TMP44]], i32 [[TMP45]], i32 14 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i32> [[TMP46]], i32 [[SHR_14_I_I]], i32 15 -; CHECK-NEXT: [[TMP48:%.*]] = trunc <16 x i32> [[TMP47]] to <16 x i8> -; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[TMP48]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP16]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP18]], i32 2 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP20]], i32 3 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP22]], i32 4 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP24]], i32 5 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP25]], i32 [[TMP26]], i32 6 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP27]], i32 [[TMP28]], i32 7 +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i32> [[TMP29]], i32 [[TMP30]], i32 8 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <16 x i32> [[TMP31]], i32 [[TMP32]], i32 9 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i32> [[TMP33]], i32 [[TMP34]], i32 10 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x i32> [[TMP35]], i32 [[TMP36]], i32 11 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3 +; CHECK-NEXT: [[TMP39:%.*]] = insertelement <16 x i32> [[TMP37]], i32 [[TMP38]], i32 12 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i32> [[TMP39]], i32 [[SHR_12_I_I]], i32 13 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14 +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15 +; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8> +; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[TMP43]] ; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15 -; CHECK-NEXT: [[TMP50:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP49]], <16 x i8>* [[TMP50]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* +; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1 ; CHECK-NEXT: unreachable ; CHECK: if.end50.i: ; CHECK-NEXT: ret void |