summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
diff options
context:
space:
mode:
author: Eric Christopher <echristo@gmail.com> 2019-02-20 04:42:07 +0000
committer: Eric Christopher <echristo@gmail.com> 2019-02-20 04:42:07 +0000
commit: 2534592b9fe436004fa7fd7da73cbb3a00902b9c (patch)
tree: 18fa4ff46b46043b2859cf03446f62c35a3792eb /llvm/test/Transforms/SLPVectorizer/X86/resched.ll
parent: d990c2a9e23f1b8d6d7ee17c2d1f46202d70b88e (diff)
download: bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.tar.gz
bcm5719-llvm-2534592b9fe436004fa7fd7da73cbb3a00902b9c.zip
Temporarily Revert "[X86][SLP] Enable SLP vectorization for 128-bit horizontal X86 instructions (add, sub)"
As this has broken the lto bootstrap build for 3 days and is showing a significant regression on the Dither_benchmark results (from the LLVM benchmark suite) -- specifically, on the BENCHMARK_FLOYD_DITHER_128, BENCHMARK_FLOYD_DITHER_256, and BENCHMARK_FLOYD_DITHER_512; the others are unchanged. These have regressed by about 28% on Skylake, 34% on Haswell, and over 40% on Sandybridge. This reverts commit r353923. llvm-svn: 354434
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/X86/resched.ll')
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/resched.ll 71
1 files changed, 34 insertions, 37 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
index 848b70c4d86..b8b1ff00db4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -38,47 +38,44 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3
; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12>
; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12
+; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13
; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[CONV31_I]], i32 1
-; CHECK-NEXT: [[TMP17:%.*]] = lshr <2 x i32> [[TMP16]], <i32 13, i32 14>
+; CHECK-NEXT: [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14
; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14
; CHECK-NEXT: [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15
-; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[TMP19]], i32 1
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[TMP21]], i32 2
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2
-; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[TMP23]], i32 3
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <16 x i32> [[TMP24]], i32 [[TMP25]], i32 4
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4
-; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x i32> [[TMP26]], i32 [[TMP27]], i32 5
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5
-; CHECK-NEXT: [[TMP30:%.*]] = insertelement <16 x i32> [[TMP28]], i32 [[TMP29]], i32 6
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6
-; CHECK-NEXT: [[TMP32:%.*]] = insertelement <16 x i32> [[TMP30]], i32 [[TMP31]], i32 7
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7
-; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x i32> [[TMP32]], i32 [[TMP33]], i32 8
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i32> [[TMP34]], i32 [[TMP35]], i32 9
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1
-; CHECK-NEXT: [[TMP38:%.*]] = insertelement <16 x i32> [[TMP36]], i32 [[TMP37]], i32 10
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2
-; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i32> [[TMP38]], i32 [[TMP39]], i32 11
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3
-; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[TMP41]], i32 12
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
-; CHECK-NEXT: [[TMP44:%.*]] = insertelement <16 x i32> [[TMP42]], i32 [[TMP43]], i32 13
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
-; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i32> [[TMP44]], i32 [[TMP45]], i32 14
-; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i32> [[TMP46]], i32 [[SHR_14_I_I]], i32 15
-; CHECK-NEXT: [[TMP48:%.*]] = trunc <16 x i32> [[TMP47]] to <16 x i8>
-; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[TMP48]]
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP16]], i32 1
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP18]], i32 2
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP20]], i32 3
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP22]], i32 4
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP24]], i32 5
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP25]], i32 [[TMP26]], i32 6
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6
+; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP27]], i32 [[TMP28]], i32 7
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i32> [[TMP29]], i32 [[TMP30]], i32 8
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP33:%.*]] = insertelement <16 x i32> [[TMP31]], i32 [[TMP32]], i32 9
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1
+; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i32> [[TMP33]], i32 [[TMP34]], i32 10
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2
+; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x i32> [[TMP35]], i32 [[TMP36]], i32 11
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3
+; CHECK-NEXT: [[TMP39:%.*]] = insertelement <16 x i32> [[TMP37]], i32 [[TMP38]], i32 12
+; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i32> [[TMP39]], i32 [[SHR_12_I_I]], i32 13
+; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14
+; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15
+; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8>
+; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[TMP43]]
; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT: store <16 x i8> [[TMP49]], <16 x i8>* [[TMP50]], align 1
+; CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
+; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1
; CHECK-NEXT: unreachable
; CHECK: if.end50.i:
; CHECK-NEXT: ret void
OpenPOWER on IntegriCloud