[SLP] allow forming 2-way reduction patterns

We have a vector compare reduction problem seen in PR39665 comment 2: https://bugs.llvm.org/show_bug.cgi?id=39665#c2 Or slightly reduced here: define i1 @cmp2(<2 x double> %a0) { %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0> %b = extractelement <2 x i1> %a, i32 0 %c = extractelement <2 x i1> %a, i32 1 %d = and i1 %b, %c ret i1 %d } SLP would not attempt to turn this into a vector reduction because there is an artificial lower limit on that transform. We can not completely remove that limit without inducing regressions though, so this patch just hacks an extra attempt at creating a 2-way reduction to the end of the analysis. As shown in the test file, we are still not getting some of the motivating cases, so follow-on patches will be needed to solve those cases. Differential Revision: https://reviews.llvm.org/D59710
author: Sanjay Patel <spatel@rotateright.com> 2019-11-06 17:24:09 -0500
committer: Sanjay Patel <spatel@rotateright.com> 2019-11-07 06:08:42 -0500
commit: 7ff57705ba196ce649d6034614b3b9df57e1f84f (patch)
tree: f64f7a3809df4ab32e919b62c531e0a2b7dacccd /llvm/test
parent: 55b445150da9101fda07a4c28ee6a4e4bc9fc89a (diff)
download: bcm5719-llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.tar.gz
bcm5719-llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.zip
2 files changed, 10 insertions, 11 deletions
diff --git a/llvm/test/Feature/weak_constant.ll b/llvm/test/Feature/weak_constant.ll
index 4ac2e7e7d68..9a2ea126ebc 100644
--- a/llvm/test/Feature/weak_constant.ll
+++ b/llvm/test/Feature/weak_constant.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -O3 -S > %t
-; RUN:   grep undef %t | count 1
+; RUN:   grep undef %t | count 2
 ; RUN:   grep 5 %t | count 1
 ; RUN:   grep 7 %t | count 1
 ; RUN:   grep 9 %t | count 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
index b5f43354927..fef9a8e50cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -54,10 +54,10 @@ define double @foo(double* nocapture %D) {
 define i1 @two_wide_fcmp_reduction(<2 x double> %a0) {
 ; CHECK-LABEL: @two_wide_fcmp_reduction(
 ; CHECK-NEXT:    [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[B:%.*]] = extractelement <2 x i1> [[A]], i32 0
-; CHECK-NEXT:    [[C:%.*]] = extractelement <2 x i1> [[A]], i32 1
-; CHECK-NEXT:    [[D:%.*]] = and i1 [[B]], [[C]]
-; CHECK-NEXT:    ret i1 [[D]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i1> [[A]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[BIN_RDX]], i32 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
   %b = extractelement <2 x i1> %a, i32 0
@@ -96,12 +96,11 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[MUL]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL]], i32 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; CHECK-NEXT:    [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i1> [[TMP8]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[BIN_RDX]], i32 0
+; CHECK-NEXT:    br i1 [[TMP9]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
 ; CHECK:       lor.lhs.false:
 ; CHECK-NEXT:    [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
author	Sanjay Patel <spatel@rotateright.com>	2019-11-06 17:24:09 -0500
committer	Sanjay Patel <spatel@rotateright.com>	2019-11-07 06:08:42 -0500
commit	7ff57705ba196ce649d6034614b3b9df57e1f84f (patch)
tree	f64f7a3809df4ab32e919b62c531e0a2b7dacccd /llvm/test
parent	55b445150da9101fda07a4c28ee6a4e4bc9fc89a (diff)
download	bcm5719-llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.tar.gz bcm5719-llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.zip