diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-11-06 17:24:09 -0500 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-11-07 06:08:42 -0500 |
commit | 7ff57705ba196ce649d6034614b3b9df57e1f84f (patch) | |
tree | f64f7a3809df4ab32e919b62c531e0a2b7dacccd /llvm/test | |
parent | 55b445150da9101fda07a4c28ee6a4e4bc9fc89a (diff) | |
download | bcm5719-llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.tar.gz bcm5719-llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.zip |
[SLP] allow forming 2-way reduction patterns
We have a vector compare reduction problem seen in PR39665 comment 2:
https://bugs.llvm.org/show_bug.cgi?id=39665#c2
Or slightly reduced here:
define i1 @cmp2(<2 x double> %a0) {
  %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
  %b = extractelement <2 x i1> %a, i32 0
  %c = extractelement <2 x i1> %a, i32 1
  %d = and i1 %b, %c
  ret i1 %d
}
SLP would not attempt to turn this into a vector reduction because there is an
artificial lower limit on that transform. We cannot completely remove that limit
without inducing regressions, though, so this patch just hacks an extra attempt at
creating a 2-way reduction to the end of the analysis.
As shown in the test file, we are still not getting some of the motivating cases,
so follow-on patches will be needed to solve those cases.
Differential Revision: https://reviews.llvm.org/D59710
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/Feature/weak_constant.ll | 2 | ||||
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll | 19 |
2 files changed, 10 insertions, 11 deletions
diff --git a/llvm/test/Feature/weak_constant.ll b/llvm/test/Feature/weak_constant.ll
index 4ac2e7e7d68..9a2ea126ebc 100644
--- a/llvm/test/Feature/weak_constant.ll
+++ b/llvm/test/Feature/weak_constant.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -O3 -S > %t
-; RUN: grep undef %t | count 1
+; RUN: grep undef %t | count 2
 ; RUN: grep 5 %t | count 1
 ; RUN: grep 7 %t | count 1
 ; RUN: grep 9 %t | count 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
index b5f43354927..fef9a8e50cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -54,10 +54,10 @@ define double @foo(double* nocapture %D) {
 define i1 @two_wide_fcmp_reduction(<2 x double> %a0) {
 ; CHECK-LABEL: @two_wide_fcmp_reduction(
 ; CHECK-NEXT: [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT: [[B:%.*]] = extractelement <2 x i1> [[A]], i32 0
-; CHECK-NEXT: [[C:%.*]] = extractelement <2 x i1> [[A]], i32 1
-; CHECK-NEXT: [[D:%.*]] = and i1 [[B]], [[C]]
-; CHECK-NEXT: ret i1 [[D]]
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i1> [[A]], [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[BIN_RDX]], i32 0
+; CHECK-NEXT: ret i1 [[TMP1]]
 ;
   %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
   %b = extractelement <2 x i1> %a, i32 0
@@ -96,12 +96,11 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> undef, double [[MUL]], i32 0
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL]], i32 1
 ; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1
-; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0
-; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]]
-; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>
+; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i1> [[TMP8]], [[RDX_SHUF]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[BIN_RDX]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
 ; CHECK: lor.lhs.false:
 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0