summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2020-02-18 17:23:01 +0100
committerHans Wennborg <hans@chromium.org>2020-02-19 13:38:53 +0100
commita0fe9e0517637c97f4f282bd3b76f2406a6cc1b2 (patch)
treec2d86c204ee7b7086184cabdfc3397cf9414b870
parentbe45a5a4092d678c992ac5d32e4b41da9367989a (diff)
downloadbcm5719-llvm-a0fe9e0517637c97f4f282bd3b76f2406a6cc1b2.tar.gz
bcm5719-llvm-a0fe9e0517637c97f4f282bd3b76f2406a6cc1b2.zip
[SLPVectorizer] Do not assume extracelement idx is a ConstantInt.
The index of an ExtractElementInst is not guaranteed to be a ConstantInt. It can be any integer value. Check explicitly for ConstantInts. The new test cases illustrate scenarios where we crash without this patch. I've also added another test case to check the matching of extractelement vector ops works. Reviewers: RKSimon, ABataev, dtemirbulatov, vporpo Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D74758 (cherry picked from commit e32522ca178acc42e26f21d64ef8fc180ad772bd)
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp11
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll150
2 files changed, 155 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 479bca83b51..26cae4134eb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -832,13 +832,12 @@ public:
// Extracts from consecutive indexes of the same vector better score as
// the extracts could be optimized away.
- auto *Ex1 = dyn_cast<ExtractElementInst>(V1);
- auto *Ex2 = dyn_cast<ExtractElementInst>(V2);
- if (Ex1 && Ex2 && Ex1->getVectorOperand() == Ex2->getVectorOperand() &&
- cast<ConstantInt>(Ex1->getIndexOperand())->getZExtValue() + 1 ==
- cast<ConstantInt>(Ex2->getIndexOperand())->getZExtValue()) {
+ Value *EV;
+ ConstantInt *Ex1Idx, *Ex2Idx;
+ if (match(V1, m_ExtractElement(m_Value(EV), m_ConstantInt(Ex1Idx))) &&
+ match(V2, m_ExtractElement(m_Deferred(EV), m_ConstantInt(Ex2Idx))) &&
+ Ex1Idx->getZExtValue() + 1 == Ex2Idx->getZExtValue())
return VLOperands::ScoreConsecutiveExtracts;
- }
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index 4217d730375..75165308959 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -494,3 +494,153 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl
store double %add1, double *%sidx1, align 8
ret void
}
+
+
+define i1 @ExtractIdxNotConstantInt1(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
+; CHECK-LABEL: @ExtractIdxNotConstantInt1(
+; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 undef
+; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
+; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
+; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
+; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
+; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
+; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
+; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
+; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
+; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP_I185]]
+;
+ %vecext.i291.i166 = extractelement <4 x float> %vec, i64 undef
+ %sub14.i167 = fsub float undef, %vecext.i291.i166
+ %fm = fmul float %a, %sub14.i167
+ %sub25.i168 = fsub float %fm, %b
+ %vecext.i276.i169 = extractelement <4 x float> %vec, i64 %idx2
+ %add36.i173 = fadd float %sub25.i168, 10.0
+ %mul72.i179 = fmul float %c, %vecext.i276.i169
+ %add78.i180 = fsub float %mul72.i179, 30.0
+ %add79.i181 = fadd float 2.0, %add78.i180
+ %mul123.i184 = fmul float %add36.i173, %add79.i181
+ %cmp.i185 = fcmp ogt float %mul123.i184, 0.000000e+00
+ ret i1 %cmp.i185
+}
+
+
+define i1 @ExtractIdxNotConstantInt2(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
+; CHECK-LABEL: @ExtractIdxNotConstantInt2(
+; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 1
+; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
+; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
+; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
+; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 [[IDX2:%.*]]
+; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
+; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
+; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
+; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
+; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
+; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP_I185]]
+;
+ %vecext.i291.i166 = extractelement <4 x float> %vec, i64 1
+ %sub14.i167 = fsub float undef, %vecext.i291.i166
+ %fm = fmul float %a, %sub14.i167
+ %sub25.i168 = fsub float %fm, %b
+ %vecext.i276.i169 = extractelement <4 x float> %vec, i64 %idx2
+ %add36.i173 = fadd float %sub25.i168, 10.0
+ %mul72.i179 = fmul float %c, %vecext.i276.i169
+ %add78.i180 = fsub float %mul72.i179, 30.0
+ %add79.i181 = fadd float 2.0, %add78.i180
+ %mul123.i184 = fmul float %add36.i173, %add79.i181
+ %cmp.i185 = fcmp ogt float %mul123.i184, 0.000000e+00
+ ret i1 %cmp.i185
+}
+
+
+define i1 @foo(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
+; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
+; CHECK-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
+; CHECK-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
+; CHECK-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
+; CHECK-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
+; CHECK-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
+; CHECK-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
+; CHECK-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
+; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
+; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP_I185]]
+;
+ %vecext.i291.i166 = extractelement <4 x float> %vec, i64 0
+ %sub14.i167 = fsub float undef, %vecext.i291.i166
+ %fm = fmul float %a, %sub14.i167
+ %sub25.i168 = fsub float %fm, %b
+ %vecext.i276.i169 = extractelement <4 x float> %vec, i64 1
+ %add36.i173 = fadd float %sub25.i168, 10.0
+ %mul72.i179 = fmul float %c, %vecext.i276.i169
+ %add78.i180 = fsub float %mul72.i179, 30.0
+ %add79.i181 = fadd float 2.0, %add78.i180
+ %mul123.i184 = fmul float %add36.i173, %add79.i181
+ %cmp.i185 = fcmp ogt float %mul123.i184, 0.000000e+00
+ ret i1 %cmp.i185
+}
+
+; Same as @ChecksExtractScores, but the extratelement vector operands do not match.
+define void @ChecksExtractScores_different_vectors(double* %storeArray, double* %array, <2 x double> *%vecPtr1, <2 x double>* %vecPtr2, <2 x double>* %vecPtr3, <2 x double>* %vecPtr4) {
+; CHECK-LABEL: @ChecksExtractScores_different_vectors(
+; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
+; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
+; CHECK-NEXT: [[LOADVEC:%.*]] = load <2 x double>, <2 x double>* [[VECPTR1:%.*]], align 4
+; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4
+; CHECK-NEXT: [[EXTRA0:%.*]] = extractelement <2 x double> [[LOADVEC]], i32 0
+; CHECK-NEXT: [[EXTRA1:%.*]] = extractelement <2 x double> [[LOADVEC2]], i32 1
+; CHECK-NEXT: [[LOADVEC3:%.*]] = load <2 x double>, <2 x double>* [[VECPTR3:%.*]], align 4
+; CHECK-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
+; CHECK-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
+; CHECK-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[EXTRB0]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> undef, double [[EXTRA0]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]]
+; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
+; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
+; CHECK-NEXT: ret void
+;
+ %idx0 = getelementptr inbounds double, double* %array, i64 0
+ %idx1 = getelementptr inbounds double, double* %array, i64 1
+ %loadA0 = load double, double* %idx0, align 4
+ %loadA1 = load double, double* %idx1, align 4
+
+ %loadVec = load <2 x double>, <2 x double>* %vecPtr1, align 4
+ %loadVec2 = load <2 x double>, <2 x double>* %vecPtr2, align 4
+ %extrA0 = extractelement <2 x double> %loadVec, i32 0
+ %extrA1 = extractelement <2 x double> %loadVec2, i32 1
+ %loadVec3= load <2 x double>, <2 x double>* %vecPtr3, align 4
+ %loadVec4 = load <2 x double>, <2 x double>* %vecPtr4, align 4
+ %extrB0 = extractelement <2 x double> %loadVec3, i32 0
+ %extrB1 = extractelement <2 x double> %loadVec4, i32 1
+
+ %mul0 = fmul double %extrA0, %loadA0
+ %mul1 = fmul double %extrA1, %loadA0
+ %mul3 = fmul double %extrB0, %loadA1
+ %mul4 = fmul double %extrB1, %loadA1
+ %add0 = fadd double %mul0, %mul3
+ %add1 = fadd double %mul1, %mul4
+
+ %sidx0 = getelementptr inbounds double, double* %storeArray, i64 0
+ %sidx1 = getelementptr inbounds double, double* %storeArray, i64 1
+ store double %add0, double *%sidx0, align 8
+ store double %add1, double *%sidx1, align 8
+ ret void
+}
OpenPOWER on IntegriCloud