summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-04-17 10:58:19 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-04-17 10:58:19 +0000
commit9daacec816da9eb31d3b4dda8d4ce7d102aeebcf (patch)
tree32821f18736ac7bb77edad7d2ef9b5ba254d1968 /llvm
parentcf5bdb83b02a87823a25a4cf6de5fb88b43b8135 (diff)
downloadbcm5719-llvm-9daacec816da9eb31d3b4dda8d4ce7d102aeebcf.tar.gz
bcm5719-llvm-9daacec816da9eb31d3b4dda8d4ce7d102aeebcf.zip
[CostModel][X86] Add bool anyof/allof reduction costs
On pre-AVX512 targets we can use MOVMSK to extract reduced boolean results. This is properly optimized, annoyingly AVX512 isn't and produces code that is almost as bad as the (unchanged) costs suggest...... Differential Revision: https://reviews.llvm.org/D60403 llvm-svn: 358574
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp42
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll70
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-and.ll70
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll70
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-or.ll70
5 files changed, 138 insertions, 184 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index ca32ae093cd..b0a45fba17b 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2489,6 +2489,48 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
return LT.first * Entry->Cost;
}
+ static const CostTblEntry AVX2BoolReduction[] = {
+ { ISD::AND, MVT::v16i16, 2 }, // vpmovmskb + cmp
+ { ISD::AND, MVT::v32i8, 2 }, // vpmovmskb + cmp
+ { ISD::OR, MVT::v16i16, 2 }, // vpmovmskb + cmp
+ { ISD::OR, MVT::v32i8, 2 }, // vpmovmskb + cmp
+ };
+
+ static const CostTblEntry AVX1BoolReduction[] = {
+ { ISD::AND, MVT::v4i64, 2 }, // vmovmskpd + cmp
+ { ISD::AND, MVT::v8i32, 2 }, // vmovmskps + cmp
+ { ISD::AND, MVT::v16i16, 4 }, // vextractf128 + vpand + vpmovmskb + cmp
+ { ISD::AND, MVT::v32i8, 4 }, // vextractf128 + vpand + vpmovmskb + cmp
+ { ISD::OR, MVT::v4i64, 2 }, // vmovmskpd + cmp
+ { ISD::OR, MVT::v8i32, 2 }, // vmovmskps + cmp
+ { ISD::OR, MVT::v16i16, 4 }, // vextractf128 + vpor + vpmovmskb + cmp
+ { ISD::OR, MVT::v32i8, 4 }, // vextractf128 + vpor + vpmovmskb + cmp
+ };
+
+ static const CostTblEntry SSE2BoolReduction[] = {
+ { ISD::AND, MVT::v2i64, 2 }, // movmskpd + cmp
+ { ISD::AND, MVT::v4i32, 2 }, // movmskps + cmp
+ { ISD::AND, MVT::v8i16, 2 }, // pmovmskb + cmp
+ { ISD::AND, MVT::v16i8, 2 }, // pmovmskb + cmp
+ { ISD::OR, MVT::v2i64, 2 }, // movmskpd + cmp
+ { ISD::OR, MVT::v4i32, 2 }, // movmskps + cmp
+ { ISD::OR, MVT::v8i16, 2 }, // pmovmskb + cmp
+ { ISD::OR, MVT::v16i8, 2 }, // pmovmskb + cmp
+ };
+
+ // Handle bool allof/anyof patterns.
+ if (ValTy->getVectorElementType()->isIntegerTy(1)) {
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy))
+ return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy))
+ return LT.first * Entry->Cost;
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
}
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll
index 6f9db08b707..712d9dc9b64 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll
@@ -264,59 +264,37 @@ define i32 @reduce_i8(i32 %arg) {
}
define i32 @reduce_i1(i32 %arg) {
-; SSE2-LABEL: 'reduce_i1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'reduce_i1'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'reduce_i1'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'reduce_i1'
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and.ll b/llvm/test/Analysis/CostModel/X86/reduce-and.ll
index 35caf489c00..e3d90d271e9 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-and.ll
@@ -264,59 +264,37 @@ define i32 @reduce_i8(i32 %arg) {
}
define i32 @reduce_i1(i32 %arg) {
-; SSE2-LABEL: 'reduce_i1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'reduce_i1'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'reduce_i1'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'reduce_i1'
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll
index 8cdef17e438..45f140220ee 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll
@@ -264,59 +264,37 @@ define i32 @reduce_i8(i32 %arg) {
}
define i32 @reduce_i1(i32 %arg) {
-; SSE2-LABEL: 'reduce_i1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'reduce_i1'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'reduce_i1'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'reduce_i1'
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or.ll b/llvm/test/Analysis/CostModel/X86/reduce-or.ll
index ed0ef620482..8858768276a 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-or.ll
@@ -264,59 +264,37 @@ define i32 @reduce_i8(i32 %arg) {
}
define i32 @reduce_i1(i32 %arg) {
-; SSE2-LABEL: 'reduce_i1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSSE3-LABEL: 'reduce_i1'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'reduce_i1'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'reduce_i1'
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
OpenPOWER on IntegriCloud