summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSander de Smalen <sander.desmalen@arm.com>2019-06-13 09:37:38 +0000
committerSander de Smalen <sander.desmalen@arm.com>2019-06-13 09:37:38 +0000
commit51c2fa0e2ac1ee0afdffb45ff3c2bac4bf09c3cf (patch)
tree49360c0aef3239b611cbd42593dc54c2f6470098
parent8d59f5370d46a2b9cb2714a3342eb58bacd52580 (diff)
downloadbcm5719-llvm-51c2fa0e2ac1ee0afdffb45ff3c2bac4bf09c3cf.tar.gz
bcm5719-llvm-51c2fa0e2ac1ee0afdffb45ff3c2bac4bf09c3cf.zip
Improve reduction intrinsics by overloading result value.
This patch uses the mechanism from D62995 to strengthen the definitions of the reduction intrinsics by letting the scalar result/accumulator type be overloaded from the vector element type. For example: ; The LLVM LangRef specifies that the scalar result must equal the ; vector element type, but this is not checked/enforced by LLVM. declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a) This patch changes that into: declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a) Which has the type-constraint more explicit and causes LLVM to check the result type with the vector element type. Reviewers: RKSimon, arsenm, rnk, greened, aemerson Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D62996 llvm-svn: 363240
-rw-r--r--llvm/docs/LangRef.rst30
-rw-r--r--llvm/include/llvm/IR/Intrinsics.h8
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td23
-rw-r--r--llvm/lib/IR/Function.cpp21
-rw-r--r--llvm/lib/IR/IRBuilder.cpp2
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll174
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll400
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-add.ll400
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll508
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-and.ll508
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-mul.ll440
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll508
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-or.ll508
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-smax.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll402
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-smin.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-umax.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-umin.ll420
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll540
-rw-r--r--llvm/test/Analysis/CostModel/X86/reduce-xor.ll540
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-addv.ll24
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll92
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vabs.ll12
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll58
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll58
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-bool.ll48
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll28
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll4
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll58
-rw-r--r--llvm/test/CodeGen/Generic/expand-experimental-reductions.ll44
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-add-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-add.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-and-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-and.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll32
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-fmax.ll32
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll32
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-fmin.ll32
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-mul.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-or-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-or.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-smax.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-smin.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-umax.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-umin.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll88
-rw-r--r--llvm/test/CodeGen/X86/vector-reduce-xor.ll88
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll6
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll2
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll10
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll6
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll2
-rw-r--r--llvm/utils/TableGen/IntrinsicEmitter.cpp5
62 files changed, 5283 insertions, 5258 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 6f255d14c70..6c4422a7e11 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -13719,8 +13719,8 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %a)
- declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %a)
+ declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a)
+ declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a)
Overview:
"""""""""
@@ -13780,8 +13780,8 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> %a)
- declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %a)
+ declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %a)
+ declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %a)
Overview:
"""""""""
@@ -13840,7 +13840,7 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a)
+ declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a)
Overview:
"""""""""
@@ -13861,7 +13861,7 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a)
+ declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a)
Overview:
"""""""""
@@ -13882,7 +13882,7 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> %a)
+ declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %a)
Overview:
"""""""""
@@ -13903,7 +13903,7 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %a)
+ declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a)
Overview:
"""""""""
@@ -13924,7 +13924,7 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %a)
+ declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a)
Overview:
"""""""""
@@ -13945,7 +13945,7 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %a)
+ declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a)
Overview:
"""""""""
@@ -13966,7 +13966,7 @@ Syntax:
::
- declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %a)
+ declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a)
Overview:
"""""""""
@@ -13987,8 +13987,8 @@ Syntax:
::
- declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a)
- declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a)
+ declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a)
+ declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a)
Overview:
"""""""""
@@ -14012,8 +14012,8 @@ Syntax:
::
- declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a)
- declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a)
+ declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a)
+ declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a)
Overview:
"""""""""
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 4954f163a02..f38f92022d2 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -99,7 +99,8 @@ namespace Intrinsic {
Void, VarArg, MMX, Token, Metadata, Half, Float, Double, Quad,
Integer, Vector, Pointer, Struct,
Argument, ExtendArgument, TruncArgument, HalfVecArgument,
- SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt
+ SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt,
+ VecElementArgument
} Kind;
union {
@@ -124,13 +125,14 @@ namespace Intrinsic {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
Kind == SameVecWidthArgument || Kind == PtrToArgument ||
- Kind == PtrToElt);
+ Kind == PtrToElt || Kind == VecElementArgument);
return Argument_Info >> 3;
}
ArgKind getArgumentKind() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
- Kind == SameVecWidthArgument || Kind == PtrToArgument);
+ Kind == SameVecWidthArgument || Kind == PtrToArgument ||
+ Kind == VecElementArgument);
return (ArgKind)(Argument_Info & 7);
}
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 2f4ec169ba0..8b259ee0ee9 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -173,6 +173,7 @@ class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
class LLVMPointerTo<int num> : LLVMMatchType<num>;
class LLVMPointerToElt<int num> : LLVMMatchType<num>;
class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
+class LLVMVectorElementType<int num> : LLVMMatchType<num>;
// Match the type of another intrinsic parameter that is expected to be a
// vector type, but change the element count to be half as many
@@ -1148,37 +1149,37 @@ def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>,
llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_add : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_mul : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_and : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_or : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_xor : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_smax : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_smin : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_umax : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_umin : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_fmax : Intrinsic<[llvm_anyfloat_ty],
+def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_fmin : Intrinsic<[llvm_anyfloat_ty],
+def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index affd85e79ee..770524feace 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -700,7 +700,8 @@ enum IIT_Info {
IIT_STRUCT6 = 38,
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
- IIT_F128 = 41
+ IIT_F128 = 41,
+ IIT_VEC_ELEMENT = 42
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -865,6 +866,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
DecodeIITType(NextElt, Infos, OutputTable);
return;
}
+ case IIT_VEC_ELEMENT: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument,
+ ArgInfo));
+ return;
+ }
}
llvm_unreachable("unhandled");
}
@@ -977,6 +984,12 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
Type *EltTy = VTy->getVectorElementType();
return PointerType::getUnqual(EltTy);
}
+ case IITDescriptor::VecElementArgument: {
+ Type *Ty = Tys[D.getArgumentNumber()];
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return VTy->getElementType();
+ llvm_unreachable("Expected an argument of Vector Type");
+ }
case IITDescriptor::VecOfAnyPtrsToElt:
// Return the overloaded type (which determines the pointers address space)
return Tys[D.getOverloadArgNumber()];
@@ -1237,6 +1250,12 @@ static bool matchIntrinsicType(
return ThisArgEltTy->getElementType() !=
ReferenceType->getVectorElementType();
}
+ case IITDescriptor::VecElementArgument: {
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return IsDeferredCheck ? true : DeferCheck(Ty);
+ auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ return !ReferenceType || Ty != ReferenceType->getElementType();
+ }
}
llvm_unreachable("unhandled");
}
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 36c823e7a10..0c6461c9078 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -313,7 +313,7 @@ static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
Value *Src) {
Module *M = Builder->GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Src};
- Type *Tys[] = { Src->getType()->getVectorElementType(), Src->getType() };
+ Type *Tys[] = { Src->getType() };
auto Decl = Intrinsic::getDeclaration(M, ID, Tys);
return createCallHelper(Decl, Ops, Builder);
}
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
index 8cc6eea4ec4..d42d740dabf 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll
@@ -2,278 +2,278 @@
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
; COST-LABEL: add.i8.v8i8
-; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v)
; CODE-LABEL: add.i8.v8i8
; CODE: addv b0, v0.8b
define i8 @add.i8.v8i8(<8 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %v)
ret i8 %r
}
; COST-LABEL: add.i8.v16i8
-; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v)
; CODE-LABEL: add.i8.v16i8
; CODE: addv b0, v0.16b
define i8 @add.i8.v16i8(<16 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %v)
ret i8 %r
}
; COST-LABEL: add.i16.v4i16
-; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v)
; CODE-LABEL: add.i16.v4i16
; CODE: addv h0, v0.4h
define i16 @add.i16.v4i16(<4 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %v)
ret i16 %r
}
; COST-LABEL: add.i16.v8i16
-; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v)
; CODE-LABEL: add.i16.v8i16
; CODE: addv h0, v0.8h
define i16 @add.i16.v8i16(<8 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %v)
ret i16 %r
}
; COST-LABEL: add.i32.v4i32
-; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
+; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v)
; CODE-LABEL: add.i32.v4i32
; CODE: addv s0, v0.4s
define i32 @add.i32.v4i32(<4 x i32> %v) {
- %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
+ %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v)
ret i32 %r
}
; COST-LABEL: umin.i8.v8i8
-; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v)
; CODE-LABEL: umin.i8.v8i8
; CODE: uminv b0, v0.8b
define i8 @umin.i8.v8i8(<8 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %v)
ret i8 %r
}
; COST-LABEL: umin.i8.v16i8
-; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v)
+; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v)
; CODE-LABEL: umin.i8.v16i8
; CODE: uminv b0, v0.16b
define i8 @umin.i8.v16i8(<16 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %v)
ret i8 %r
}
; COST-LABEL: umin.i16.v4i16
-; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v)
+; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v)
; CODE-LABEL: umin.i16.v4i16
; CODE: uminv h0, v0.4h
define i16 @umin.i16.v4i16(<4 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %v)
ret i16 %r
}
; COST-LABEL: umin.i16.v8i16
-; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v)
; CODE-LABEL: umin.i16.v8i16
; CODE: uminv h0, v0.8h
define i16 @umin.i16.v8i16(<8 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %v)
ret i16 %r
}
; COST-LABEL: umin.i32.v4i32
-; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v)
+; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v)
; CODE-LABEL: umin.i32.v4i32
; CODE: uminv s0, v0.4s
define i32 @umin.i32.v4i32(<4 x i32> %v) {
- %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v)
+ %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %v)
ret i32 %r
}
; COST-LABEL: umax.i8.v8i8
-; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v)
; CODE-LABEL: umax.i8.v8i8
; CODE: umaxv b0, v0.8b
define i8 @umax.i8.v8i8(<8 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %v)
ret i8 %r
}
; COST-LABEL: umax.i8.v16i8
-; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v)
+; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v)
; CODE-LABEL: umax.i8.v16i8
; CODE: umaxv b0, v0.16b
define i8 @umax.i8.v16i8(<16 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %v)
ret i8 %r
}
; COST-LABEL: umax.i16.v4i16
-; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v)
+; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v)
; CODE-LABEL: umax.i16.v4i16
; CODE: umaxv h0, v0.4h
define i16 @umax.i16.v4i16(<4 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %v)
ret i16 %r
}
; COST-LABEL: umax.i16.v8i16
-; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v)
; CODE-LABEL: umax.i16.v8i16
; CODE: umaxv h0, v0.8h
define i16 @umax.i16.v8i16(<8 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %v)
ret i16 %r
}
; COST-LABEL: umax.i32.v4i32
-; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v)
+; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v)
; CODE-LABEL: umax.i32.v4i32
; CODE: umaxv s0, v0.4s
define i32 @umax.i32.v4i32(<4 x i32> %v) {
- %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v)
+ %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %v)
ret i32 %r
}
; COST-LABEL: smin.i8.v8i8
-; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v)
; CODE-LABEL: smin.i8.v8i8
; CODE: sminv b0, v0.8b
define i8 @smin.i8.v8i8(<8 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %v)
ret i8 %r
}
; COST-LABEL: smin.i8.v16i8
-; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v)
+; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v)
; CODE-LABEL: smin.i8.v16i8
; CODE: sminv b0, v0.16b
define i8 @smin.i8.v16i8(<16 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %v)
ret i8 %r
}
; COST-LABEL: smin.i16.v4i16
-; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v)
+; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v)
; CODE-LABEL: smin.i16.v4i16
; CODE: sminv h0, v0.4h
define i16 @smin.i16.v4i16(<4 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %v)
ret i16 %r
}
; COST-LABEL: smin.i16.v8i16
-; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v)
; CODE-LABEL: smin.i16.v8i16
; CODE: sminv h0, v0.8h
define i16 @smin.i16.v8i16(<8 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %v)
ret i16 %r
}
; COST-LABEL: smin.i32.v4i32
-; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v)
+; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v)
; CODE-LABEL: smin.i32.v4i32
; CODE: sminv s0, v0.4s
define i32 @smin.i32.v4i32(<4 x i32> %v) {
- %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v)
+ %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %v)
ret i32 %r
}
; COST-LABEL: smax.i8.v8i8
-; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v)
; CODE-LABEL: smax.i8.v8i8
; CODE: smaxv b0, v0.8b
define i8 @smax.i8.v8i8(<8 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %v)
ret i8 %r
}
; COST-LABEL: smax.i8.v16i8
-; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v)
+; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v)
; CODE-LABEL: smax.i8.v16i8
; CODE: smaxv b0, v0.16b
define i8 @smax.i8.v16i8(<16 x i8> %v) {
- %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v)
+ %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %v)
ret i8 %r
}
; COST-LABEL: smax.i16.v4i16
-; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v)
+; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v)
; CODE-LABEL: smax.i16.v4i16
; CODE: smaxv h0, v0.4h
define i16 @smax.i16.v4i16(<4 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %v)
ret i16 %r
}
; COST-LABEL: smax.i16.v8i16
-; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v)
+; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v)
; CODE-LABEL: smax.i16.v8i16
; CODE: smaxv h0, v0.8h
define i16 @smax.i16.v8i16(<8 x i16> %v) {
- %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v)
+ %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %v)
ret i16 %r
}
; COST-LABEL: smax.i32.v4i32
-; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v)
+; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v)
; CODE-LABEL: smax.i32.v4i32
; CODE: smaxv s0, v0.4s
define i32 @smax.i32.v4i32(<4 x i32> %v) {
- %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v)
+ %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %v)
ret i32 %r
}
; COST-LABEL: fmin.f32.v4f32
-; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v)
+; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v)
; CODE-LABEL: fmin.f32.v4f32
; CODE: fminnmv s0, v0.4s
define float @fmin.f32.v4f32(<4 x float> %v) {
- %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v)
+ %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %v)
ret float %r
}
; COST-LABEL: fmax.f32.v4f32
-; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v)
+; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v)
; CODE-LABEL: fmax.f32.v4f32
; CODE: fmaxnmv s0, v0.4s
define float @fmax.f32.v4f32(<4 x float> %v) {
- %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v)
+ %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %v)
ret float %r
}
-declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
-
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
-
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
-
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
-
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
-
-declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
-
-declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
+declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+
+declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
+
+declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll
index 5230511b11e..0450cc0843b 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-add-widen.ll
@@ -10,298 +10,298 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-add.ll b/llvm/test/Analysis/CostModel/X86/reduce-add.ll
index bb762233385..066ddb77a20 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-add.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-add.ll
@@ -10,298 +10,298 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll
index 712d9dc9b64..a69b12aa24f 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-and-widen.ll
@@ -10,369 +10,369 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
ret i32 undef
}
define i32 @reduce_i1(i32 %arg) {
; SSE-LABEL: 'reduce_i1'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
- %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
- %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
- %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
- %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
- %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
- %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
- %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+ %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+ %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+ %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+ %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+ %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+ %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+ %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+ %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-and.ll b/llvm/test/Analysis/CostModel/X86/reduce-and.ll
index e3d90d271e9..77e8ee57e0c 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-and.ll
@@ -10,369 +10,369 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> undef)
ret i32 undef
}
define i32 @reduce_i1(i32 %arg) {
; SSE-LABEL: 'reduce_i1'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> undef)
- %V2 = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> undef)
- %V4 = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> undef)
- %V8 = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> undef)
- %V16 = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> undef)
- %V32 = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> undef)
- %V64 = call i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1> undef)
- %V128 = call i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1> undef)
+ %V1 = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> undef)
+ %V2 = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> undef)
+ %V4 = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> undef)
+ %V8 = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> undef)
+ %V16 = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> undef)
+ %V32 = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> undef)
+ %V64 = call i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1> undef)
+ %V128 = call i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v128i1(<128 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v64i1(<64 x i1>)
+declare i1 @llvm.experimental.vector.reduce.and.v128i1(<128 x i1>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll
index 1331f78f0b4..9ea2dac8f55 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-mul-widen.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i64'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll
index 68f94cdfec9..ce94216f4b4 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-mul.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-mul.ll
@@ -10,330 +10,330 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i64'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i64'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i64'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i32'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i32'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i32'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 249 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 157 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll
index 45f140220ee..d0fb99b12b3 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-or-widen.ll
@@ -10,369 +10,369 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
ret i32 undef
}
define i32 @reduce_i1(i32 %arg) {
; SSE-LABEL: 'reduce_i1'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
- %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
- %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
- %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
- %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
- %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
- %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
- %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+ %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+ %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+ %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+ %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+ %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+ %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+ %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+ %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-or.ll b/llvm/test/Analysis/CostModel/X86/reduce-or.ll
index 8858768276a..16978e5185b 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-or.ll
@@ -10,369 +10,369 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> undef)
ret i32 undef
}
define i32 @reduce_i1(i32 %arg) {
; SSE-LABEL: 'reduce_i1'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> undef)
- %V2 = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> undef)
- %V4 = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> undef)
- %V8 = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> undef)
- %V16 = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> undef)
- %V32 = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> undef)
- %V64 = call i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1> undef)
- %V128 = call i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1> undef)
+ %V1 = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> undef)
+ %V2 = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> undef)
+ %V4 = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> undef)
+ %V8 = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> undef)
+ %V16 = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> undef)
+ %V32 = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> undef)
+ %V64 = call i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1> undef)
+ %V128 = call i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.or.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v128i1(<128 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v64i1(<64 x i1>)
+declare i1 @llvm.experimental.vector.reduce.or.v128i1(<128 x i1>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll
index 6afa23c9d39..0aea5eee165 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smax-widen.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
index 804e62723be..54613791ef6 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smax.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll
index b1d020b9d28..a12768a0e2a 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smin-widen.ll
@@ -10,305 +10,305 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
index 9448208f290..777ac088e70 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-smin.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll
index 3b658e7e77e..380c57b7a79 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umax-widen.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
index d2fb14277fa..1259bdf8cb6 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umax.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll
index 7ffbac46ea6..347cae73699 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umin-widen.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
index df35f2401c7..b946310e85e 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-umin.ll
@@ -10,314 +10,314 @@
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll
index ff0d276c221..69af1f370f2 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-xor-widen.ll
@@ -10,391 +10,391 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
ret i32 undef
}
define i32 @reduce_i1(i32 %arg) {
; SSE2-LABEL: 'reduce_i1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i1'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i1'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
- %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
- %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
- %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
- %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
- %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
- %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
- %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+ %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+ %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+ %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+ %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+ %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+ %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+ %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+ %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1>)
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
index 8f7dfdd02fb..a97fc21f36c 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-xor.ll
@@ -10,391 +10,391 @@
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64> undef)
- %V2 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> undef)
- %V4 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> undef)
- %V8 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> undef)
- %V16 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> undef)
+ %V1 = call i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64> undef)
+ %V2 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> undef)
+ %V4 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> undef)
+ %V8 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> undef)
+ %V16 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> undef)
ret i32 undef
}
define i32 @reduce_i32(i32 %arg) {
; SSE-LABEL: 'reduce_i32'
-; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> undef)
- %V4 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> undef)
- %V8 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> undef)
- %V16 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> undef)
- %V32 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> undef)
+ %V2 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> undef)
+ %V4 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> undef)
+ %V8 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> undef)
+ %V16 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> undef)
+ %V32 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> undef)
ret i32 undef
}
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> undef)
- %V4 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> undef)
- %V8 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> undef)
- %V16 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> undef)
- %V32 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> undef)
- %V64 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> undef)
+ %V2 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> undef)
+ %V4 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> undef)
+ %V8 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> undef)
+ %V16 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> undef)
+ %V32 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> undef)
+ %V64 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> undef)
ret i32 undef
}
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V2 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> undef)
- %V4 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> undef)
- %V8 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> undef)
- %V16 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> undef)
- %V32 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> undef)
- %V64 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> undef)
- %V128 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> undef)
+ %V2 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> undef)
+ %V4 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> undef)
+ %V8 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> undef)
+ %V16 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> undef)
+ %V32 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> undef)
+ %V64 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> undef)
+ %V128 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> undef)
ret i32 undef
}
define i32 @reduce_i1(i32 %arg) {
; SSE2-LABEL: 'reduce_i1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i1'
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i1'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i1'
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i1'
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %V1 = call i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1> undef)
- %V2 = call i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1> undef)
- %V4 = call i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1> undef)
- %V8 = call i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1> undef)
- %V16 = call i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1> undef)
- %V32 = call i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1> undef)
- %V64 = call i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1> undef)
- %V128 = call i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1> undef)
+ %V1 = call i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1> undef)
+ %V2 = call i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1> undef)
+ %V4 = call i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1> undef)
+ %V8 = call i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1> undef)
+ %V16 = call i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1> undef)
+ %V32 = call i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1> undef)
+ %V64 = call i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1> undef)
+ %V128 = call i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1> undef)
ret i32 undef
}
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v1i64(<1 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v1i64(<1 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v1i1(<1 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v2i1(<2 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v4i1(<4 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v8i1(<8 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v16i1(<16 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v32i1(<32 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v64i1(<64 x i1>)
-declare i1 @llvm.experimental.vector.reduce.xor.i1.v128i1(<128 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v1i1(<1 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v2i1(<2 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v4i1(<4 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v8i1(<8 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v16i1(<16 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v32i1(<32 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v64i1(<64 x i1>)
+declare i1 @llvm.experimental.vector.reduce.xor.v128i1(<128 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index e65992e9913..2fd57f70a65 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -1,16 +1,16 @@
; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s
; Function Attrs: nounwind readnone
-declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
+declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
+declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
define i8 @add_B(<16 x i8>* %arr) {
; CHECK-LABEL: add_B
; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
%bin.rdx = load <16 x i8>, <16 x i8>* %arr
- %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %bin.rdx)
+ %r = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %bin.rdx)
ret i8 %r
}
@@ -18,7 +18,7 @@ define i16 @add_H(<8 x i16>* %arr) {
; CHECK-LABEL: add_H
; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
%bin.rdx = load <8 x i16>, <8 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %bin.rdx)
+ %r = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %bin.rdx)
ret i16 %r
}
@@ -26,7 +26,7 @@ define i32 @add_S( <4 x i32>* %arr) {
; CHECK-LABEL: add_S
; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
%bin.rdx = load <4 x i32>, <4 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %bin.rdx)
+ %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
ret i32 %r
}
@@ -34,11 +34,11 @@ define i64 @add_D(<2 x i64>* %arr) {
; CHECK-LABEL: add_D
; CHECK-NOT: addv
%bin.rdx = load <2 x i64>, <2 x i64>* %arr
- %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %bin.rdx)
+ %r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %bin.rdx)
ret i64 %r
}
-declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) {
; CHECK-LABEL: oversized_ADDV_256
@@ -54,16 +54,16 @@ entry:
%7 = icmp slt <8 x i32> %6, zeroinitializer
%8 = sub nsw <8 x i32> zeroinitializer, %6
%9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
- %r = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %9)
+ %r = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %9)
ret i32 %r
}
-declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
define i32 @oversized_ADDV_512(<16 x i32>* %arr) {
; CHECK-LABEL: oversized_ADDV_512
; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
%bin.rdx = load <16 x i32>, <16 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %bin.rdx)
+ %r = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
ret i32 %r
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 760a8f8419f..b6f95a7f3bf 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -2,28 +2,28 @@
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
-
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
-
-declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
+declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
+declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+
+declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
; CHECK-LABEL: smax_B
; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
%arr.load = load <16 x i8>, <16 x i8>* %arr
- %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %arr.load)
+ %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %arr.load)
ret i8 %r
}
@@ -31,7 +31,7 @@ define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
%arr.load = load <8 x i16>, <8 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %arr.load)
ret i16 %r
}
@@ -39,7 +39,7 @@ define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smax_S(<4 x i32> * nocapture readonly %arr) {
%arr.load = load <4 x i32>, <4 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %arr.load)
ret i32 %r
}
@@ -47,7 +47,7 @@ define i32 @smax_S(<4 x i32> * nocapture readonly %arr) {
; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
%arr.load = load <16 x i8>, <16 x i8>* %arr
- %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %arr.load)
+ %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %arr.load)
ret i8 %r
}
@@ -55,7 +55,7 @@ define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
%arr.load = load <8 x i16>, <8 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %arr.load)
ret i16 %r
}
@@ -63,7 +63,7 @@ define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
%arr.load = load <4 x i32>, <4 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %arr.load)
ret i32 %r
}
@@ -71,7 +71,7 @@ define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
%arr.load = load <16 x i8>, <16 x i8>* %arr
- %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %arr.load)
+ %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %arr.load)
ret i8 %r
}
@@ -79,7 +79,7 @@ define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
%arr.load = load <8 x i16>, <8 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %arr.load)
ret i16 %r
}
@@ -87,7 +87,7 @@ define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
%arr.load = load <4 x i32>, <4 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %arr.load)
ret i32 %r
}
@@ -95,7 +95,7 @@ define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
%arr.load = load <16 x i8>, <16 x i8>* %arr
- %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %arr.load)
+ %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %arr.load)
ret i8 %r
}
@@ -103,7 +103,7 @@ define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
%arr.load = load <8 x i16>, <8 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %arr.load)
ret i16 %r
}
@@ -111,7 +111,7 @@ define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
%arr.load = load <4 x i32>, <4 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %arr.load)
ret i32 %r
}
@@ -119,7 +119,7 @@ define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
; CHECK: fmaxnmv
define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
%arr.load = load <4 x float>, <4 x float>* %arr
- %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %arr.load)
+ %r = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %arr.load)
ret float %r
}
@@ -127,22 +127,22 @@ define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
; CHECK: fminnmv
define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
%arr.load = load <4 x float>, <4 x float>* %arr
- %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %arr.load)
+ %r = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %arr.load)
ret float %r
}
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_256
; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: umaxv {{h[0-9]+}}, [[V0]]
%arr.load = load <16 x i16>, <16 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %arr.load)
ret i16 %r
}
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umax_512
@@ -151,22 +151,22 @@ define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
%arr.load = load <16 x i32>, <16 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %arr.load)
ret i32 %r
}
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_256
; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: uminv {{h[0-9]+}}, [[V0]]
%arr.load = load <16 x i16>, <16 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %arr.load)
ret i16 %r
}
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_umin_512
@@ -175,22 +175,22 @@ define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
%arr.load = load <16 x i32>, <16 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %arr.load)
ret i32 %r
}
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_256
; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: smaxv {{h[0-9]+}}, [[V0]]
%arr.load = load <16 x i16>, <16 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %arr.load)
ret i16 %r
}
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smax_512
@@ -199,22 +199,22 @@ define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
%arr.load = load <16 x i32>, <16 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %arr.load)
ret i32 %r
}
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_256
; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: sminv {{h[0-9]+}}, [[V0]]
%arr.load = load <16 x i16>, <16 x i16>* %arr
- %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %arr.load)
+ %r = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %arr.load)
ret i16 %r
}
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-LABEL: oversized_smin_512
@@ -223,6 +223,6 @@ define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) {
; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
%arr.load = load <16 x i32>, <16 x i32>* %arr
- %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %arr.load)
+ %r = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %arr.load)
ret i32 %r
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index c77f48cc17b..3720170584c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -141,7 +141,7 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
ret <2 x i64> %tmp4
}
-declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) {
; CHECK-LABEL: uabdl8h_rdx
@@ -155,11 +155,11 @@ define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) {
%abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer
%ababs = sub nsw <16 x i16> zeroinitializer, %abdiff
%absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff
- %reduced_v = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %absel)
+ %reduced_v = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %absel)
ret i16 %reduced_v
}
-declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) {
; CHECK-LABEL: uabdl4s_rdx
@@ -173,11 +173,11 @@ define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) {
%abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer
%ababs = sub nsw <8 x i32> zeroinitializer, %abdiff
%absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff
- %reduced_v = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %absel)
+ %reduced_v = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %absel)
ret i32 %reduced_v
}
-declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
define i64 @uabdl2d_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
; CHECK: uabdl2d_rdx
@@ -191,7 +191,7 @@ define i64 @uabdl2d_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
%abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer
%ababs = sub nsw <4 x i64> zeroinitializer, %abdiff
%absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff
- %reduced_v = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> %absel)
+ %reduced_v = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %absel)
ret i64 %reduced_v
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
index eb4bd8c5c7b..2f899878363 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
@@ -1,28 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
-declare i1 @llvm.experimental.vector.reduce.add.i1.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.add.i24.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.add.i128.v1i128(<1 x i128> %a)
-
-declare i8 @llvm.experimental.vector.reduce.add.i8.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.add.i1.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.add.i24.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.add.i128.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %a)
+declare i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
+declare i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
+declare i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
+declare i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
+declare i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
+declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
+declare i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)
+
+declare i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
+declare i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
+declare i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
+declare i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
+declare i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
+declare i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
+declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
define i1 @test_v1i1(<1 x i1> %a) nounwind {
; CHECK-LABEL: test_v1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
- %b = call i1 @llvm.experimental.vector.reduce.add.i1.v1i1(<1 x i1> %a)
+ %b = call i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
ret i1 %b
}
@@ -32,7 +32,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.add.i8.v1i8(<1 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
ret i8 %b
}
@@ -42,7 +42,7 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
- %b = call i16 @llvm.experimental.vector.reduce.add.i16.v1i16(<1 x i16> %a)
+ %b = call i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
ret i16 %b
}
@@ -50,7 +50,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
; CHECK-LABEL: test_v1i24:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call i24 @llvm.experimental.vector.reduce.add.i24.v1i24(<1 x i24> %a)
+ %b = call i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
ret i24 %b
}
@@ -60,7 +60,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.add.i32.v1i32(<1 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
ret i32 %b
}
@@ -70,7 +70,7 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %b = call i64 @llvm.experimental.vector.reduce.add.i64.v1i64(<1 x i64> %a)
+ %b = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
ret i64 %b
}
@@ -78,7 +78,7 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
; CHECK-LABEL: test_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call i128 @llvm.experimental.vector.reduce.add.i128.v1i128(<1 x i128> %a)
+ %b = call i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)
ret i128 %b
}
@@ -92,7 +92,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-NEXT: addv h0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.add.i8.v3i8(<3 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
ret i8 %b
}
@@ -109,7 +109,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-NEXT: addv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.add.i8.v9i8(<9 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
ret i8 %b
}
@@ -120,7 +120,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.add.i32.v3i32(<3 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
ret i32 %b
}
@@ -131,7 +131,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %b = call i1 @llvm.experimental.vector.reduce.add.i1.v4i1(<4 x i1> %a)
+ %b = call i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
ret i1 %b
}
@@ -141,7 +141,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i24 @llvm.experimental.vector.reduce.add.i24.v4i24(<4 x i24> %a)
+ %b = call i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
ret i24 %b
}
@@ -151,7 +151,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
; CHECK-NEXT: adds x0, x0, x2
; CHECK-NEXT: adcs x1, x1, x3
; CHECK-NEXT: ret
- %b = call i128 @llvm.experimental.vector.reduce.add.i128.v2i128(<2 x i128> %a)
+ %b = call i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
ret i128 %b
}
@@ -164,6 +164,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
ret i32 %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index 78032521a85..bf186263b88 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -1,28 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
-declare i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.and.i24.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.and.i128.v1i128(<1 x i128> %a)
-
-declare i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.and.i24.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.and.i128.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
+declare i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
+declare i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
+declare i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
+declare i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
+declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
+declare i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)
+
+declare i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
+declare i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
+declare i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+declare i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
+declare i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
+declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
define i1 @test_v1i1(<1 x i1> %a) nounwind {
; CHECK-LABEL: test_v1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
- %b = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> %a)
+ %b = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
ret i1 %b
}
@@ -32,7 +32,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.and.i8.v1i8(<1 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
ret i8 %b
}
@@ -42,7 +42,7 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
- %b = call i16 @llvm.experimental.vector.reduce.and.i16.v1i16(<1 x i16> %a)
+ %b = call i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
ret i16 %b
}
@@ -50,7 +50,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
; CHECK-LABEL: test_v1i24:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call i24 @llvm.experimental.vector.reduce.and.i24.v1i24(<1 x i24> %a)
+ %b = call i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
ret i24 %b
}
@@ -60,7 +60,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.and.i32.v1i32(<1 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
ret i32 %b
}
@@ -70,7 +70,7 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %b = call i64 @llvm.experimental.vector.reduce.and.i64.v1i64(<1 x i64> %a)
+ %b = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
ret i64 %b
}
@@ -78,7 +78,7 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
; CHECK-LABEL: test_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call i128 @llvm.experimental.vector.reduce.and.i128.v1i128(<1 x i128> %a)
+ %b = call i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)
ret i128 %b
}
@@ -89,7 +89,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-NEXT: and w8, w8, w2
; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
ret i8 %b
}
@@ -122,7 +122,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.and.i8.v9i8(<9 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
ret i8 %b
}
@@ -137,7 +137,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.and.i32.v3i32(<3 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
ret i32 %b
}
@@ -154,7 +154,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %b = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> %a)
+ %b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
ret i1 %b
}
@@ -167,7 +167,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
- %b = call i24 @llvm.experimental.vector.reduce.and.i24.v4i24(<4 x i24> %a)
+ %b = call i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
ret i24 %b
}
@@ -177,7 +177,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
; CHECK-NEXT: and x0, x0, x2
; CHECK-NEXT: and x1, x1, x3
; CHECK-NEXT: ret
- %b = call i128 @llvm.experimental.vector.reduce.and.i128.v2i128(<2 x i128> %a)
+ %b = call i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
ret i128 %b
}
@@ -193,6 +193,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
ret i32 %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index e882a804c2d..e404159c763 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
-declare i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> %a)
-declare i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
+declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1:
@@ -24,7 +24,7 @@ define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, lt
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.and.i1.v1i1(<1 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -41,7 +41,7 @@ define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <2 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.and.i1.v2i1(<2 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -58,7 +58,7 @@ define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <4 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.and.i1.v4i1(<4 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -73,7 +73,7 @@ define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <8 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.and.i1.v8i1(<8 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -88,7 +88,7 @@ define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <16 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.and.i1.v16i1(<16 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -105,7 +105,7 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <32 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.and.i1.v32i1(<32 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -119,7 +119,7 @@ define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, lt
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.or.i1.v1i1(<1 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -136,7 +136,7 @@ define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <2 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.or.i1.v2i1(<2 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -153,7 +153,7 @@ define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <4 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.or.i1.v4i1(<4 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -168,7 +168,7 @@ define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <8 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.or.i1.v8i1(<8 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -183,7 +183,7 @@ define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <16 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.or.i1.v16i1(<16 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -200,7 +200,7 @@ define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <32 x i8> %a0, zeroinitializer
- %y = call i1 @llvm.experimental.vector.reduce.or.i1.v32i1(<32 x i1> %x)
+ %y = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 90768bb5351..72c94aaf2d6 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
-declare half @llvm.experimental.vector.reduce.fmax.f16.v1f16(<1 x half> %a)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v1f32(<1 x float> %a)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v1f64(<1 x double> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmax.f128.v1f128(<1 x fp128> %a)
+declare half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
+declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
+declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
+declare fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v3f32(<3 x float> %a)
-declare fp128 @llvm.experimental.vector.reduce.fmax.f128.v2f128(<2 x fp128> %a)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a)
+declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
+declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
+declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call nnan half @llvm.experimental.vector.reduce.fmax.f16.v1f16(<1 x half> %a)
+ %b = call nnan half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
ret half %b
}
@@ -24,7 +24,7 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
- %b = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v1f32(<1 x float> %a)
+ %b = call nnan float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
ret float %b
}
@@ -32,7 +32,7 @@ define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v1f64(<1 x double> %a)
+ %b = call nnan double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
ret double %b
}
@@ -40,7 +40,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.f128.v1f128(<1 x fp128> %a)
+ %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
ret fp128 %b
}
@@ -52,7 +52,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fmaxnmv s0, v0.4s
; CHECK-NEXT: ret
- %b = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v3f32(<3 x float> %a)
+ %b = call nnan float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
ret float %b
}
@@ -60,7 +60,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
; CHECK-NEXT: b fmaxl
- %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.f128.v2f128(<2 x fp128> %a)
+ %b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}
@@ -72,6 +72,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fmaxnmv s0, v0.4s
; CHECK-NEXT: ret
- %b = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a)
+ %b = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
ret float %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll b/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
index 999958b3025..cdb557d1a39 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
@@ -24,8 +24,8 @@ entry:
%1 = insertelement <4 x double> %0, double 1.0, i32 1
%2 = insertelement <4 x double> %1, double 1.0, i32 2
%3 = insertelement <4 x double> %2, double 1.0, i32 3
- %4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %3)
+ %4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %3)
ret double %4
}
-declare double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index eff97260ea3..99b6ebe2f1d 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -1,28 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
-declare i1 @llvm.experimental.vector.reduce.umax.i1.v1i1(<1 x i1> %a)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v1i8(<1 x i8> %a)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v1i16(<1 x i16> %a)
-declare i24 @llvm.experimental.vector.reduce.umax.i24.v1i24(<1 x i24> %a)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v1i32(<1 x i32> %a)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> %a)
-declare i128 @llvm.experimental.vector.reduce.umax.i128.v1i128(<1 x i128> %a)
-
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v3i8(<3 x i8> %a)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v9i8(<9 x i8> %a)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v3i32(<3 x i32> %a)
-declare i1 @llvm.experimental.vector.reduce.umax.i1.v4i1(<4 x i1> %a)
-declare i24 @llvm.experimental.vector.reduce.umax.i24.v4i24(<4 x i24> %a)
-declare i128 @llvm.experimental.vector.reduce.umax.i128.v2i128(<2 x i128> %a)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %a)
+declare i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)
+declare i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)
+declare i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)
+declare i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)
+declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
+declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
+declare i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)
+
+declare i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)
+declare i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)
+declare i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)
+declare i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)
+declare i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)
+declare i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)
+declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)
define i1 @test_v1i1(<1 x i1> %a) nounwind {
; CHECK-LABEL: test_v1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
- %b = call i1 @llvm.experimental.vector.reduce.umax.i1.v1i1(<1 x i1> %a)
+ %b = call i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)
ret i1 %b
}
@@ -32,7 +32,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.umax.i8.v1i8(<1 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)
ret i8 %b
}
@@ -42,7 +42,7 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
- %b = call i16 @llvm.experimental.vector.reduce.umax.i16.v1i16(<1 x i16> %a)
+ %b = call i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)
ret i16 %b
}
@@ -50,7 +50,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
; CHECK-LABEL: test_v1i24:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call i24 @llvm.experimental.vector.reduce.umax.i24.v1i24(<1 x i24> %a)
+ %b = call i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)
ret i24 %b
}
@@ -60,7 +60,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.umax.i32.v1i32(<1 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
ret i32 %b
}
@@ -70,7 +70,7 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %b = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> %a)
+ %b = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
ret i64 %b
}
@@ -78,7 +78,7 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
; CHECK-LABEL: test_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
- %b = call i128 @llvm.experimental.vector.reduce.umax.i128.v1i128(<1 x i128> %a)
+ %b = call i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)
ret i128 %b
}
@@ -93,7 +93,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.umax.i8.v3i8(<3 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)
ret i8 %b
}
@@ -110,7 +110,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i8 @llvm.experimental.vector.reduce.umax.i8.v9i8(<9 x i8> %a)
+ %b = call i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)
ret i8 %b
}
@@ -121,7 +121,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.umax.i32.v3i32(<3 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)
ret i32 %b
}
@@ -134,7 +134,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
- %b = call i1 @llvm.experimental.vector.reduce.umax.i1.v4i1(<4 x i1> %a)
+ %b = call i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)
ret i1 %b
}
@@ -145,7 +145,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i24 @llvm.experimental.vector.reduce.umax.i24.v4i24(<4 x i24> %a)
+ %b = call i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)
ret i24 %b
}
@@ -159,7 +159,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
; CHECK-NEXT: csel x0, x8, x9, eq
; CHECK-NEXT: csel x1, x1, x3, hi
; CHECK-NEXT: ret
- %b = call i128 @llvm.experimental.vector.reduce.umax.i128.v2i128(<2 x i128> %a)
+ %b = call i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)
ret i128 %b
}
@@ -172,6 +172,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
- %b = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %a)
+ %b = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)
ret i32 %b
}
diff --git a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
index 5fc7427a9aa..a13c23d2e59 100644
--- a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
+++ b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
@@ -1,22 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions
-declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
define i64 @add_i64(<2 x i64> %vec) {
@@ -28,7 +28,7 @@ define i64 @add_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -41,7 +41,7 @@ define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -54,7 +54,7 @@ define i64 @and_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -67,7 +67,7 @@ define i64 @or_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -80,7 +80,7 @@ define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -230,7 +230,7 @@ define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -244,7 +244,7 @@ define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -258,7 +258,7 @@ define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -272,7 +272,7 @@ define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %r = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %vec)
+ %r = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec)
ret i64 %r
}
@@ -286,7 +286,7 @@ define double @fmax_f64(<2 x double> %vec) {
; CHECK-NEXT: ret double [[TMP0]]
;
entry:
- %r = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %vec)
+ %r = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %vec)
ret double %r
}
@@ -300,6 +300,6 @@ define double @fmin_f64(<2 x double> %vec) {
; CHECK-NEXT: ret double [[TMP0]]
;
entry:
- %r = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %vec)
+ %r = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %vec)
ret double %r
}
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll
index 9a045f7bce5..b886a745edc 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll
@@ -32,7 +32,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -74,7 +74,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -124,7 +124,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -187,7 +187,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -229,7 +229,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -277,7 +277,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -338,7 +338,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -410,7 +410,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -501,7 +501,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -549,7 +549,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -603,7 +603,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -667,7 +667,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -744,7 +744,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -832,7 +832,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -939,7 +939,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -981,7 +981,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1029,7 +1029,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1085,7 +1085,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1149,7 +1149,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1238,7 +1238,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1337,7 +1337,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1453,32 +1453,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll
index 7abdf1cb037..02fb375a318 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll
@@ -32,7 +32,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -74,7 +74,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -124,7 +124,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -187,7 +187,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -216,7 +216,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -264,7 +264,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -325,7 +325,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -397,7 +397,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -488,7 +488,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -520,7 +520,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -573,7 +573,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -637,7 +637,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -714,7 +714,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -802,7 +802,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -909,7 +909,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -949,7 +949,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1012,7 +1012,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1089,7 +1089,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1153,7 +1153,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1242,7 +1242,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1341,7 +1341,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1457,32 +1457,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll
index 16c45da9c33..d169f0cb138 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll
@@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -411,7 +411,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -436,7 +436,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -465,7 +465,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -528,7 +528,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -597,7 +597,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -675,7 +675,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -709,7 +709,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -747,7 +747,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -791,7 +791,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -841,7 +841,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -930,7 +930,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1027,7 +1027,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1137,32 +1137,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll
index 9a9f5f21af5..de102b44ae3 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll
@@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -410,7 +410,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -434,7 +434,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -463,7 +463,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -526,7 +526,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -595,7 +595,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -673,7 +673,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -705,7 +705,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -739,7 +739,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -781,7 +781,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -831,7 +831,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -920,7 +920,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1017,7 +1017,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1127,32 +1127,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.and.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.and.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.and.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.and.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index 3fc44b82bc1..6a6333bcdda 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -35,7 +35,7 @@ define float @test_v2f32(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0)
ret float %1
}
@@ -74,7 +74,7 @@ define float @test_v4f32(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0)
ret float %1
}
@@ -121,7 +121,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0)
ret float %1
}
@@ -175,7 +175,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0)
ret float %1
}
@@ -202,7 +202,7 @@ define double @test_v2f64(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a0)
ret double %1
}
@@ -232,7 +232,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0)
ret double %1
}
@@ -267,7 +267,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0)
ret double %1
}
@@ -309,16 +309,16 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll
index 7582deda097..f63d5dcdc09 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll
@@ -35,7 +35,7 @@ define float @test_v2f32(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0)
ret float %1
}
@@ -74,7 +74,7 @@ define float @test_v4f32(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a0)
ret float %1
}
@@ -121,7 +121,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> %a0)
ret float %1
}
@@ -175,7 +175,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a0)
ret float %1
}
@@ -202,7 +202,7 @@ define double @test_v2f64(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a0)
ret double %1
}
@@ -232,7 +232,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %a0)
ret double %1
}
@@ -267,7 +267,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double> %a0)
ret double %1
}
@@ -309,16 +309,16 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float>)
+declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v8f64(<8 x double>)
+declare double @llvm.experimental.vector.reduce.fmax.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index 1789738cb8f..d3a799f18a5 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -35,7 +35,7 @@ define float @test_v2f32(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0)
ret float %1
}
@@ -74,7 +74,7 @@ define float @test_v4f32(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0)
ret float %1
}
@@ -121,7 +121,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0)
ret float %1
}
@@ -175,7 +175,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0)
+ %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0)
ret float %1
}
@@ -202,7 +202,7 @@ define double @test_v2f64(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0)
ret double %1
}
@@ -232,7 +232,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0)
ret double %1
}
@@ -267,7 +267,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0)
ret double %1
}
@@ -309,16 +309,16 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)
+ %1 = call nnan double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
index 66129ece478..74e5a722abd 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
@@ -35,7 +35,7 @@ define float @test_v2f32(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0)
ret float %1
}
@@ -74,7 +74,7 @@ define float @test_v4f32(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a0)
ret float %1
}
@@ -121,7 +121,7 @@ define float @test_v8f32(<8 x float> %a0) {
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> %a0)
ret float %1
}
@@ -175,7 +175,7 @@ define float @test_v16f32(<16 x float> %a0) {
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a0)
ret float %1
}
@@ -202,7 +202,7 @@ define double @test_v2f64(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a0)
ret double %1
}
@@ -232,7 +232,7 @@ define double @test_v4f64(<4 x double> %a0) {
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double> %a0)
ret double %1
}
@@ -267,7 +267,7 @@ define double @test_v8f64(<8 x double> %a0) {
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double> %a0)
ret double %1
}
@@ -309,16 +309,16 @@ define double @test_v16f64(<16 x double> %a0) {
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float>)
-declare float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float>)
+declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double>)
-declare double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v4f64(<4 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v8f64(<8 x double>)
+declare double @llvm.experimental.vector.reduce.fmin.v16f64(<16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
index 62143fb1ffe..573f9c836c2 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
@@ -86,7 +86,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -233,7 +233,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -446,7 +446,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -767,7 +767,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -803,7 +803,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -849,7 +849,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -920,7 +920,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -1005,7 +1005,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1115,7 +1115,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1148,7 +1148,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1183,7 +1183,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1224,7 +1224,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1287,7 +1287,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1407,7 +1407,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1545,7 +1545,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1587,7 +1587,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1647,7 +1647,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1726,7 +1726,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1945,7 +1945,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -2220,7 +2220,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -2599,7 +2599,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -3086,32 +3086,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 64c6265530c..072752b9543 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -86,7 +86,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -233,7 +233,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -446,7 +446,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -767,7 +767,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512DQVL-NEXT: vmovq %xmm0, %rax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -796,7 +796,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -842,7 +842,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -913,7 +913,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -998,7 +998,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1108,7 +1108,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1140,7 +1140,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1190,7 +1190,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1231,7 +1231,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1294,7 +1294,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1414,7 +1414,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1552,7 +1552,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1592,7 +1592,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1642,7 +1642,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1696,7 +1696,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1909,7 +1909,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -2182,7 +2182,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -2555,7 +2555,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -3032,32 +3032,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax
; AVX512DQVL-NEXT: vzeroupper
; AVX512DQVL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.mul.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.mul.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.mul.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.mul.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.mul.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.mul.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.mul.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.mul.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll
index b5abc094dfd..720b47371e3 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll
@@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -411,7 +411,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -436,7 +436,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -465,7 +465,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -528,7 +528,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -597,7 +597,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -675,7 +675,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -709,7 +709,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -747,7 +747,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -791,7 +791,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -841,7 +841,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -930,7 +930,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1027,7 +1027,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1137,32 +1137,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll
index ba9c3c6a08c..ddfe176d5e8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll
@@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -410,7 +410,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -434,7 +434,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -463,7 +463,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -526,7 +526,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -595,7 +595,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -673,7 +673,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -705,7 +705,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -739,7 +739,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -781,7 +781,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -831,7 +831,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -920,7 +920,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1017,7 +1017,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1127,32 +1127,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
index f1eff5e7469..447c17a366f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
@@ -74,7 +74,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -190,7 +190,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -370,7 +370,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -674,7 +674,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -714,7 +714,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -762,7 +762,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -834,7 +834,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -924,7 +924,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1047,7 +1047,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1080,7 +1080,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1115,7 +1115,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1159,7 +1159,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1223,7 +1223,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1297,7 +1297,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1388,7 +1388,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1434,7 +1434,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1490,7 +1490,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1558,7 +1558,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1628,7 +1628,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1724,7 +1724,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1838,7 +1838,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1985,32 +1985,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 7d1126c7764..1b15929bc19 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -74,7 +74,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -190,7 +190,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -370,7 +370,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -674,7 +674,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -788,7 +788,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512VL-NEXT: vpmaxsq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -836,7 +836,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -908,7 +908,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -998,7 +998,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1121,7 +1121,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1259,7 +1259,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1335,7 +1335,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1379,7 +1379,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1443,7 +1443,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1517,7 +1517,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1608,7 +1608,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1746,7 +1746,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1822,7 +1822,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1908,7 +1908,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1978,7 +1978,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: xorb $127, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -2074,7 +2074,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -2188,7 +2188,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -2335,32 +2335,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
index aacf799bbed..52e3b94036f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
@@ -74,7 +74,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -189,7 +189,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -369,7 +369,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -673,7 +673,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -713,7 +713,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -761,7 +761,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -833,7 +833,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -923,7 +923,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1046,7 +1046,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1079,7 +1079,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1114,7 +1114,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1158,7 +1158,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1222,7 +1222,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1296,7 +1296,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1387,7 +1387,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1433,7 +1433,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1489,7 +1489,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1557,7 +1557,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1627,7 +1627,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1723,7 +1723,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1837,7 +1837,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1984,32 +1984,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 4f9ad7b1e05..9625f4c8579 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -74,7 +74,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -189,7 +189,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -369,7 +369,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -673,7 +673,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -787,7 +787,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512VL-NEXT: vpminsq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -835,7 +835,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -907,7 +907,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -997,7 +997,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1120,7 +1120,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1258,7 +1258,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1334,7 +1334,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1378,7 +1378,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1442,7 +1442,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1516,7 +1516,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1607,7 +1607,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1745,7 +1745,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1821,7 +1821,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1907,7 +1907,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1977,7 +1977,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: xorb $-128, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -2073,7 +2073,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -2187,7 +2187,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -2334,32 +2334,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
index 4290994e740..7b3d4b4c1ef 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
@@ -77,7 +77,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -203,7 +203,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -399,7 +399,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -731,7 +731,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -774,7 +774,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -828,7 +828,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -909,7 +909,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -1014,7 +1014,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1164,7 +1164,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1210,7 +1210,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1262,7 +1262,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1328,7 +1328,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1419,7 +1419,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1524,7 +1524,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1651,7 +1651,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1693,7 +1693,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1741,7 +1741,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1797,7 +1797,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1866,7 +1866,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1959,7 +1959,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -2064,7 +2064,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -2187,32 +2187,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index db27adf0ebb..f92153e3fa8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -77,7 +77,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -203,7 +203,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -399,7 +399,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -731,7 +731,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -823,7 +823,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512VL-NEXT: vpmaxuq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -877,7 +877,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -958,7 +958,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -1063,7 +1063,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1213,7 +1213,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1303,7 +1303,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1377,7 +1377,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1443,7 +1443,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512VL-NEXT: notl %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1534,7 +1534,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1639,7 +1639,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1766,7 +1766,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1854,7 +1854,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1941,7 +1941,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -2037,7 +2037,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -2106,7 +2106,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -2199,7 +2199,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -2304,7 +2304,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -2427,32 +2427,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
index 5e606cd5db8..e5cbd1b73e5 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
@@ -77,7 +77,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -202,7 +202,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -398,7 +398,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -730,7 +730,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -773,7 +773,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -827,7 +827,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -908,7 +908,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -1013,7 +1013,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1163,7 +1163,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1209,7 +1209,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1261,7 +1261,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1308,7 +1308,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1376,7 +1376,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1456,7 +1456,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1557,7 +1557,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1599,7 +1599,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1647,7 +1647,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1703,7 +1703,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1751,7 +1751,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -1819,7 +1819,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1897,7 +1897,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1992,32 +1992,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index ad52accbf7d..258cf462342 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -77,7 +77,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX512VL-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -202,7 +202,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -398,7 +398,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -730,7 +730,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -822,7 +822,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX512VL-NEXT: vpminuq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -876,7 +876,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -957,7 +957,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -1062,7 +1062,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -1212,7 +1212,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -1302,7 +1302,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX512VL-NEXT: vmovd %xmm0, %eax
; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512VL-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -1376,7 +1376,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -1423,7 +1423,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -1491,7 +1491,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -1571,7 +1571,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -1672,7 +1672,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -1760,7 +1760,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
; AVX512VL-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -1847,7 +1847,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -1943,7 +1943,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -1991,7 +1991,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -2059,7 +2059,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -2137,7 +2137,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -2232,32 +2232,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll
index 7486a190f9f..7bd3db37c83 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll
@@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -411,7 +411,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -436,7 +436,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -465,7 +465,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -528,7 +528,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -597,7 +597,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -675,7 +675,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -709,7 +709,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -747,7 +747,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -791,7 +791,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -841,7 +841,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -930,7 +930,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1027,7 +1027,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1137,32 +1137,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
index 85eafac30ef..7d590cd80d1 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
@@ -24,7 +24,7 @@ define i64 @test_v2i64(<2 x i64> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %a0)
ret i64 %1
}
@@ -66,7 +66,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64> %a0)
ret i64 %1
}
@@ -114,7 +114,7 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64> %a0)
ret i64 %1
}
@@ -171,7 +171,7 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64> %a0)
+ %1 = call i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64> %a0)
ret i64 %1
}
@@ -193,7 +193,7 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32> %a0)
ret i32 %1
}
@@ -215,7 +215,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> %a0)
ret i32 %1
}
@@ -265,7 +265,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32> %a0)
ret i32 %1
}
@@ -321,7 +321,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32> %a0)
ret i32 %1
}
@@ -386,7 +386,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32> %a0)
+ %1 = call i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32> %a0)
ret i32 %1
}
@@ -410,7 +410,7 @@ define i16 @test_v2i16(<2 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16> %a0)
ret i16 %1
}
@@ -434,7 +434,7 @@ define i16 @test_v4i16(<4 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16> %a0)
ret i16 %1
}
@@ -463,7 +463,7 @@ define i16 @test_v8i16(<8 x i16> %a0) {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16> %a0)
ret i16 %1
}
@@ -526,7 +526,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16> %a0)
ret i16 %1
}
@@ -595,7 +595,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16> %a0)
ret i16 %1
}
@@ -673,7 +673,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16> %a0)
+ %1 = call i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16> %a0)
ret i16 %1
}
@@ -705,7 +705,7 @@ define i8 @test_v2i8(<2 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> %a0)
ret i8 %1
}
@@ -739,7 +739,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> %a0)
ret i8 %1
}
@@ -781,7 +781,7 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %a0)
ret i8 %1
}
@@ -831,7 +831,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %a0)
ret i8 %1
}
@@ -920,7 +920,7 @@ define i8 @test_v32i8(<32 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8> %a0)
ret i8 %1
}
@@ -1017,7 +1017,7 @@ define i8 @test_v64i8(<64 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8> %a0)
ret i8 %1
}
@@ -1127,32 +1127,32 @@ define i8 @test_v128i8(<128 x i8> %a0) {
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8> %a0)
+ %1 = call i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8> %a0)
ret i8 %1
}
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v4i64(<4 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v8i64(<8 x i64>)
-declare i64 @llvm.experimental.vector.reduce.xor.i64.v16i64(<16 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v8i64(<8 x i64>)
+declare i64 @llvm.experimental.vector.reduce.xor.v16i64(<16 x i64>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v8i32(<8 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v16i32(<16 x i32>)
-declare i32 @llvm.experimental.vector.reduce.xor.i32.v32i32(<32 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v2i32(<2 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v8i32(<8 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.experimental.vector.reduce.xor.v32i32(<32 x i32>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v16i16(<16 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v32i16(<32 x i16>)
-declare i16 @llvm.experimental.vector.reduce.xor.i16.v64i16(<64 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v2i16(<2 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v8i16(<8 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v32i16(<32 x i16>)
+declare i16 @llvm.experimental.vector.reduce.xor.v64i16(<64 x i16>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v32i8(<32 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v64i8(<64 x i8>)
-declare i8 @llvm.experimental.vector.reduce.xor.i8.v128i8(<128 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v32i8(<32 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v64i8(<64 x i8>)
+declare i8 @llvm.experimental.vector.reduce.xor.v128i8(<128 x i8>)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
index 9d9aea00e9a..50449d76b82 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
@@ -20,7 +20,7 @@ target triple = "aarch64--linux-gnu"
; CHECK: add <16 x i8>
;
; CHECK: middle.block:
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>
; CHECK: zext i8 [[Rdx]] to i32
;
define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
@@ -75,7 +75,7 @@ for.body:
; CHECK: add <8 x i16>
;
; CHECK: middle.block:
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>
; CHECK: zext i16 [[Rdx]] to i32
;
define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
@@ -132,7 +132,7 @@ for.body:
; CHECK: add <8 x i16>
;
; CHECK: middle.block:
-; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>
; CHECK: zext i16 [[Rdx]] to i32
;
define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index 14a6d0eb72c..2bc5469a7d8 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -23,7 +23,7 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], <i32 65535, i32 65535, i32 65535, i32 65535>
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
; CHECK-NEXT: ret i32 [[TMP10]]
;
%tmp00 = lshr i32 %a, 15
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index a142d4e0355..5e6db8b4836 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -24,7 +24,7 @@ define void @PR28330(i32 %n) {
; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef
; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef
; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef
-; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef
; DEFAULT-NEXT: br label [[FOR_BODY]]
@@ -76,7 +76,7 @@ define void @PR28330(i32 %n) {
; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
-; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]])
+; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
@@ -176,7 +176,7 @@ define void @PR32038(i32 %n) {
; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef
; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef
; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef
-; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef
; DEFAULT-NEXT: br label [[FOR_BODY]]
@@ -228,7 +228,7 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
-; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]])
+; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5
; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
@@ -266,7 +266,7 @@ define void @PR32038(i32 %n) {
; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]]
; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
-; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP8]])
+; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
index c4a584fcbf3..7e4b95d7234 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -49,7 +49,7 @@ define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias noca
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_026]]
; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD]], undef
; CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP7]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]]
@@ -176,7 +176,7 @@ define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalia
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_020]]
; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], undef
; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
; CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef
; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
@@ -300,7 +300,7 @@ define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noali
; CHECK-NEXT: [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef
; CHECK-NEXT: [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef
; CHECK-NEXT: [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP9]])
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
; CHECK-NEXT: [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef
; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index 9e9f40825ef..e4842af52b1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -260,7 +260,7 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <4 x i32> [[TMP13]], <i32 65535, i32 65535, i32 65535, i32 65535>
; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP15]], [[TMP14]]
-; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP16]])
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP16]])
; CHECK-NEXT: ret i32 [[TMP17]]
;
%v0.0 = extractelement <4 x i32> %v0, i32 0
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index ee403be0333..ab2f2d8c5dc 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -219,7 +219,8 @@ enum IIT_Info {
IIT_STRUCT6 = 38,
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
- IIT_F128 = 41
+ IIT_F128 = 41,
+ IIT_VEC_ELEMENT = 42
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -289,6 +290,8 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
return;
} else if (R->isSubClassOf("LLVMPointerToElt"))
Sig.push_back(IIT_PTR_TO_ELT);
+ else if (R->isSubClassOf("LLVMVectorElementType"))
+ Sig.push_back(IIT_VEC_ELEMENT);
else
Sig.push_back(IIT_ARG);
return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/);
OpenPOWER on IntegriCloud