Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/IR/Intrinsics.td                 |  39
-rw-r--r--  llvm/lib/IR/Function.cpp                           |  25
-rw-r--r--  llvm/test/Analysis/CostModel/X86/arith-overflow.ll | 414
-rw-r--r--  llvm/utils/TableGen/CodeGenTarget.cpp              |   2
-rw-r--r--  llvm/utils/TableGen/IntrinsicEmitter.cpp           |   2
5 files changed, 455 insertions(+), 27 deletions(-)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 6123a782c31..5f256275b87 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -156,10 +156,15 @@ class LLVMMatchType<int num>
// the intrinsic is overloaded, so the matched type should be declared as iAny.
class LLVMExtendedType<int num> : LLVMMatchType<num>;
class LLVMTruncatedType<int num> : LLVMMatchType<num>;
-class LLVMVectorSameWidth<int num, LLVMType elty>
- : LLVMMatchType<num> {
+
+// Match the scalar/vector of another intrinsic parameter but with a different
+// element type. Either both are scalars or both are vectors with the same
+// number of elements.
+class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
+ : LLVMMatchType<idx> {
ValueType ElTy = elty.VT;
}
+
class LLVMPointerTo<int num> : LLVMMatchType<num>;
class LLVMPointerToElt<int num> : LLVMMatchType<num>;
class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
@@ -796,24 +801,30 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
//
// Expose the carry flag from add operations on two integrals.
-def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
@@ -1001,35 +1012,35 @@ def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
LLVMAnyPointerType<LLVMMatchType<0>>,
llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[IntrArgMemOnly]>;
def int_masked_load : Intrinsic<[llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
[IntrReadMem, IntrArgMemOnly]>;
def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
[LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
[IntrReadMem]>;
def int_masked_scatter: Intrinsic<[],
[llvm_anyvector_ty,
LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>]>;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>]>;
def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
[LLVMPointerToElt<0>,
- LLVMVectorSameWidth<0, llvm_i1_ty>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
[IntrReadMem]>;
def int_masked_compressstore: Intrinsic<[],
[llvm_anyvector_ty,
LLVMPointerToElt<0>,
- LLVMVectorSameWidth<0, llvm_i1_ty>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[IntrArgMemOnly]>;
// Test whether a pointer is associated with a type metadata identifier.
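With the Intrinsics.td change above, the overflow flag of the six *.with.overflow intrinsics is no longer hard-wired to i1: it follows the shape of the first result, i1 for scalar operands and <N x i1> for <N x iM> operands. A minimal sketch of emitting the vector form through IRBuilder; the helper name and surrounding setup are illustrative assumptions, not part of this patch:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// For <4 x i32> operands the call below resolves to
//   declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)
static Value *emitUAddWithOverflow(IRBuilder<> &Builder, Value *LHS,
                                   Value *RHS, Value *&Overflow) {
  CallInst *Pair =
      Builder.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
  Overflow = Builder.CreateExtractValue(Pair, 1, "ov"); // i1 or <N x i1>
  return Builder.CreateExtractValue(Pair, 0, "sum");    // same type as LHS
}
```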
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 8601ec015a5..3ec421fe881 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -948,10 +948,9 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::SameVecWidthArgument: {
Type *EltTy = DecodeFixedType(Infos, Tys, Context);
Type *Ty = Tys[D.getArgumentNumber()];
- if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (auto *VTy = dyn_cast<VectorType>(Ty))
return VectorType::get(EltTy, VTy->getNumElements());
- }
- llvm_unreachable("unhandled");
+ return EltTy;
}
case IITDescriptor::PtrToArgument: {
Type *Ty = Tys[D.getArgumentNumber()];
@@ -1135,15 +1134,19 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
case IITDescriptor::SameVecWidthArgument: {
if (D.getArgumentNumber() >= ArgTys.size())
return true;
- VectorType * ReferenceType =
- dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
- VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
- if (!ThisArgType || !ReferenceType ||
- (ReferenceType->getVectorNumElements() !=
- ThisArgType->getVectorNumElements()))
+ auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ auto *ThisArgType = dyn_cast<VectorType>(Ty);
+ // Both must be vectors of the same number of elements or neither.
+ if ((ReferenceType != nullptr) != (ThisArgType != nullptr))
return true;
- return matchIntrinsicType(ThisArgType->getVectorElementType(),
- Infos, ArgTys);
+ Type *EltTy = Ty;
+ if (ThisArgType) {
+ if (ReferenceType->getVectorNumElements() !=
+ ThisArgType->getVectorNumElements())
+ return true;
+ EltTy = ThisArgType->getVectorElementType();
+ }
+ return matchIntrinsicType(EltTy, Infos, ArgTys);
}
case IITDescriptor::PtrToArgument: {
if (D.getArgumentNumber() >= ArgTys.size())
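The two Function.cpp hunks relax SameVecWidthArgument symmetrically: DecodeFixedType now falls back to the bare element type when the referenced argument is a scalar, and matchIntrinsicType accepts the scalar/scalar pairing instead of unconditionally requiring two vectors. The accepted combinations boil down to a small predicate; a standalone sketch under an assumed name:

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Accept two vectors with equal element counts, or two scalars; reject the
// mixed scalar/vector case and mismatched vector widths.
static bool scalarOrSameVectorWidth(Type *Ref, Type *Arg) {
  auto *RefVT = dyn_cast<VectorType>(Ref);
  auto *ArgVT = dyn_cast<VectorType>(Arg);
  if ((RefVT != nullptr) != (ArgVT != nullptr))
    return false; // one side is a vector, the other is not
  return !RefVT || RefVT->getNumElements() == ArgVT->getNumElements();
}
```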
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
new file mode 100644
index 00000000000..2b5c9e951b8
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -0,0 +1,414 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
+;
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+
+;
+; sadd.with.overflow
+;
+
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
+declare {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64>, <2 x i64>)
+declare {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64>, <4 x i64>)
+declare {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x i64>, <8 x i64>)
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)
+declare {<8 x i32>, <8 x i1>} @llvm.sadd.with.overflow.v8i32(<8 x i32>, <8 x i32>)
+declare {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32>, <16 x i32>)
+
+declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
+declare {<8 x i16>, <8 x i1>} @llvm.sadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
+declare {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16>, <16 x i16>)
+declare {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16>, <32 x i16>)
+
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
+declare {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16 x i8>, <16 x i8>)
+declare {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8>, <32 x i8>)
+declare {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
+
+define i32 @sadd(i32 %arg) {
+; CHECK-LABEL: 'sadd'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+ %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
+ %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+ %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+ %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+
+ %I32 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
+ %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+ %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+ %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+
+ %I16 = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
+ %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+ %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+ %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+
+ %I8 = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
+ %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+ %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+ %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+
+ ret i32 undef
+}
+
+;
+; uadd.with.overflow
+;
+
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
+declare {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64>, <2 x i64>)
+declare {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x i64>, <4 x i64>)
+declare {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x i64>, <8 x i64>)
+
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
+declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)
+declare {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32>, <8 x i32>)
+declare {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32>, <16 x i32>)
+
+declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
+declare {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
+declare {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16>, <16 x i16>)
+declare {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16>, <32 x i16>)
+
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+declare {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8>, <16 x i8>)
+declare {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8>, <32 x i8>)
+declare {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
+
+define i32 @uadd(i32 %arg) {
+; CHECK-LABEL: 'uadd'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+ %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
+ %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+ %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+ %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+
+ %I32 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+ %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+ %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+ %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+
+ %I16 = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
+ %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+ %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+ %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+
+ %I8 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
+ %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+ %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+ %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+
+ ret i32 undef
+}
+
+;
+; ssub.with.overflow
+;
+
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64)
+declare {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64>, <2 x i64>)
+declare {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x i64>, <4 x i64>)
+declare {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x i64>, <8 x i64>)
+
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
+declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>)
+declare {<8 x i32>, <8 x i1>} @llvm.ssub.with.overflow.v8i32(<8 x i32>, <8 x i32>)
+declare {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32>, <16 x i32>)
+
+declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16)
+declare {<8 x i16>, <8 x i1>} @llvm.ssub.with.overflow.v8i16(<8 x i16>, <8 x i16>)
+declare {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16>, <16 x i16>)
+declare {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16>, <32 x i16>)
+
+declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
+declare {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16 x i8>, <16 x i8>)
+declare {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8>, <32 x i8>)
+declare {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
+
+define i32 @ssub(i32 %arg) {
+; CHECK-LABEL: 'ssub'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+ %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
+ %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+ %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+ %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+
+ %I32 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
+ %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+ %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+ %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+
+ %I16 = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
+ %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+ %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+ %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+
+ %I8 = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
+ %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+ %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+ %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+
+ ret i32 undef
+}
+
+;
+; usub.with.overflow
+;
+
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
+declare {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64>, <2 x i64>)
+declare {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x i64>, <4 x i64>)
+declare {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x i64>, <8 x i64>)
+
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
+declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>)
+declare {<8 x i32>, <8 x i1>} @llvm.usub.with.overflow.v8i32(<8 x i32>, <8 x i32>)
+declare {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32>, <16 x i32>)
+
+declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16)
+declare {<8 x i16>, <8 x i1>} @llvm.usub.with.overflow.v8i16(<8 x i16>, <8 x i16>)
+declare {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16>, <16 x i16>)
+declare {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16>, <32 x i16>)
+
+declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
+declare {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16 x i8>, <16 x i8>)
+declare {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8>, <32 x i8>)
+declare {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
+
+define i32 @usub(i32 %arg) {
+; CHECK-LABEL: 'usub'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+ %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
+ %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+ %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+ %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+
+ %I32 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
+ %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+ %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+ %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+
+ %I16 = call {i16, i1} @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
+ %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+ %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+ %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+
+ %I8 = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
+ %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+ %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+ %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+
+ ret i32 undef
+}
+
+;
+; smul.with.overflow
+;
+
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64)
+declare {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64>, <2 x i64>)
+declare {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x i64>, <4 x i64>)
+declare {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x i64>, <8 x i64>)
+
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare {<4 x i32>, <4 x i1>} @llvm.smul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
+declare {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32>, <8 x i32>)
+declare {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
+
+declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16)
+declare {<8 x i16>, <8 x i1>} @llvm.smul.with.overflow.v8i16(<8 x i16>, <8 x i16>)
+declare {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16>, <16 x i16>)
+declare {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16>, <32 x i16>)
+
+declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8)
+declare {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8>, <16 x i8>)
+declare {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8>, <32 x i8>)
+declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
+
+define i32 @smul(i32 %arg) {
+; CHECK-LABEL: 'smul'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+ %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
+ %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+ %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+ %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+
+ %I32 = call {i32, i1} @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
+ %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+ %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+ %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+
+ %I16 = call {i16, i1} @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
+ %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+ %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+ %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+
+ %I8 = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
+ %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+ %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+ %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+
+ ret i32 undef
+}
+
+;
+; umul.with.overflow
+;
+
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64)
+declare {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64>, <2 x i64>)
+declare {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x i64>, <4 x i64>)
+declare {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x i64>, <8 x i64>)
+
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
+declare {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
+declare {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32>, <8 x i32>)
+declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
+
+declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16)
+declare {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16>, <8 x i16>)
+declare {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16>, <16 x i16>)
+declare {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16>, <32 x i16>)
+
+declare {i8, i1} @llvm.umul.with.overflow.i8(i8, i8)
+declare {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8>, <16 x i8>)
+declare {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8>, <32 x i8>)
+declare {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
+
+define i32 @umul(i32 %arg) {
+; CHECK-LABEL: 'umul'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+ %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
+ %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+ %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+ %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+
+ %I32 = call {i32, i1} @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
+ %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+ %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+ %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+
+ %I16 = call {i16, i1} @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
+ %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+ %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+ %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+
+ %I8 = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
+ %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+ %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+ %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+
+ ret i32 undef
+}
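Note that every RUN configuration in this test shares the bare CHECK prefix, so at this point the costs are identical across all subtargets: a scalar overflow intrinsic costs 1 and an N-element vector form costs 3N - 1 (e.g. 5 for <2 x i64>, 47 for <16 x i8>), the generic scalarization estimate rather than anything target-specific.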
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index adca99552b1..5dcb5e230cc 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -633,7 +633,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// overloaded, all the types can be specified directly.
assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
!TyEl->isSubClassOf("LLVMTruncatedType") &&
- !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
+ !TyEl->isSubClassOf("LLVMScalarOrSameVectorWidth")) ||
VT == MVT::iAny || VT == MVT::vAny) &&
"Expected iAny or vAny type");
} else
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index cb76f053a22..3c0726308a4 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -269,7 +269,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
Sig.push_back(IIT_TRUNC_ARG);
else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
Sig.push_back(IIT_HALF_VEC_ARG);
- else if (R->isSubClassOf("LLVMVectorSameWidth")) {
+ else if (R->isSubClassOf("LLVMScalarOrSameVectorWidth")) {
Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
Sig.push_back((Number << 3) | ArgCodes[Number]);
MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
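The emitter keeps the existing IIT_SAME_VEC_WIDTH_ARG opcode and only retargets it to the renamed TableGen class; the byte pushed after the opcode packs the referenced argument index into the high bits and that argument's kind code into the low three bits. A hypothetical decoder for that byte, mirroring "(Number << 3) | ArgCodes[Number]" above, just to make the layout concrete:

```cpp
// Hypothetical helper: unpack the argument-reference byte of an intrinsic
// type signature into (argument index, 3-bit argument-kind code).
struct ArgRef {
  unsigned ArgNo;
  unsigned char ArgKind;
};

static ArgRef unpackArgRef(unsigned char Byte) {
  return {static_cast<unsigned>(Byte >> 3),
          static_cast<unsigned char>(Byte & 7)};
}
```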