summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/BasicTTIImpl.h 34
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h 2
-rw-r--r-- llvm/test/Analysis/CostModel/ARM/cast.ll 176
-rw-r--r-- llvm/test/Analysis/CostModel/PowerPC/ext.ll 2
-rw-r--r-- llvm/test/Analysis/CostModel/X86/sitofp.ll 118
-rw-r--r-- llvm/test/Analysis/CostModel/X86/uitofp.ll 124
-rw-r--r-- llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll 6
7 files changed, 241 insertions, 221 deletions
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 78cb0af1699..1aa13fb7359 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -315,6 +315,8 @@ public:
}
// Else, assume that we need to scalarize this op.
+ // TODO: If one of the types gets legalized by splitting, handle this
+ // similarly to what getCastInstrCost() does.
if (Ty->isVectorTy()) {
unsigned Num = Ty->getVectorNumElements();
unsigned Cost = static_cast<T *>(this)
@@ -409,12 +411,25 @@ public:
return SrcLT.first * 1;
}
- // If we are converting vectors and the operation is illegal, or
- // if the vectors are legalized to different types, estimate the
- // scalarization costs.
- // TODO: This is probably a big overestimate. For splits, we should have
- // something like getTypeLegalizationCost() + 2 * getCastInstrCost().
- // The same applies to getCmpSelInstrCost() and getArithmeticInstrCost()
+ // If we are legalizing by splitting, query the concrete TTI for the cost
+ // of casting the original vector twice. We also need to factor in the
+ // cost of the split itself. Count that as 1, to be consistent with
+ // TLI->getTypeLegalizationCost().
+ if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
+ TargetLowering::TypeSplitVector) ||
+ (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
+ TargetLowering::TypeSplitVector)) {
+ Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
+ Dst->getVectorNumElements() / 2);
+ Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
+ Src->getVectorNumElements() / 2);
+ T *TTI = static_cast<T *>(this);
+ return TTI->getVectorSplitCost() +
+ (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc));
+ }
+
+ // In other cases where the source or destination are illegal, assume
+ // the operation will get scalarized.
unsigned Num = Dst->getVectorNumElements();
unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
Opcode, Dst->getScalarType(), Src->getScalarType());
@@ -472,6 +487,8 @@ public:
}
// Otherwise, assume that the cast is scalarized.
+ // TODO: If one of the types gets legalized by splitting, handle this
+ // similarly to what getCastInstrCost() does.
if (ValTy->isVectorTy()) {
unsigned Num = ValTy->getVectorNumElements();
if (CondTy)
@@ -480,8 +497,7 @@ public:
Opcode, ValTy->getScalarType(), CondTy);
// Return the cost of multiple scalar invocation plus the cost of
- // inserting
- // and extracting the values.
+ // inserting and extracting the values.
return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
}
@@ -906,6 +922,8 @@ public:
return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
}
+ unsigned getVectorSplitCost() { return 1; }
+
/// @}
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index d7efaa502e6..a82a0745808 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -96,6 +96,8 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
bool isSourceOfDivergence(const Value *V) const;
+
+ unsigned getVectorSplitCost() { return 0; }
};
} // end namespace llvm
diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll
index 21e29282209..72308834c53 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast.ll
@@ -264,39 +264,39 @@ define i32 @casts() {
%r116 = fptoui <4 x float> undef to <4 x i32>
; CHECK: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32>
%r117 = fptosi <4 x float> undef to <4 x i32>
- ; CHECK: Found an estimated cost of 64 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64>
+ ; CHECK: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64>
%r118 = fptoui <4 x float> undef to <4 x i64>
- ; CHECK: Found an estimated cost of 64 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64>
+ ; CHECK: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64>
%r119 = fptosi <4 x float> undef to <4 x i64>
- ; CHECK: Found an estimated cost of 32 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1>
+ ; CHECK: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1>
%r120 = fptoui <4 x double> undef to <4 x i1>
- ; CHECK: Found an estimated cost of 32 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1>
+ ; CHECK: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1>
%r121 = fptosi <4 x double> undef to <4 x i1>
- ; CHECK: Found an estimated cost of 32 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8>
+ ; CHECK: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8>
%r122 = fptoui <4 x double> undef to <4 x i8>
- ; CHECK: Found an estimated cost of 32 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8>
+ ; CHECK: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8>
%r123 = fptosi <4 x double> undef to <4 x i8>
- ; CHECK: Found an estimated cost of 32 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16>
+ ; CHECK: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16>
%r124 = fptoui <4 x double> undef to <4 x i16>
- ; CHECK: Found an estimated cost of 32 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16>
+ ; CHECK: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16>
%r125 = fptosi <4 x double> undef to <4 x i16>
- ; CHECK: Found an estimated cost of 32 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32>
+ ; CHECK: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32>
%r126 = fptoui <4 x double> undef to <4 x i32>
- ; CHECK: Found an estimated cost of 32 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32>
+ ; CHECK: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32>
%r127 = fptosi <4 x double> undef to <4 x i32>
- ; CHECK: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64>
+ ; CHECK: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64>
%r128 = fptoui <4 x double> undef to <4 x i64>
- ; CHECK: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64>
+ ; CHECK: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64>
%r129 = fptosi <4 x double> undef to <4 x i64>
- ; CHECK: Found an estimated cost of 64 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1>
+ ; CHECK: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1>
%r130 = fptoui <8 x float> undef to <8 x i1>
- ; CHECK: Found an estimated cost of 64 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1>
+ ; CHECK: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1>
%r131 = fptosi <8 x float> undef to <8 x i1>
- ; CHECK: Found an estimated cost of 64 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8>
%r132 = fptoui <8 x float> undef to <8 x i8>
- ; CHECK: Found an estimated cost of 64 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8>
%r133 = fptosi <8 x float> undef to <8 x i8>
; CHECK: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16>
%r134 = fptoui <8 x float> undef to <8 x i16>
@@ -306,39 +306,39 @@ define i32 @casts() {
%r136 = fptoui <8 x float> undef to <8 x i32>
; CHECK: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32>
%r137 = fptosi <8 x float> undef to <8 x i32>
- ; CHECK: Found an estimated cost of 128 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64>
+ ; CHECK: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64>
%r138 = fptoui <8 x float> undef to <8 x i64>
- ; CHECK: Found an estimated cost of 128 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64>
+ ; CHECK: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64>
%r139 = fptosi <8 x float> undef to <8 x i64>
- ; CHECK: Found an estimated cost of 64 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1>
+ ; CHECK: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1>
%r140 = fptoui <8 x double> undef to <8 x i1>
- ; CHECK: Found an estimated cost of 64 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1>
+ ; CHECK: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1>
%r141 = fptosi <8 x double> undef to <8 x i1>
- ; CHECK: Found an estimated cost of 64 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8>
+ ; CHECK: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8>
%r142 = fptoui <8 x double> undef to <8 x i8>
- ; CHECK: Found an estimated cost of 64 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8>
+ ; CHECK: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8>
%r143 = fptosi <8 x double> undef to <8 x i8>
- ; CHECK: Found an estimated cost of 64 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16>
+ ; CHECK: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16>
%r144 = fptoui <8 x double> undef to <8 x i16>
- ; CHECK: Found an estimated cost of 64 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16>
+ ; CHECK: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16>
%r145 = fptosi <8 x double> undef to <8 x i16>
- ; CHECK: Found an estimated cost of 64 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32>
+ ; CHECK: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32>
%r146 = fptoui <8 x double> undef to <8 x i32>
- ; CHECK: Found an estimated cost of 64 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32>
+ ; CHECK: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32>
%r147 = fptosi <8 x double> undef to <8 x i32>
- ; CHECK: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64>
+ ; CHECK: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64>
%r148 = fptoui <8 x double> undef to <8 x i64>
- ; CHECK: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64>
+ ; CHECK: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64>
%r149 = fptosi <8 x double> undef to <8 x i64>
- ; CHECK: Found an estimated cost of 128 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1>
+ ; CHECK: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1>
%r150 = fptoui <16 x float> undef to <16 x i1>
- ; CHECK: Found an estimated cost of 128 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1>
+ ; CHECK: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1>
%r151 = fptosi <16 x float> undef to <16 x i1>
- ; CHECK: Found an estimated cost of 128 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8>
%r152 = fptoui <16 x float> undef to <16 x i8>
- ; CHECK: Found an estimated cost of 128 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8>
%r153 = fptosi <16 x float> undef to <16 x i8>
; CHECK: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16>
%r154 = fptoui <16 x float> undef to <16 x i16>
@@ -348,30 +348,30 @@ define i32 @casts() {
%r156 = fptoui <16 x float> undef to <16 x i32>
; CHECK: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32>
%r157 = fptosi <16 x float> undef to <16 x i32>
- ; CHECK: Found an estimated cost of 256 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64>
+ ; CHECK: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64>
%r158 = fptoui <16 x float> undef to <16 x i64>
- ; CHECK: Found an estimated cost of 256 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64>
+ ; CHECK: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64>
%r159 = fptosi <16 x float> undef to <16 x i64>
- ; CHECK: Found an estimated cost of 128 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1>
+ ; CHECK: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1>
%r160 = fptoui <16 x double> undef to <16 x i1>
- ; CHECK: Found an estimated cost of 128 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1>
+ ; CHECK: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1>
%r161 = fptosi <16 x double> undef to <16 x i1>
- ; CHECK: Found an estimated cost of 128 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8>
+ ; CHECK: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8>
%r162 = fptoui <16 x double> undef to <16 x i8>
- ; CHECK: Found an estimated cost of 128 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8>
+ ; CHECK: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8>
%r163 = fptosi <16 x double> undef to <16 x i8>
- ; CHECK: Found an estimated cost of 128 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16>
+ ; CHECK: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16>
%r164 = fptoui <16 x double> undef to <16 x i16>
- ; CHECK: Found an estimated cost of 128 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16>
+ ; CHECK: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16>
%r165 = fptosi <16 x double> undef to <16 x i16>
- ; CHECK: Found an estimated cost of 128 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32>
+ ; CHECK: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32>
%r166 = fptoui <16 x double> undef to <16 x i32>
- ; CHECK: Found an estimated cost of 128 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32>
+ ; CHECK: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32>
%r167 = fptosi <16 x double> undef to <16 x i32>
- ; CHECK: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64>
+ ; CHECK: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64>
%r168 = fptoui <16 x double> undef to <16 x i64>
- ; CHECK: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64>
+ ; CHECK: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64>
%r169 = fptosi <16 x double> undef to <16 x i64>
; CHECK: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float>
@@ -432,39 +432,39 @@ define i32 @casts() {
%r196 = uitofp <4 x i32> undef to <4 x float>
; CHECK: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float>
%r197 = sitofp <4 x i32> undef to <4 x float>
- ; CHECK: Found an estimated cost of 56 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float>
+ ; CHECK: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float>
%r198 = uitofp <4 x i64> undef to <4 x float>
- ; CHECK: Found an estimated cost of 56 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float>
+ ; CHECK: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float>
%r199 = sitofp <4 x i64> undef to <4 x float>
- ; CHECK: Found an estimated cost of 16 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double>
%r200 = uitofp <4 x i1> undef to <4 x double>
- ; CHECK: Found an estimated cost of 16 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double>
%r201 = sitofp <4 x i1> undef to <4 x double>
- ; CHECK: Found an estimated cost of 16 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double>
%r202 = uitofp <4 x i8> undef to <4 x double>
- ; CHECK: Found an estimated cost of 16 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double>
%r203 = sitofp <4 x i8> undef to <4 x double>
- ; CHECK: Found an estimated cost of 16 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double>
%r204 = uitofp <4 x i16> undef to <4 x double>
- ; CHECK: Found an estimated cost of 16 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double>
%r205 = sitofp <4 x i16> undef to <4 x double>
- ; CHECK: Found an estimated cost of 16 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double>
%r206 = uitofp <4 x i32> undef to <4 x double>
- ; CHECK: Found an estimated cost of 16 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double>
%r207 = sitofp <4 x i32> undef to <4 x double>
- ; CHECK: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double>
%r208 = uitofp <4 x i64> undef to <4 x double>
- ; CHECK: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double>
+ ; CHECK: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double>
%r209 = sitofp <4 x i64> undef to <4 x double>
- ; CHECK: Found an estimated cost of 48 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float>
%r210 = uitofp <8 x i1> undef to <8 x float>
- ; CHECK: Found an estimated cost of 48 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float>
%r211 = sitofp <8 x i1> undef to <8 x float>
- ; CHECK: Found an estimated cost of 48 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float>
%r212 = uitofp <8 x i8> undef to <8 x float>
- ; CHECK: Found an estimated cost of 48 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float>
+ ; CHECK: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float>
%r213 = sitofp <8 x i8> undef to <8 x float>
; CHECK: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float>
%r214 = uitofp <8 x i16> undef to <8 x float>
@@ -474,39 +474,39 @@ define i32 @casts() {
%r216 = uitofp <8 x i32> undef to <8 x float>
; CHECK: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float>
%r217 = sitofp <8 x i32> undef to <8 x float>
- ; CHECK: Found an estimated cost of 112 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float>
+ ; CHECK: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float>
%r218 = uitofp <8 x i64> undef to <8 x float>
- ; CHECK: Found an estimated cost of 112 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float>
+ ; CHECK: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float>
%r219 = sitofp <8 x i64> undef to <8 x float>
- ; CHECK: Found an estimated cost of 32 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double>
%r220 = uitofp <8 x i1> undef to <8 x double>
- ; CHECK: Found an estimated cost of 32 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double>
%r221 = sitofp <8 x i1> undef to <8 x double>
- ; CHECK: Found an estimated cost of 32 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double>
%r222 = uitofp <8 x i8> undef to <8 x double>
- ; CHECK: Found an estimated cost of 32 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double>
%r223 = sitofp <8 x i8> undef to <8 x double>
- ; CHECK: Found an estimated cost of 32 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double>
%r224 = uitofp <8 x i16> undef to <8 x double>
- ; CHECK: Found an estimated cost of 32 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double>
%r225 = sitofp <8 x i16> undef to <8 x double>
- ; CHECK: Found an estimated cost of 32 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double>
%r226 = uitofp <8 x i16> undef to <8 x double>
- ; CHECK: Found an estimated cost of 32 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double>
%r227 = sitofp <8 x i16> undef to <8 x double>
- ; CHECK: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double>
%r228 = uitofp <8 x i64> undef to <8 x double>
- ; CHECK: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double>
+ ; CHECK: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double>
%r229 = sitofp <8 x i64> undef to <8 x double>
- ; CHECK: Found an estimated cost of 96 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float>
%r230 = uitofp <16 x i1> undef to <16 x float>
- ; CHECK: Found an estimated cost of 96 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float>
%r231 = sitofp <16 x i1> undef to <16 x float>
- ; CHECK: Found an estimated cost of 96 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float>
%r232 = uitofp <16 x i8> undef to <16 x float>
- ; CHECK: Found an estimated cost of 96 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float>
+ ; CHECK: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float>
%r233 = sitofp <16 x i8> undef to <16 x float>
; CHECK: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float>
%r234 = uitofp <16 x i16> undef to <16 x float>
@@ -516,30 +516,30 @@ define i32 @casts() {
%r236 = uitofp <16 x i32> undef to <16 x float>
; CHECK: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float>
%r237 = sitofp <16 x i32> undef to <16 x float>
- ; CHECK: Found an estimated cost of 224 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float>
+ ; CHECK: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float>
%r238 = uitofp <16 x i64> undef to <16 x float>
- ; CHECK: Found an estimated cost of 224 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float>
+ ; CHECK: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float>
%r239 = sitofp <16 x i64> undef to <16 x float>
- ; CHECK: Found an estimated cost of 64 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double>
%r240 = uitofp <16 x i1> undef to <16 x double>
- ; CHECK: Found an estimated cost of 64 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double>
%r241 = sitofp <16 x i1> undef to <16 x double>
- ; CHECK: Found an estimated cost of 64 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double>
%r242 = uitofp <16 x i8> undef to <16 x double>
- ; CHECK: Found an estimated cost of 64 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double>
%r243 = sitofp <16 x i8> undef to <16 x double>
- ; CHECK: Found an estimated cost of 64 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double>
%r244 = uitofp <16 x i16> undef to <16 x double>
- ; CHECK: Found an estimated cost of 64 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double>
%r245 = sitofp <16 x i16> undef to <16 x double>
- ; CHECK: Found an estimated cost of 64 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double>
%r246 = uitofp <16 x i16> undef to <16 x double>
- ; CHECK: Found an estimated cost of 64 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
%r247 = sitofp <16 x i16> undef to <16 x double>
- ; CHECK: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
%r248 = uitofp <16 x i64> undef to <16 x double>
- ; CHECK: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
+ ; CHECK: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
%r249 = sitofp <16 x i64> undef to <16 x double>
; CHECK: Found an estimated cost of 0 for instruction: ret i32 undef
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ext.ll b/llvm/test/Analysis/CostModel/PowerPC/ext.ll
index 7d6a14e93cd..df9c53e6e57 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/ext.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/ext.ll
@@ -13,7 +13,7 @@ define void @exts() {
; CHECK: cost of 1 {{.*}} sext
%v3 = sext <4 x i16> undef to <4 x i32>
- ; CHECK: cost of 112 {{.*}} sext
+ ; CHECK: cost of 3 {{.*}} sext
%v4 = sext <8 x i16> undef to <8 x i32>
ret void
diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll
index 9f0c4065c17..d5fa0b96842 100644
--- a/llvm/test/Analysis/CostModel/X86/sitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll
@@ -40,10 +40,10 @@ define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
; SSE2: cost of 80 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv8i8v8double
- ; AVX1: cost of 20 {{.*}} sitofp
+ ; AVX1: cost of 7 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv8i8v8double
- ; AVX2: cost of 20 {{.*}} sitofp
+ ; AVX2: cost of 7 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv8i8v8double
; AVX512F: cost of 2 {{.*}} sitofp
@@ -56,13 +56,13 @@ define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
; SSE2: cost of 160 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i8v16double
- ; AVX1: cost of 40 {{.*}} sitofp
+ ; AVX1: cost of 15 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i8v16double
- ; AVX2: cost of 40 {{.*}} sitofp
+ ; AVX2: cost of 15 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i8v16double
- ; AVX512F: cost of 44 {{.*}} sitofp
+ ; AVX512F: cost of 5 {{.*}} sitofp
%1 = sitofp <16 x i8> %a to <16 x double>
ret <16 x double> %1
}
@@ -72,13 +72,13 @@ define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
; SSE2: cost of 320 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i8v32double
- ; AVX1: cost of 80 {{.*}} sitofp
+ ; AVX1: cost of 31 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i8v32double
- ; AVX2: cost of 80 {{.*}} sitofp
+ ; AVX2: cost of 31 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i8v32double
- ; AVX512F: cost of 88 {{.*}} sitofp
+ ; AVX512F: cost of 11 {{.*}} sitofp
%1 = sitofp <32 x i8> %a to <32 x double>
ret <32 x double> %1
}
@@ -120,10 +120,10 @@ define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
; SSE2: cost of 80 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv8i16v8double
- ; AVX1: cost of 20 {{.*}} sitofp
+ ; AVX1: cost of 7 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv8i16v8double
- ; AVX2: cost of 20 {{.*}} sitofp
+ ; AVX2: cost of 7 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv8i16v8double
; AVX512F: cost of 2 {{.*}} sitofp
@@ -136,13 +136,13 @@ define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
; SSE2: cost of 160 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i16v16double
- ; AVX1: cost of 40 {{.*}} sitofp
+ ; AVX1: cost of 15 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i16v16double
- ; AVX2: cost of 40 {{.*}} sitofp
+ ; AVX2: cost of 15 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i16v16double
- ; AVX512F: cost of 44 {{.*}} sitofp
+ ; AVX512F: cost of 5 {{.*}} sitofp
%1 = sitofp <16 x i16> %a to <16 x double>
ret <16 x double> %1
}
@@ -152,13 +152,13 @@ define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
; SSE2: cost of 320 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i16v32double
- ; AVX1: cost of 80 {{.*}} sitofp
+ ; AVX1: cost of 31 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i16v32double
- ; AVX2: cost of 80 {{.*}} sitofp
+ ; AVX2: cost of 31 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i16v32double
- ; AVX512F: cost of 88 {{.*}} sitofp
+ ; AVX512F: cost of 11 {{.*}} sitofp
%1 = sitofp <32 x i16> %a to <32 x double>
ret <32 x double> %1
}
@@ -200,10 +200,10 @@ define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
; SSE2: cost of 80 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv8i32v8double
- ; AVX1: cost of 20 {{.*}} sitofp
+ ; AVX1: cost of 3 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv8i32v8double
- ; AVX2: cost of 20 {{.*}} sitofp
+ ; AVX2: cost of 3 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv8i32v8double
; AVX512F: cost of 1 {{.*}} sitofp
@@ -216,13 +216,13 @@ define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
; SSE2: cost of 160 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i32v16double
- ; AVX1: cost of 40 {{.*}} sitofp
+ ; AVX1: cost of 7 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i32v16double
- ; AVX2: cost of 40 {{.*}} sitofp
+ ; AVX2: cost of 7 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i32v16double
- ; AVX512F: cost of 44 {{.*}} sitofp
+ ; AVX512F: cost of 3 {{.*}} sitofp
%1 = sitofp <16 x i32> %a to <16 x double>
ret <16 x double> %1
}
@@ -232,13 +232,13 @@ define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
; SSE2: cost of 320 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i32v32double
- ; AVX1: cost of 80 {{.*}} sitofp
+ ; AVX1: cost of 15 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i32v32double
- ; AVX2: cost of 80 {{.*}} sitofp
+ ; AVX2: cost of 15 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i32v32double
- ; AVX512F: cost of 88 {{.*}} sitofp
+ ; AVX512F: cost of 7 {{.*}} sitofp
%1 = sitofp <32 x i32> %a to <32 x double>
ret <32 x double> %1
}
@@ -280,10 +280,10 @@ define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
; SSE2: cost of 80 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv8i64v8double
- ; AVX1: cost of 20 {{.*}} sitofp
+ ; AVX1: cost of 21 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv8i64v8double
- ; AVX2: cost of 20 {{.*}} sitofp
+ ; AVX2: cost of 21 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv8i64v8double
; AVX512F: cost of 22 {{.*}} sitofp
@@ -296,13 +296,13 @@ define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
; SSE2: cost of 160 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i64v16double
- ; AVX1: cost of 40 {{.*}} sitofp
+ ; AVX1: cost of 43 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i64v16double
- ; AVX2: cost of 40 {{.*}} sitofp
+ ; AVX2: cost of 43 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i64v16double
- ; AVX512F: cost of 44 {{.*}} sitofp
+ ; AVX512F: cost of 45 {{.*}} sitofp
%1 = sitofp <16 x i64> %a to <16 x double>
ret <16 x double> %1
}
@@ -312,13 +312,13 @@ define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
; SSE2: cost of 320 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i64v32double
- ; AVX1: cost of 80 {{.*}} sitofp
+ ; AVX1: cost of 87 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i64v32double
- ; AVX2: cost of 80 {{.*}} sitofp
+ ; AVX2: cost of 87 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i64v32double
- ; AVX512F: cost of 88 {{.*}} sitofp
+ ; AVX512F: cost of 91 {{.*}} sitofp
%1 = sitofp <32 x i64> %a to <32 x double>
ret <32 x double> %1
}
@@ -376,10 +376,10 @@ define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
; SSE2: cost of 8 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i8v16float
- ; AVX1: cost of 44 {{.*}} sitofp
+ ; AVX1: cost of 17 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i8v16float
- ; AVX2: cost of 44 {{.*}} sitofp
+ ; AVX2: cost of 17 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i8v16float
; AVX512F: cost of 2 {{.*}} sitofp
@@ -392,13 +392,13 @@ define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
; SSE2: cost of 16 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i8v32float
- ; AVX1: cost of 88 {{.*}} sitofp
+ ; AVX1: cost of 35 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i8v32float
- ; AVX2: cost of 88 {{.*}} sitofp
+ ; AVX2: cost of 35 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i8v32float
- ; AVX512F: cost of 92 {{.*}} sitofp
+ ; AVX512F: cost of 5 {{.*}} sitofp
%1 = sitofp <32 x i8> %a to <32 x float>
ret <32 x float> %1
}
@@ -456,10 +456,10 @@ define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
; SSE2: cost of 30 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i16v16float
- ; AVX1: cost of 44 {{.*}} sitofp
+ ; AVX1: cost of 11 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i16v16float
- ; AVX2: cost of 44 {{.*}} sitofp
+ ; AVX2: cost of 11 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i16v16float
; AVX512F: cost of 2 {{.*}} sitofp
@@ -472,13 +472,13 @@ define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
; SSE2: cost of 60 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i16v32float
- ; AVX1: cost of 88 {{.*}} sitofp
+ ; AVX1: cost of 23 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i16v32float
- ; AVX2: cost of 88 {{.*}} sitofp
+ ; AVX2: cost of 23 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i16v32float
- ; AVX512F: cost of 92 {{.*}} sitofp
+ ; AVX512F: cost of 5 {{.*}} sitofp
%1 = sitofp <32 x i16> %a to <32 x float>
ret <32 x float> %1
}
@@ -536,10 +536,10 @@ define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
; SSE2: cost of 60 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i32v16float
- ; AVX1: cost of 44 {{.*}} sitofp
+ ; AVX1: cost of 3 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i32v16float
- ; AVX2: cost of 44 {{.*}} sitofp
+ ; AVX2: cost of 3 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i32v16float
; AVX512F: cost of 1 {{.*}} sitofp
@@ -552,13 +552,13 @@ define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
; SSE2: cost of 120 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i32v32float
- ; AVX1: cost of 88 {{.*}} sitofp
+ ; AVX1: cost of 7 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i32v32float
- ; AVX2: cost of 88 {{.*}} sitofp
+ ; AVX2: cost of 7 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i32v32float
- ; AVX512F: cost of 92 {{.*}} sitofp
+ ; AVX512F: cost of 3 {{.*}} sitofp
%1 = sitofp <32 x i32> %a to <32 x float>
ret <32 x float> %1
}
@@ -600,10 +600,10 @@ define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
; SSE2: cost of 60 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv8i64v8float
- ; AVX1: cost of 22 {{.*}} sitofp
+ ; AVX1: cost of 21 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv8i64v8float
- ; AVX2: cost of 22 {{.*}} sitofp
+ ; AVX2: cost of 21 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv8i64v8float
; AVX512F: cost of 22 {{.*}} sitofp
@@ -616,13 +616,13 @@ define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
; SSE2: cost of 120 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i64v16float
- ; AVX1: cost of 44 {{.*}} sitofp
+ ; AVX1: cost of 43 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i64v16float
- ; AVX2: cost of 44 {{.*}} sitofp
+ ; AVX2: cost of 43 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i64v16float
- ; AVX512F: cost of 46 {{.*}} sitofp
+ ; AVX512F: cost of 45 {{.*}} sitofp
%1 = sitofp <16 x i64> %a to <16 x float>
ret <16 x float> %1
}
@@ -632,13 +632,13 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
; SSE2: cost of 240 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i64v32float
- ; AVX1: cost of 88 {{.*}} sitofp
+ ; AVX1: cost of 87 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i64v32float
- ; AVX2: cost of 88 {{.*}} sitofp
+ ; AVX2: cost of 87 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i64v32float
- ; AVX512F: cost of 92 {{.*}} sitofp
+ ; AVX512F: cost of 91 {{.*}} sitofp
%1 = sitofp <32 x i64> %a to <32 x float>
ret <32 x float> %1
}
@@ -648,10 +648,10 @@ define <8 x double> @sitofpv8i1v8double(<8 x double> %a) {
; SSE2: cost of 80 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv8i1v8double
- ; AVX1: cost of 20 {{.*}} sitofp
+ ; AVX1: cost of 7 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv8i1v8double
- ; AVX2: cost of 20 {{.*}} sitofp
+ ; AVX2: cost of 7 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv8i1v8double
; AVX512F: cost of 4 {{.*}} sitofp
@@ -665,10 +665,10 @@ define <16 x float> @sitofpv16i1v16float(<16 x float> %a) {
; SSE2: cost of 8 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i1v16float
- ; AVX1: cost of 44 {{.*}} sitofp
+ ; AVX1: cost of 17 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i1v16float
- ; AVX2: cost of 44 {{.*}} sitofp
+ ; AVX2: cost of 17 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i1v16float
; AVX512F: cost of 3 {{.*}} sitofp
diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll
index 08e36650bec..35f5d15ccab 100644
--- a/llvm/test/Analysis/CostModel/X86/uitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll
@@ -41,10 +41,10 @@ define <8 x double> @uitofpv8i8v8double(<8 x i8> %a) {
; SSE2: cost of 80 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv8i8v8double
- ; AVX1: cost of 20 {{.*}} uitofp
+ ; AVX1: cost of 5 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv8i8v8double
- ; AVX2: cost of 20 {{.*}} uitofp
+ ; AVX2: cost of 5 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv8i8v8double
; AVX512F: cost of 2 {{.*}} uitofp
@@ -57,13 +57,13 @@ define <16 x double> @uitofpv16i8v16double(<16 x i8> %a) {
; SSE2: cost of 160 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i8v16double
- ; AVX1: cost of 40 {{.*}} uitofp
+ ; AVX1: cost of 11 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i8v16double
- ; AVX2: cost of 40 {{.*}} uitofp
+ ; AVX2: cost of 11 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i8v16double
- ; AVX512F: cost of 44 {{.*}} uitofp
+ ; AVX512F: cost of 5 {{.*}} uitofp
%1 = uitofp <16 x i8> %a to <16 x double>
ret <16 x double> %1
}
@@ -73,13 +73,13 @@ define <32 x double> @uitofpv32i8v32double(<32 x i8> %a) {
; SSE2: cost of 320 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i8v32double
- ; AVX1: cost of 80 {{.*}} uitofp
+ ; AVX1: cost of 23 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i8v32double
- ; AVX2: cost of 80 {{.*}} uitofp
+ ; AVX2: cost of 23 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i8v32double
- ; AVX512F: cost of 88 {{.*}} uitofp
+ ; AVX512F: cost of 11 {{.*}} uitofp
%1 = uitofp <32 x i8> %a to <32 x double>
ret <32 x double> %1
}
@@ -121,10 +121,10 @@ define <8 x double> @uitofpv8i16v8double(<8 x i16> %a) {
; SSE2: cost of 80 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv8i16v8double
- ; AVX1: cost of 20 {{.*}} uitofp
+ ; AVX1: cost of 5 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv8i16v8double
- ; AVX2: cost of 20 {{.*}} uitofp
+ ; AVX2: cost of 5 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv8i16v8double
; AVX512F: cost of 2 {{.*}} uitofp
@@ -137,13 +137,13 @@ define <16 x double> @uitofpv16i16v16double(<16 x i16> %a) {
; SSE2: cost of 160 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i16v16double
- ; AVX1: cost of 40 {{.*}} uitofp
+ ; AVX1: cost of 11 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i16v16double
- ; AVX2: cost of 40 {{.*}} uitofp
+ ; AVX2: cost of 11 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i16v16double
- ; AVX512F: cost of 44 {{.*}} uitofp
+ ; AVX512F: cost of 5 {{.*}} uitofp
%1 = uitofp <16 x i16> %a to <16 x double>
ret <16 x double> %1
}
@@ -153,13 +153,13 @@ define <32 x double> @uitofpv32i16v32double(<32 x i16> %a) {
; SSE2: cost of 320 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i16v32double
- ; AVX1: cost of 80 {{.*}} uitofp
+ ; AVX1: cost of 23 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i16v32double
- ; AVX2: cost of 80 {{.*}} uitofp
+ ; AVX2: cost of 23 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i16v32double
- ; AVX512F: cost of 88 {{.*}} uitofp
+ ; AVX512F: cost of 11 {{.*}} uitofp
%1 = uitofp <32 x i16> %a to <32 x double>
ret <32 x double> %1
}
@@ -201,10 +201,10 @@ define <8 x double> @uitofpv8i32v8double(<8 x i32> %a) {
; SSE2: cost of 80 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv8i32v8double
- ; AVX1: cost of 20 {{.*}} uitofp
+ ; AVX1: cost of 13 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv8i32v8double
- ; AVX2: cost of 20 {{.*}} uitofp
+ ; AVX2: cost of 13 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv8i32v8double
; AVX512F: cost of 1 {{.*}} uitofp
@@ -217,13 +217,13 @@ define <16 x double> @uitofpv16i32v16double(<16 x i32> %a) {
; SSE2: cost of 160 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i32v16double
- ; AVX1: cost of 40 {{.*}} uitofp
+ ; AVX1: cost of 27 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i32v16double
- ; AVX2: cost of 40 {{.*}} uitofp
+ ; AVX2: cost of 27 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i32v16double
- ; AVX512F: cost of 44 {{.*}} uitofp
+ ; AVX512F: cost of 3 {{.*}} uitofp
%1 = uitofp <16 x i32> %a to <16 x double>
ret <16 x double> %1
}
@@ -233,13 +233,13 @@ define <32 x double> @uitofpv32i32v32double(<32 x i32> %a) {
; SSE2: cost of 320 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i32v32double
- ; AVX1: cost of 80 {{.*}} uitofp
+ ; AVX1: cost of 55 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i32v32double
- ; AVX2: cost of 80 {{.*}} uitofp
+ ; AVX2: cost of 55 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i32v32double
- ; AVX512F: cost of 88 {{.*}} uitofp
+ ; AVX512F: cost of 7 {{.*}} uitofp
%1 = uitofp <32 x i32> %a to <32 x double>
ret <32 x double> %1
}
@@ -257,7 +257,7 @@ define <2 x double> @uitofpv2i64v2double(<2 x i64> %a) {
; AVX512F-LABEL: uitofpv2i64v2double
; AVX512F: cost of 5 {{.*}} uitofp
;
- ; AVX512DQ: uitofpv2i64v2double
+ ; AVX512DQ-LABEL: uitofpv2i64v2double
; AVX512DQ: cost of 1 {{.*}} uitofp
%1 = uitofp <2 x i64> %a to <2 x double>
ret <2 x double> %1
@@ -276,7 +276,7 @@ define <4 x double> @uitofpv4i64v4double(<4 x i64> %a) {
; AVX512F-LABEL: uitofpv4i64v4double
; AVX512F: cost of 12 {{.*}} uitofp
;
- ; AVX512DQ: uitofpv4i64v4double
+ ; AVX512DQ-LABEL: uitofpv4i64v4double
; AVX512DQ: cost of 1 {{.*}} uitofp
%1 = uitofp <4 x i64> %a to <4 x double>
ret <4 x double> %1
@@ -287,15 +287,15 @@ define <8 x double> @uitofpv8i64v8double(<8 x i64> %a) {
; SSE2: cost of 80 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv8i64v8double
- ; AVX1: cost of 20 {{.*}} uitofp
+ ; AVX1: cost of 81 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv8i64v8double
- ; AVX2: cost of 20 {{.*}} uitofp
+ ; AVX2: cost of 81 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv8i64v8double
; AVX512F: cost of 26 {{.*}} uitofp
;
- ; AVX512DQ: uitofpv8i64v8double
+ ; AVX512DQ-LABEL: uitofpv8i64v8double
; AVX512DQ: cost of 1 {{.*}} uitofp
%1 = uitofp <8 x i64> %a to <8 x double>
ret <8 x double> %1
@@ -306,16 +306,16 @@ define <16 x double> @uitofpv16i64v16double(<16 x i64> %a) {
; SSE2: cost of 160 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i64v16double
- ; AVX1: cost of 40 {{.*}} uitofp
+ ; AVX1: cost of 163 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i64v16double
- ; AVX2: cost of 40 {{.*}} uitofp
+ ; AVX2: cost of 163 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i64v16double
- ; AVX512F: cost of 44 {{.*}} uitofp
+ ; AVX512F: cost of 53 {{.*}} uitofp
;
- ; AVX512DQ: uitofpv16i64v16double
- ; AVX512DQ: cost of 44 {{.*}} uitofp
+ ; AVX512DQ-LABEL: uitofpv16i64v16double
+ ; AVX512DQ: cost of 3 {{.*}} uitofp
%1 = uitofp <16 x i64> %a to <16 x double>
ret <16 x double> %1
}
@@ -325,16 +325,16 @@ define <32 x double> @uitofpv32i64v32double(<32 x i64> %a) {
; SSE2: cost of 320 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i64v32double
- ; AVX1: cost of 80 {{.*}} uitofp
+ ; AVX1: cost of 327 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i64v32double
- ; AVX2: cost of 80 {{.*}} uitofp
+ ; AVX2: cost of 327 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i64v32double
- ; AVX512F: cost of 88 {{.*}} uitofp
+ ; AVX512F: cost of 107 {{.*}} uitofp
;
- ; AVX512DQ: uitofpv32i64v32double
- ; AVX512DQ: cost of 88 {{.*}} uitofp
+ ; AVX512DQ-LABEL: uitofpv32i64v32double
+ ; AVX512DQ: cost of 7 {{.*}} uitofp
%1 = uitofp <32 x i64> %a to <32 x double>
ret <32 x double> %1
}
@@ -392,10 +392,10 @@ define <16 x float> @uitofpv16i8v16float(<16 x i8> %a) {
; SSE2: cost of 8 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i8v16float
- ; AVX1: cost of 44 {{.*}} uitofp
+ ; AVX1: cost of 11 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i8v16float
- ; AVX2: cost of 44 {{.*}} uitofp
+ ; AVX2: cost of 11 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i8v16float
; AVX512F: cost of 2 {{.*}} uitofp
@@ -408,13 +408,13 @@ define <32 x float> @uitofpv32i8v32float(<32 x i8> %a) {
; SSE2: cost of 16 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i8v32float
- ; AVX1: cost of 88 {{.*}} uitofp
+ ; AVX1: cost of 23 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i8v32float
- ; AVX2: cost of 88 {{.*}} uitofp
+ ; AVX2: cost of 23 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i8v32float
- ; AVX512F: cost of 92 {{.*}} uitofp
+ ; AVX512F: cost of 5 {{.*}} uitofp
%1 = uitofp <32 x i8> %a to <32 x float>
ret <32 x float> %1
}
@@ -472,10 +472,10 @@ define <16 x float> @uitofpv16i16v16float(<16 x i16> %a) {
; SSE2: cost of 30 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i16v16float
- ; AVX1: cost of 44 {{.*}} uitofp
+ ; AVX1: cost of 11 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i16v16float
- ; AVX2: cost of 44 {{.*}} uitofp
+ ; AVX2: cost of 11 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i16v16float
; AVX512F: cost of 2 {{.*}} uitofp
@@ -488,13 +488,13 @@ define <32 x float> @uitofpv32i16v32float(<32 x i16> %a) {
; SSE2: cost of 60 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i16v32float
- ; AVX1: cost of 88 {{.*}} uitofp
+ ; AVX1: cost of 23 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i16v32float
- ; AVX2: cost of 88 {{.*}} uitofp
+ ; AVX2: cost of 23 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i16v32float
- ; AVX512F: cost of 92 {{.*}} uitofp
+ ; AVX512F: cost of 5 {{.*}} uitofp
%1 = uitofp <32 x i16> %a to <32 x float>
ret <32 x float> %1
}
@@ -552,10 +552,10 @@ define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) {
; SSE2: cost of 32 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i32v16float
- ; AVX1: cost of 44 {{.*}} uitofp
+ ; AVX1: cost of 19 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i32v16float
- ; AVX2: cost of 44 {{.*}} uitofp
+ ; AVX2: cost of 17 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i32v16float
; AVX512F: cost of 1 {{.*}} uitofp
@@ -568,13 +568,13 @@ define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) {
; SSE2: cost of 64 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i32v32float
- ; AVX1: cost of 88 {{.*}} uitofp
+ ; AVX1: cost of 39 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i32v32float
- ; AVX2: cost of 88 {{.*}} uitofp
+ ; AVX2: cost of 35 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i32v32float
- ; AVX512F: cost of 92 {{.*}} uitofp
+ ; AVX512F: cost of 3 {{.*}} uitofp
%1 = uitofp <32 x i32> %a to <32 x float>
ret <32 x float> %1
}
@@ -616,10 +616,10 @@ define <8 x float> @uitofpv8i64v8float(<8 x i64> %a) {
; SSE2: cost of 60 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv8i64v8float
- ; AVX1: cost of 22 {{.*}} uitofp
+ ; AVX1: cost of 21 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv8i64v8float
- ; AVX2: cost of 22 {{.*}} uitofp
+ ; AVX2: cost of 21 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv8i64v8float
; AVX512F: cost of 22 {{.*}} uitofp
@@ -632,13 +632,13 @@ define <16 x float> @uitofpv16i64v16float(<16 x i64> %a) {
; SSE2: cost of 120 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i64v16float
- ; AVX1: cost of 44 {{.*}} uitofp
+ ; AVX1: cost of 43 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i64v16float
- ; AVX2: cost of 44 {{.*}} uitofp
+ ; AVX2: cost of 43 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i64v16float
- ; AVX512F: cost of 46 {{.*}} uitofp
+ ; AVX512F: cost of 45 {{.*}} uitofp
%1 = uitofp <16 x i64> %a to <16 x float>
ret <16 x float> %1
}
@@ -648,13 +648,13 @@ define <32 x float> @uitofpv32i64v32float(<32 x i64> %a) {
; SSE2: cost of 240 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i64v32float
- ; AVX1: cost of 88 {{.*}} uitofp
+ ; AVX1: cost of 87 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i64v32float
- ; AVX2: cost of 88 {{.*}} uitofp
+ ; AVX2: cost of 87 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i64v32float
- ; AVX512F: cost of 92 {{.*}} uitofp
+ ; AVX512F: cost of 91 {{.*}} uitofp
%1 = uitofp <32 x i64> %a to <32 x float>
ret <32 x float> %1
}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 23e363eae02..ec7ad1183ee 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -17,9 +17,9 @@ target triple = "x86_64-pc_linux"
;}
;AVX512-LABEL: @foo1
-;AVX512: llvm.masked.load.v8i32
-;AVX512: llvm.masked.gather.v8f32
-;AVX512: llvm.masked.store.v8f32
+;AVX512: llvm.masked.load.v16i32
+;AVX512: llvm.masked.gather.v16f32
+;AVX512: llvm.masked.store.v16f32
;AVX512: ret void
; Function Attrs: nounwind uwtable
OpenPOWER on IntegriCloud