diff options
-rw-r--r-- | llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 26 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/PowerPC/load_store.ll | 5 |
2 files changed, 29 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 25486cf3919..c6654ec22c1 100644 --- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -416,8 +416,30 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, assert(!Src->isVoidTy() && "Invalid type"); std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); - // Assume that all loads of legal types cost 1. - return LT.first; + // Assuming that all loads of legal types cost 1. + unsigned Cost = LT.first; + + if (Src->isVectorTy() && + Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { + // This is a vector load that legalizes to a larger type than the vector + // itself. Unless the corresponding extending load or truncating store is + // legal, then this will scalarize. + TargetLowering::LegalizeAction LA; + MVT MemVT = getTLI()->getSimpleValueType(Src, true); + if (Opcode == Instruction::Store) + LA = getTLI()->getTruncStoreAction(LT.second, MemVT); + else + LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT); + + if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { + // This is a vector load/store for some illegal type that is scalarized. + // We must account for the cost of building or decomposing the vector. + Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, + Opcode == Instruction::Store); + } + } + + return Cost; } unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, diff --git a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll index c77cce955ab..8145a1dc715 100644 --- a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll +++ b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll @@ -29,6 +29,11 @@ define i32 @loads(i32 %arg) { ; CHECK: cost of 4 {{.*}} load load i128* undef, align 4 + ; FIXME: There actually are sub-vector Altivec loads, and so we could handle + ; this with a small expense, but we don't currently. + ; CHECK: cost of 60 {{.*}} load + load <4 x i16>* undef, align 2 + ret i32 undef } |