2 files changed, 29 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index 25486cf3919..c6654ec22c1 100644
--- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -416,8 +416,30 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
   assert(!Src->isVoidTy() && "Invalid type");
   std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);
 
-  // Assume that all loads of legal types cost 1.
-  return LT.first;
+  // Assume that all loads and stores of legal types cost 1.
+  unsigned Cost = LT.first;
+
+  if (Src->isVectorTy() &&
+      Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
+    // This is a vector load or store that legalizes to a larger type than the
+    // vector itself. Unless the corresponding extending load or truncating
+    // store is legal, this will scalarize.
+    TargetLowering::LegalizeAction LA;
+    MVT MemVT = getTLI()->getSimpleValueType(Src, true);
+    if (Opcode == Instruction::Store)
+      LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
+    else
+      LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT);
+
+    if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
+      // This is a vector load/store for some illegal type that is scalarized.
+      // We must account for the cost of building or decomposing the vector.
+      Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
+                                            Opcode == Instruction::Store);
+    }
+  }
+
+  return Cost;
 }
 
 unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
diff --git a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
index c77cce955ab..8145a1dc715 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -29,6 +29,11 @@ define i32 @loads(i32 %arg) {
   ; CHECK: cost of 4 {{.*}} load
   load i128* undef, align 4
 
+  ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
+  ; this with a small expense, but we don't currently.
+  ; CHECK: cost of 60 {{.*}} load
+  load <4 x i16>* undef, align 2
+
   ret i32 undef
 }