summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp 20
-rw-r--r-- llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll 2
2 files changed, 19 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index ceadb8d0634..e0438b037c0 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -579,7 +579,15 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
StoreInst *S0 = cast<StoreInst>(Chain[0]);
- Type *StoreTy = S0->getValueOperand()->getType();
+
+ // If the vector has an int element, default to int for the whole store.
+ Type *StoreTy;
+ for (const auto &V : Chain) {
+ StoreTy = cast<StoreInst>(V)->getValueOperand()->getType();
+ if (StoreTy->isIntOrIntVectorTy())
+ break;
+ }
+
unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
@@ -700,7 +708,15 @@ bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
LoadInst *L0 = cast<LoadInst>(Chain[0]);
- Type *LoadTy = L0->getType();
+
+ // If the vector has an int element, default to int for the whole load.
+ Type *LoadTy;
+ for (const auto &V : Chain) {
+ LoadTy = cast<LoadInst>(V)->getType();
+ if (LoadTy->isIntOrIntVectorTy())
+ break;
+ }
+
unsigned Sz = DL.getTypeSizeInBits(LoadTy);
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
index 2a4015d915e..57aa5ef6cef 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@@ -140,7 +140,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
}
; CHECK-LABEL: @merge_global_store_4_constants_mixed_i32_f32
-; CHECK: store <4 x float> <float 8.000000e+00, float 0x36D6000000000000, float 2.000000e+00, float 0x36E1000000000000>, <4 x float> addrspace(1)* %{{[0-9]+}}
+; CHECK: store <4 x i32> <i32 1090519040, i32 11, i32 1073741824, i32 17>, <4 x i32> addrspace(1)* %{{[0-9]+$}}
define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
OpenPOWER on IntegriCloud