diff options
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp | 15 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll | 14 |
2 files changed, 20 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 804db412e29..acec94ecd05 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -969,8 +969,6 @@ bool Vectorizer::vectorizeLoadChain( if (VecLoadTy) { SmallVector<Instruction *, 16> InstrsToErase; - SmallVector<Instruction *, 16> InstrsToReorder; - InstrsToReorder.push_back(cast<Instruction>(Bitcast)); unsigned VecWidth = VecLoadTy->getNumElements(); for (unsigned I = 0, E = Chain.size(); I != E; ++I) { @@ -990,15 +988,14 @@ bool Vectorizer::vectorizeLoadChain( } } - for (Instruction *ModUser : InstrsToReorder) - reorder(ModUser); + // Bitcast might not be an Instruction, if the value being loaded is a + // constant. In that case, no need to reorder anything. + if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast)) + reorder(BitcastInst); for (auto I : InstrsToErase) I->eraseFromParent(); } else { - SmallVector<Instruction *, 16> InstrsToReorder; - InstrsToReorder.push_back(cast<Instruction>(Bitcast)); - for (unsigned I = 0, E = Chain.size(); I != E; ++I) { Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(I)); Instruction *Extracted = cast<Instruction>(V); @@ -1012,8 +1009,8 @@ bool Vectorizer::vectorizeLoadChain( UI->replaceAllUsesWith(Extracted); } - for (Instruction *ModUser : InstrsToReorder) - reorder(ModUser); + if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast)) + reorder(BitcastInst); } eraseInstructions(Chain); diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll new file mode 100644 index 00000000000..c8c3c51dfb0 --- /dev/null +++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll @@ -0,0 +1,14 @@ +; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s + +; Load from a constant. This can be vectorized, but shouldn't crash us. + +@global = internal addrspace(1) constant [4 x float] [float 0xBF71111120000000, float 0x3F70410420000000, float 0xBF81111120000000, float 0x3FB5555560000000], align 4 + +define void @foo() { + ; CHECK: load <4 x float> + %a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 4 + %b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4 + %c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4 + %d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4 + ret void +} |

