2 files changed, 20 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 804db412e29..acec94ecd05 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -969,8 +969,6 @@ bool Vectorizer::vectorizeLoadChain(
 
   if (VecLoadTy) {
     SmallVector<Instruction *, 16> InstrsToErase;
-    SmallVector<Instruction *, 16> InstrsToReorder;
-    InstrsToReorder.push_back(cast<Instruction>(Bitcast));
 
     unsigned VecWidth = VecLoadTy->getNumElements();
     for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
@@ -990,15 +988,14 @@ bool Vectorizer::vectorizeLoadChain(
       }
     }
 
-    for (Instruction *ModUser : InstrsToReorder)
-      reorder(ModUser);
+    // Bitcast might be a constant rather than an Instruction, e.g. when the
+    // loaded address is a constant.  In that case there is nothing to reorder.
+    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
+      reorder(BitcastInst);
 
     for (auto I : InstrsToErase)
       I->eraseFromParent();
   } else {
-    SmallVector<Instruction *, 16> InstrsToReorder;
-    InstrsToReorder.push_back(cast<Instruction>(Bitcast));
-
     for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
       Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(I));
       Instruction *Extracted = cast<Instruction>(V);
@@ -1012,8 +1009,8 @@ bool Vectorizer::vectorizeLoadChain(
       UI->replaceAllUsesWith(Extracted);
     }
 
-    for (Instruction *ModUser : InstrsToReorder)
-      reorder(ModUser);
+    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
+      reorder(BitcastInst);
   }
 
   eraseInstructions(Chain);
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll
new file mode 100644
index 00000000000..c8c3c51dfb0
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll
@@ -0,0 +1,14 @@
+; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Loads through constant-expression GEPs into a constant global.  These can be vectorized, and doing so must not crash the pass.
+
+@global = internal addrspace(1) constant [4 x float] [float 0xBF71111120000000, float 0x3F70410420000000, float 0xBF81111120000000, float 0x3FB5555560000000], align 4
+
+define void @foo() {
+  ; CHECK: load <4 x float>
+  %a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 4
+  %b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4
+  %c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4
+  %d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4
+  ret void
+}