author | David Blaikie <dblaikie@gmail.com> | 2015-02-27 21:17:42 +0000
committer | David Blaikie <dblaikie@gmail.com> | 2015-02-27 21:17:42 +0000
commit | a79ac14fa68297f9888bc70a10df5ed9b8864e38 (patch)
tree | 8d8217a8928e3ee599bdde405e2e178b3a55b645 /llvm/test/CodeGen/ARM
parent | 83687fb9e654c9d0086e7f6b728c26fa0b729e71 (diff)
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction
Essentially the same as the GEP change in r230786.
A similar migration script can be used to update test cases, though a few more
test case improvements/changes were required this time around (r229269-r229278):
import fileinput
import sys
import re

# Rewrite "load <ty>* <ptr>" as "load <ty>, <ty>* <ptr>", preserving any
# atomic/volatile markers and any addrspace qualifier on the pointer type.
pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

for line in sys.stdin:
    sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line))
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7649
llvm-svn: 230794
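
For reference, a minimal, self-contained sketch (not part of the original commit) of the rewrite the script performs; the pattern and replacement string are the ones from the script above, and the sample IR line is only illustrative:

import re

pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

old = "  %tmp2 = load i32* %tmp1 ; <i32> [#uses=1]"
print(re.sub(pat, r"\1, \2\3*\4", old))
# prints:   %tmp2 = load i32, i32* %tmp1 ; <i32> [#uses=1]

The script is meant to be run as a filter over each test file, e.g. python update_loads.py < old.ll > new.ll (file names here are hypothetical); the ARM test diffs below are presumably the result of such a rewrite plus the manual adjustments mentioned above.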
Diffstat (limited to 'llvm/test/CodeGen/ARM')
366 files changed, 3341 insertions, 3341 deletions
diff --git a/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll index ab9c4d675eb..b719f9f4d23 100644 --- a/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll +++ b/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll @@ -11,7 +11,7 @@ bb169.i: ; preds = %entry ret void cond_true11: ; preds = %entry - %tmp.i32 = load %struct.layer_data** @ld ; <%struct.layer_data*> [#uses=2] + %tmp.i32 = load %struct.layer_data*, %struct.layer_data** @ld ; <%struct.layer_data*> [#uses=2] %tmp3.i35 = getelementptr %struct.layer_data, %struct.layer_data* %tmp.i32, i32 0, i32 1, i32 2048; <i8*> [#uses=2] %tmp.i36 = getelementptr %struct.layer_data, %struct.layer_data* %tmp.i32, i32 0, i32 2 ; <i8**> [#uses=1] store i8* %tmp3.i35, i8** %tmp.i36 diff --git a/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index c2b0ad4a588..8b94b7bf816 100644 --- a/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -15,15 +15,15 @@ entry: br label %cond_next489 cond_next489: ; preds = %cond_false, %bb471 - %j.7.in = load i8* null ; <i8> [#uses=1] - %i.8.in = load i8* null ; <i8> [#uses=1] + %j.7.in = load i8, i8* null ; <i8> [#uses=1] + %i.8.in = load i8, i8* null ; <i8> [#uses=1] %i.8 = zext i8 %i.8.in to i32 ; <i32> [#uses=4] %j.7 = zext i8 %j.7.in to i32 ; <i32> [#uses=4] %tmp495 = getelementptr [4 x [4 x i32]], [4 x [4 x i32]]* %predicted_block, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=2] - %tmp496 = load i32* %tmp495 ; <i32> [#uses=2] - %tmp502 = load i32* null ; <i32> [#uses=1] + %tmp496 = load i32, i32* %tmp495 ; <i32> [#uses=2] + %tmp502 = load i32, i32* null ; <i32> [#uses=1] %tmp542 = getelementptr [6 x [4 x [4 x i32]]], [6 x [4 x [4 x i32]]]* @quant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1] - %tmp543 = load i32* %tmp542 ; <i32> [#uses=1] + %tmp543 = load i32, i32* %tmp542 ; <i32> [#uses=1] %tmp548 = ashr i32 0, 0 ; <i32> [#uses=3] %tmp561 = sub i32 0, %tmp496 ; <i32> [#uses=3] %abscond563 = icmp sgt i32 %tmp561, -1 ; <i1> [#uses=1] @@ -36,9 +36,9 @@ cond_next489: ; preds = %cond_false, %bb471 cond_next589: ; preds = %cond_next489 %tmp605 = getelementptr [6 x [4 x [4 x i32]]], [6 x [4 x [4 x i32]]]* @dequant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1] - %tmp606 = load i32* %tmp605 ; <i32> [#uses=1] - %tmp612 = load i32* null ; <i32> [#uses=1] - %tmp629 = load i32* null ; <i32> [#uses=1] + %tmp606 = load i32, i32* %tmp605 ; <i32> [#uses=1] + %tmp612 = load i32, i32* null ; <i32> [#uses=1] + %tmp629 = load i32, i32* null ; <i32> [#uses=1] %tmp629a = sitofp i32 %tmp629 to double ; <double> [#uses=1] %tmp631 = fmul double %tmp629a, 0.000000e+00 ; <double> [#uses=1] %tmp632 = fadd double 0.000000e+00, %tmp631 ; <double> [#uses=1] @@ -85,9 +85,9 @@ bb737: ; preds = %cond_false689 cond_true740: ; preds = %bb737 %tmp761 = call fastcc i32 @sign( i32 %tmp576, i32 0 ) ; <i32> [#uses=1] - %tmp780 = load i32* null ; <i32> [#uses=1] + %tmp780 = load i32, i32* null ; <i32> [#uses=1] %tmp785 = getelementptr [4 x [4 x i32]], [4 x [4 x i32]]* @A, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1] - %tmp786 = load i32* %tmp785 ; <i32> [#uses=1] + %tmp786 = load i32, i32* %tmp785 ; <i32> [#uses=1] %tmp781 = mul i32 %tmp780, %tmp761 ; <i32> [#uses=1] %tmp787 = mul i32 %tmp781, %tmp786 ; <i32> [#uses=1] %tmp789 = shl i32 %tmp787, 0 ; <i32> [#uses=1] @@ -96,7 +96,7 @@ cond_true740: ; preds = %bb737 cond_next791: ; preds = %cond_true740, %bb737 %ilev.1 = phi i32 [ %tmp790, 
%cond_true740 ], [ 0, %bb737 ] ; <i32> [#uses=1] - %tmp796 = load i32* %tmp495 ; <i32> [#uses=1] + %tmp796 = load i32, i32* %tmp495 ; <i32> [#uses=1] %tmp798 = add i32 %tmp796, %ilev.1 ; <i32> [#uses=1] %tmp812 = mul i32 0, %tmp502 ; <i32> [#uses=0] %tmp818 = call fastcc i32 @sign( i32 0, i32 %tmp798 ) ; <i32> [#uses=0] diff --git a/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll index 80eda54d873..472a345a0d7 100644 --- a/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll +++ b/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll @@ -5,7 +5,7 @@ define fastcc i8* @read_sleb128(i8* %p, i32* %val) { bb: ; preds = %bb, %0 %p_addr.0 = getelementptr i8, i8* %p, i32 0 ; <i8*> [#uses=1] - %tmp2 = load i8* %p_addr.0 ; <i8> [#uses=2] + %tmp2 = load i8, i8* %p_addr.0 ; <i8> [#uses=2] %tmp4.rec = add i32 0, 1 ; <i32> [#uses=1] %tmp4 = getelementptr i8, i8* %p, i32 %tmp4.rec ; <i8*> [#uses=1] %tmp56 = zext i8 %tmp2 to i32 ; <i32> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll index b8c8e70f889..9c0143be06c 100644 --- a/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll +++ b/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -23,25 +23,25 @@ bb74: ; preds = %bb26, %newFuncRoot %d1.1 = phi i32 [ %tmp54, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2] %d2.1 = phi i32 [ %tmp64, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2] %d3.1 = phi i32 [ %tmp69, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2] - %fm.1 = load i32* %fm.1.in ; <i32> [#uses=4] + %fm.1 = load i32, i32* %fm.1.in ; <i32> [#uses=4] icmp eq i32 %fp.1.rec, %tmp8 ; <i1>:0 [#uses=1] br i1 %0, label %bb78.exitStub, label %bb26 bb26: ; preds = %bb74 %tmp28 = getelementptr i32*, i32** %tmp1, i32 %fp.1.rec ; <i32**> [#uses=1] - %tmp30 = load i32** %tmp28 ; <i32*> [#uses=4] + %tmp30 = load i32*, i32** %tmp28 ; <i32*> [#uses=4] %tmp33 = getelementptr i32, i32* %tmp30, i32 %i.0196.0.ph ; <i32*> [#uses=1] - %tmp34 = load i32* %tmp33 ; <i32> [#uses=1] + %tmp34 = load i32, i32* %tmp33 ; <i32> [#uses=1] %tmp38 = getelementptr i32, i32* %tmp30, i32 %tmp36224 ; <i32*> [#uses=1] - %tmp39 = load i32* %tmp38 ; <i32> [#uses=1] + %tmp39 = load i32, i32* %tmp38 ; <i32> [#uses=1] %tmp42 = mul i32 %tmp34, %fm.1 ; <i32> [#uses=1] %tmp44 = add i32 %tmp42, %d0.1 ; <i32> [#uses=1] %tmp48 = getelementptr i32, i32* %tmp30, i32 %tmp46223 ; <i32*> [#uses=1] - %tmp49 = load i32* %tmp48 ; <i32> [#uses=1] + %tmp49 = load i32, i32* %tmp48 ; <i32> [#uses=1] %tmp52 = mul i32 %tmp39, %fm.1 ; <i32> [#uses=1] %tmp54 = add i32 %tmp52, %d1.1 ; <i32> [#uses=1] %tmp58 = getelementptr i32, i32* %tmp30, i32 %tmp56222 ; <i32*> [#uses=1] - %tmp59 = load i32* %tmp58 ; <i32> [#uses=1] + %tmp59 = load i32, i32* %tmp58 ; <i32> [#uses=1] %tmp62 = mul i32 %tmp49, %fm.1 ; <i32> [#uses=1] %tmp64 = add i32 %tmp62, %d2.1 ; <i32> [#uses=1] %tmp67 = mul i32 %tmp59, %fm.1 ; <i32> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll index 9d8c526b2bc..0162d7f55ce 100644 --- a/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll +++ b/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll @@ -71,7 +71,7 @@ cond_next856: ; preds = %cond_true851 ret void bb866: ; preds = %cond_true851 - %tmp874 = load i32* %tmp2122 ; <i32> [#uses=1] + %tmp874 = load i32, i32* %tmp2122 ; <i32> [#uses=1] %tmp876877 = trunc i32 %tmp874 to i8 ; <i8> [#uses=1] icmp eq i8 %tmp876877, 1 ; <i1>:0 [#uses=1] br i1 %0, label 
%cond_next881, label %cond_true878 @@ -82,7 +82,7 @@ cond_true878: ; preds = %bb866 cond_next881: ; preds = %bb866 %tmp884885 = inttoptr i64 %tmp10959 to %struct.tree_identifier* ; <%struct.tree_identifier*> [#uses=1] %tmp887 = getelementptr %struct.tree_identifier, %struct.tree_identifier* %tmp884885, i32 0, i32 1, i32 0 ; <i8**> [#uses=1] - %tmp888 = load i8** %tmp887 ; <i8*> [#uses=1] + %tmp888 = load i8*, i8** %tmp887 ; <i8*> [#uses=1] tail call void (i32, ...)* @error( i32 undef, i8* %tmp888 ) ret void diff --git a/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll index 19628597b8b..2a0ef770f3b 100644 --- a/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll @@ -13,7 +13,7 @@ cond_true340: ; preds = %entry ret void cond_next416: ; preds = %entry - %tmp1085 = load %struct.rtx_def** %ad_addr ; <%struct.rtx_def*> [#uses=1] + %tmp1085 = load %struct.rtx_def*, %struct.rtx_def** %ad_addr ; <%struct.rtx_def*> [#uses=1] br i1 false, label %bb1084, label %cond_true418 cond_true418: ; preds = %cond_next416 @@ -25,7 +25,7 @@ bb1084: ; preds = %cond_next416 cond_true1092: ; preds = %bb1084 %tmp1094 = getelementptr %struct.rtx_def, %struct.rtx_def* %tmp1085, i32 0, i32 3 ; <%struct.u*> [#uses=1] %tmp10981099 = bitcast %struct.u* %tmp1094 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=2] - %tmp1101 = load %struct.rtx_def** %tmp10981099 ; <%struct.rtx_def*> [#uses=1] + %tmp1101 = load %struct.rtx_def*, %struct.rtx_def** %tmp10981099 ; <%struct.rtx_def*> [#uses=1] store %struct.rtx_def* %tmp1101, %struct.rtx_def** %ad_addr br label %cond_next1102 diff --git a/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll index 49958be4e3d..7b74e6ce948 100644 --- a/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll @@ -28,7 +28,7 @@ entry: br i1 false, label %bb.preheader, label %return bb.preheader: ; preds = %entry - %tbl.014.us = load i32* null ; <i32> [#uses=1] + %tbl.014.us = load i32, i32* null ; <i32> [#uses=1] br i1 false, label %cond_next.us, label %bb cond_next51.us: ; preds = %cond_next.us, %cond_true33.us.cond_true46.us_crit_edge @@ -41,7 +41,7 @@ cond_true33.us.cond_true46.us_crit_edge: ; preds = %cond_next.us cond_next.us: ; preds = %bb.preheader %tmp37.us = getelementptr %struct.X_Y, %struct.X_Y* %cinfo, i32 0, i32 17, i32 %tbl.014.us ; <%struct.H_TBL**> [#uses=3] - %tmp4524.us = load %struct.H_TBL** %tmp37.us ; <%struct.H_TBL*> [#uses=1] + %tmp4524.us = load %struct.H_TBL*, %struct.H_TBL** %tmp37.us ; <%struct.H_TBL*> [#uses=1] icmp eq %struct.H_TBL* %tmp4524.us, null ; <i1>:0 [#uses=1] br i1 %0, label %cond_true33.us.cond_true46.us_crit_edge, label %cond_next51.us diff --git a/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll index 6bb82992ad5..d34c078f054 100644 --- a/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll +++ b/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll @@ -9,7 +9,7 @@ define internal void @_ZN1B1iEv(%struct.B* %this) { entry: %tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1] - %tmp2 = load i32* %tmp1 ; <i32> [#uses=1] + %tmp2 = load i32, i32* %tmp1 ; <i32> [#uses=1] %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0] ret void } @@ -19,7 +19,7 @@ declare i32 
@printf(i8*, ...) define internal void @_ZN1B1jEv(%struct.B* %this) { entry: %tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1] - %tmp2 = load i32* %tmp1 ; <i32> [#uses=1] + %tmp2 = load i32, i32* %tmp1 ; <i32> [#uses=1] %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0] ret void } @@ -37,11 +37,11 @@ cond_true.i: ; preds = %entry %b2.i = bitcast %struct.B* %b.i to i8* ; <i8*> [#uses=1] %ctg23.i = getelementptr i8, i8* %b2.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] %tmp121314.i = bitcast i8* %ctg23.i to i32 (...)*** ; <i32 (...)***> [#uses=1] - %tmp15.i = load i32 (...)*** %tmp121314.i ; <i32 (...)**> [#uses=1] + %tmp15.i = load i32 (...)**, i32 (...)*** %tmp121314.i ; <i32 (...)**> [#uses=1] %tmp151.i = bitcast i32 (...)** %tmp15.i to i8* ; <i8*> [#uses=1] %ctg2.i = getelementptr i8, i8* %tmp151.i, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1] %tmp2021.i = bitcast i8* %ctg2.i to i32 (...)** ; <i32 (...)**> [#uses=1] - %tmp22.i = load i32 (...)** %tmp2021.i ; <i32 (...)*> [#uses=1] + %tmp22.i = load i32 (...)*, i32 (...)** %tmp2021.i ; <i32 (...)*> [#uses=1] %tmp2223.i = bitcast i32 (...)* %tmp22.i to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1] br label %_Z3fooiM1BFvvE.exit @@ -59,11 +59,11 @@ cond_true.i46: ; preds = %_Z3fooiM1BFvvE.exit %b2.i35 = bitcast %struct.B* %b.i29 to i8* ; <i8*> [#uses=1] %ctg23.i36 = getelementptr i8, i8* %b2.i35, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] %tmp121314.i37 = bitcast i8* %ctg23.i36 to i32 (...)*** ; <i32 (...)***> [#uses=1] - %tmp15.i38 = load i32 (...)*** %tmp121314.i37 ; <i32 (...)**> [#uses=1] + %tmp15.i38 = load i32 (...)**, i32 (...)*** %tmp121314.i37 ; <i32 (...)**> [#uses=1] %tmp151.i41 = bitcast i32 (...)** %tmp15.i38 to i8* ; <i8*> [#uses=1] %ctg2.i42 = getelementptr i8, i8* %tmp151.i41, i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) ; <i8*> [#uses=1] %tmp2021.i43 = bitcast i8* %ctg2.i42 to i32 (...)** ; <i32 (...)**> [#uses=1] - %tmp22.i44 = load i32 (...)** %tmp2021.i43 ; <i32 (...)*> [#uses=1] + %tmp22.i44 = load i32 (...)*, i32 (...)** %tmp2021.i43 ; <i32 (...)*> [#uses=1] %tmp2223.i45 = bitcast i32 (...)* %tmp22.i44 to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1] br label %_Z3fooiM1BFvvE.exit56 @@ -81,11 +81,11 @@ cond_true.i18: ; preds = %_Z3fooiM1BFvvE.exit56 %b2.i7 = bitcast %struct.B* %b.i1 to i8* ; <i8*> [#uses=1] %ctg23.i8 = getelementptr i8, i8* %b2.i7, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1] %tmp121314.i9 = bitcast i8* %ctg23.i8 to i32 (...)*** ; <i32 (...)***> [#uses=1] - %tmp15.i10 = load i32 (...)*** %tmp121314.i9 ; <i32 (...)**> [#uses=1] + %tmp15.i10 = load i32 (...)**, i32 (...)*** %tmp121314.i9 ; <i32 (...)**> [#uses=1] %tmp151.i13 = bitcast i32 (...)** %tmp15.i10 to i8* ; <i8*> [#uses=1] %ctg2.i14 = getelementptr i8, i8* %tmp151.i13, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1] %tmp2021.i15 = bitcast i8* %ctg2.i14 to i32 (...)** ; <i32 (...)**> [#uses=1] - %tmp22.i16 = load i32 (...)** %tmp2021.i15 ; <i32 (...)*> [#uses=1] + %tmp22.i16 = load i32 (...)*, i32 (...)** %tmp2021.i15 ; <i32 (...)*> [#uses=1] %tmp2223.i17 = bitcast i32 (...)* 
%tmp22.i16 to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1] br label %_Z3fooiM1BFvvE.exit28 diff --git a/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll index a89e937d3e1..7973f223ee0 100644 --- a/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll +++ b/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll @@ -9,15 +9,15 @@ target triple = "arm-apple-darwin8" define fastcc void @EvaluateDevelopment() { entry: - %tmp7 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 7) ; <i64> [#uses=1] - %tmp50 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 0) ; <i64> [#uses=1] - %tmp52 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 1) ; <i64> [#uses=1] + %tmp7 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 7) ; <i64> [#uses=1] + %tmp50 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 0) ; <i64> [#uses=1] + %tmp52 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 1) ; <i64> [#uses=1] %tmp53 = or i64 %tmp52, %tmp50 ; <i64> [#uses=1] - %tmp57.b = load i1* @rank_mask.1.b ; <i1> [#uses=1] + %tmp57.b = load i1, i1* @rank_mask.1.b ; <i1> [#uses=1] %tmp57 = select i1 %tmp57.b, i64 71776119061217280, i64 0 ; <i64> [#uses=1] %tmp58 = and i64 %tmp57, %tmp7 ; <i64> [#uses=1] %tmp59 = lshr i64 %tmp58, 8 ; <i64> [#uses=1] - %tmp63 = load i64* getelementptr ([8 x i64]* @file_mask, i32 0, i32 4) ; <i64> [#uses=1] + %tmp63 = load i64, i64* getelementptr ([8 x i64]* @file_mask, i32 0, i32 4) ; <i64> [#uses=1] %tmp64 = or i64 %tmp63, 0 ; <i64> [#uses=1] %tmp65 = and i64 %tmp59, %tmp53 ; <i64> [#uses=1] %tmp66 = and i64 %tmp65, %tmp64 ; <i64> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll index 7fd0bd5d7dc..f59d081d51d 100644 --- a/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll +++ b/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll @@ -14,7 +14,7 @@ define fastcc void @Draw7(i32 %Option, i32* %Status) { entry: - %tmp115.b = load i1* @FirstTime.4637.b ; <i1> [#uses=1] + %tmp115.b = load i1, i1* @FirstTime.4637.b ; <i1> [#uses=1] br i1 %tmp115.b, label %cond_next239, label %cond_next.i cond_next.i: ; preds = %entry @@ -88,11 +88,11 @@ cond_next1267: ; preds = %cond_next1235 br i1 %tmp1148, label %cond_next1275, label %cond_true1272 cond_true1272: ; preds = %cond_next1267 - %tmp1273 = load %struct.TestObj** null ; <%struct.TestObj*> [#uses=2] + %tmp1273 = load %struct.TestObj*, %struct.TestObj** null ; <%struct.TestObj*> [#uses=2] %tmp2930.i = ptrtoint %struct.TestObj* %tmp1273 to i32 ; <i32> [#uses=1] %tmp42.i348 = sub i32 0, %tmp2930.i ; <i32> [#uses=1] %tmp45.i = getelementptr %struct.TestObj, %struct.TestObj* %tmp1273, i32 0, i32 0 ; <i8**> [#uses=2] - %tmp48.i = load i8** %tmp45.i ; <i8*> [#uses=1] + %tmp48.i = load i8*, i8** %tmp45.i ; <i8*> [#uses=1] %tmp50.i350 = call i32 (i8*, i8*, ...)* @sprintf( i8* getelementptr ([256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i ) ; <i32> [#uses=0] br i1 false, label %cond_true.i632.i, label %Ut_TraceMsg.exit648.i diff --git a/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll index 90a3b372937..5895a3263e3 100644 --- a/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll +++ b/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll @@ -17,7 +17,7 @@ entry: %retval = alloca 
i32, align 4 ; <i32*> [#uses=1] store i32 %i, i32* %i_addr store i32 %q, i32* %q_addr - %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp = load i32, i32* %i_addr ; <i32> [#uses=1] %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] @@ -34,7 +34,7 @@ cond_false: ; preds = %entry br label %cond_next cond_next: ; preds = %cond_false, %cond_true - %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1] %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1] %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] @@ -55,7 +55,7 @@ cond_next18: ; preds = %cond_false15, %cond_true11 br label %return return: ; preds = %cond_next18 - %retval20 = load i32* %retval ; <i32> [#uses=1] + %retval20 = load i32, i32* %retval ; <i32> [#uses=1] ret i32 %retval20 } diff --git a/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll index 37e41ecc4b1..abb6a33f601 100644 --- a/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll +++ b/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll @@ -19,7 +19,7 @@ entry: %retval = alloca i32, align 4 ; <i32*> [#uses=1] store i32 %i, i32* %i_addr store i32 %q, i32* %q_addr - %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp = load i32, i32* %i_addr ; <i32> [#uses=1] %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] @@ -28,7 +28,7 @@ entry: cond_true: ; preds = %entry %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0] %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] - %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1] %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1] %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] @@ -37,7 +37,7 @@ cond_true: ; preds = %entry cond_false: ; preds = %entry %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0] %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] - %tmp27 = load i32* %q_addr ; <i32> [#uses=1] + %tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1] %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1] %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1] %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1] @@ -58,7 +58,7 @@ cond_next18: ; preds = %cond_false15, %cond_true11 br label %return return: ; preds = %cond_next18 - %retval20 = load i32* %retval ; <i32> [#uses=1] + %retval20 = load i32, i32* %retval ; <i32> [#uses=1] ret i32 %retval20 } diff --git a/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll index 30ae7237395..1edaefbc034 100644 --- a/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll +++ b/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll @@ -29,7 +29,7 @@ entry: %retval = alloca i32, align 4 ; <i32*> [#uses=1] store i32 %i, i32* %i_addr store i32 %q, i32* %q_addr - %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp = load i32, i32* %i_addr ; <i32> [#uses=1] %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] @@ -38,7 +38,7 @@ entry: cond_true: ; preds = %entry %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0] %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] - %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1] %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] %tmp89 = 
zext i1 %tmp8 to i8 ; <i8> [#uses=1] %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] @@ -47,7 +47,7 @@ cond_true: ; preds = %entry cond_false: ; preds = %entry %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0] %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] - %tmp27 = load i32* %q_addr ; <i32> [#uses=1] + %tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1] %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1] %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1] %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1] @@ -68,7 +68,7 @@ cond_next18: ; preds = %cond_false15, %cond_true11 br label %return return: ; preds = %cond_next18 - %retval20 = load i32* %retval ; <i32> [#uses=1] + %retval20 = load i32, i32* %retval ; <i32> [#uses=1] ret i32 %retval20 } diff --git a/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll index fd136761ebe..7d6396c6f68 100644 --- a/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll +++ b/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll @@ -45,7 +45,7 @@ bb102.i: ; preds = %cond_next212.i cond_true110.i: ; preds = %bb102.i %tmp116.i = getelementptr i8*, i8** %argv_addr.2321.0.i, i32 2 ; <i8**> [#uses=1] - %tmp117.i = load i8** %tmp116.i ; <i8*> [#uses=1] + %tmp117.i = load i8*, i8** %tmp116.i ; <i8*> [#uses=1] %tmp126425.i = call %struct.FILE* @fopen( i8* %tmp117.i, i8* getelementptr ([2 x i8]* @.str44, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=0] ret i32 0 @@ -60,7 +60,7 @@ C_addcmd.exit120.i: ; preds = %cond_next212.i %tmp3.i.i.i.i105.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1] %tmp1.i108.i = getelementptr [100 x i8*], [100 x i8*]* @_C_cmds, i32 0, i32 0 ; <i8**> [#uses=1] store i8* %tmp3.i.i.i.i105.i, i8** %tmp1.i108.i, align 4 - %tmp.i91.i = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1] + %tmp.i91.i = load i32, i32* @_C_nextcmd, align 4 ; <i32> [#uses=1] store i32 0, i32* @_C_nextcmd, align 4 %tmp3.i.i.i.i95.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1] %tmp1.i98.i = getelementptr [100 x i8*], [100 x i8*]* @_C_cmds, i32 0, i32 %tmp.i91.i ; <i8**> [#uses=1] @@ -78,7 +78,7 @@ cond_next212.i: ; preds = %cond_next212.i, %cond_next212.i, %cond_next212.i, %c %argv_addr.2321.0.i = phi i8** [ %argv, %entry ], [ %tmp214.i, %bb192.i ], [ %tmp214.i, %C_addcmd.exit120.i ], [ %tmp214.i, %bb30.i ], [ %tmp214.i, %bb21.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ] ; <i8**> [#uses=2] %argc_addr.2358.0.i = phi i32 [ %argc, %entry ], [ %tmp205399.i, %bb30.i ], [ 0, %bb21.i ], [ 0, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ] ; <i32> [#uses=1] %tmp214.i = getelementptr i8*, i8** %argv_addr.2321.0.i, i32 1 ; <i8**> [#uses=9] - %tmp215.i = load i8** %tmp214.i ; <i8*> [#uses=1] + %tmp215.i = load i8*, i8** %tmp214.i ; <i8*> [#uses=1] %tmp1314.i = sext i8 0 to i32 ; <i32> [#uses=1] switch i32 %tmp1314.i, label %bb192.i [ i32 76, label %C_addcmd.exit120.i diff --git a/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll index 3754db01fdd..989410552f3 100644 --- a/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll +++ b/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll @@ -10,7 +10,7 @@ define i32 @vorbis_staticbook_pack(%struct.static_codebook* %c, %struct.oggpack_buffer* %opb) { entry: %opb_addr = alloca %struct.oggpack_buffer* ; <%struct.oggpack_buffer**> [#uses=1] - %tmp1 = load 
%struct.oggpack_buffer** %opb_addr, align 4 ; <%struct.oggpack_buffer*> [#uses=1] + %tmp1 = load %struct.oggpack_buffer*, %struct.oggpack_buffer** %opb_addr, align 4 ; <%struct.oggpack_buffer*> [#uses=1] call void @oggpack_write( %struct.oggpack_buffer* %tmp1, i32 5653314, i32 24 ) nounwind call void @oggpack_write( %struct.oggpack_buffer* null, i32 0, i32 24 ) nounwind unreachable diff --git a/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll index dabe62003d9..b0a50a49a76 100644 --- a/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll +++ b/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll @@ -4,7 +4,7 @@ define i32 @main(i32 %argc, i8** %argv) { entry: br label %bb1 bb1: ; preds = %entry - %tmp3.i.i = load i8* null, align 1 ; <i8> [#uses=1] + %tmp3.i.i = load i8, i8* null, align 1 ; <i8> [#uses=1] %tmp4.i.i = icmp slt i8 %tmp3.i.i, 0 ; <i1> [#uses=1] br i1 %tmp4.i.i, label %bb2, label %bb3 bb2: ; preds = %bb1 diff --git a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll index 94c562bf012..24e664c71fb 100644 --- a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll @@ -11,7 +11,7 @@ bb74.i: ; preds = %bb88.i, %bb74.i, %entry bb88.i: ; preds = %bb74.i br i1 false, label %mandel.exit, label %bb74.i mandel.exit: ; preds = %bb88.i - %tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1] + %tmp2 = load volatile double, double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1] %tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1] %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0] ret i32 0 diff --git a/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll index 5c7e2500f51..428aa1113a1 100644 --- a/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll @@ -57,7 +57,7 @@ cond_false373.i.i: ; preds = %bb.i350.i br i1 false, label %cond_true380.i.i, label %cond_next602.i.i cond_true380.i.i: ; preds = %cond_false373.i.i %tmp394.i418.i = add i32 %cell.0.i.i, 1 ; <i32> [#uses=1] - %tmp397.i420.i = load %struct.cellbox** null, align 4 ; <%struct.cellbox*> [#uses=1] + %tmp397.i420.i = load %struct.cellbox*, %struct.cellbox** null, align 4 ; <%struct.cellbox*> [#uses=1] br label %bb398.i.i bb398.i.i: ; preds = %bb398.i.i, %cond_true380.i.i br i1 false, label %bb414.i.i, label %bb398.i.i @@ -74,7 +74,7 @@ bb609.i.i: ; preds = %cond_next602.i.i bb620.i.i: ; preds = %bb620.i.i, %bb609.i.i %indvar166.i465.i = phi i32 [ %indvar.next167.i.i, %bb620.i.i ], [ 0, %bb609.i.i ] ; <i32> [#uses=1] %tmp640.i.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0] - %tmp648.i.i = load i32* null, align 4 ; <i32> [#uses=1] + %tmp648.i.i = load i32, i32* null, align 4 ; <i32> [#uses=1] %tmp650.i468.i = icmp sgt i32 0, %tmp648.i.i ; <i1> [#uses=1] %tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0] %indvar.next167.i.i = add i32 %indvar166.i465.i, 1 ; <i32> [#uses=1] @@ -126,7 +126,7 @@ cond_true163: ; preds = %cond_next144 bb.i53: ; preds = 
%cond_true163 ret void bb34.i: ; preds = %cond_true163 - %tmp37.i55 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp37.i55 = load i32, i32* null, align 4 ; <i32> [#uses=1] br i1 false, label %bb65.preheader.i, label %bb78.i bb65.preheader.i: ; preds = %bb34.i br label %bb65.outer.us.i @@ -149,7 +149,7 @@ bb155.i: ; preds = %cond_next215.i, %bb151.i %indvar90.i = phi i32 [ %indvar.next91.i, %cond_next215.i ], [ 0, %bb151.i ] ; <i32> [#uses=2] %sfb.3.reg2mem.0.i = add i32 %indvar90.i, %tmp37.i55 ; <i32> [#uses=4] %tmp161.i = getelementptr [4 x [21 x double]], [4 x [21 x double]]* null, i32 0, i32 %tmp15747.i, i32 %sfb.3.reg2mem.0.i ; <double*> [#uses=1] - %tmp162.i74 = load double* %tmp161.i, align 4 ; <double> [#uses=0] + %tmp162.i74 = load double, double* %tmp161.i, align 4 ; <double> [#uses=0] br i1 false, label %cond_true167.i, label %cond_next215.i cond_true167.i: ; preds = %bb155.i %tmp173.i = getelementptr %struct.III_scalefac_t, %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.3.reg2mem.0.i, i32 %i.154.i ; <i32*> [#uses=1] @@ -157,7 +157,7 @@ cond_true167.i: ; preds = %bb155.i %tmp182.1.i = getelementptr [14 x i32], [14 x i32]* @scalefac_band.1, i32 0, i32 %sfb.3.reg2mem.0.i ; <i32*> [#uses=0] %tmp185.i78 = add i32 %sfb.3.reg2mem.0.i, 1 ; <i32> [#uses=1] %tmp187.1.i = getelementptr [14 x i32], [14 x i32]* @scalefac_band.1, i32 0, i32 %tmp185.i78 ; <i32*> [#uses=1] - %tmp188.i = load i32* %tmp187.1.i, align 4 ; <i32> [#uses=1] + %tmp188.i = load i32, i32* %tmp187.1.i, align 4 ; <i32> [#uses=1] %tmp21153.i = icmp slt i32 0, %tmp188.i ; <i1> [#uses=1] br i1 %tmp21153.i, label %bb190.preheader.i, label %cond_next215.i bb190.preheader.i: ; preds = %cond_true167.i @@ -224,7 +224,7 @@ cond_next144: ; preds = %cond_next104, %bb %over.1 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] %best_over.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] %notdone.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1] - %tmp147 = load i32* null, align 4 ; <i32> [#uses=1] + %tmp147 = load i32, i32* null, align 4 ; <i32> [#uses=1] %tmp148 = icmp eq i32 %tmp147, 0 ; <i1> [#uses=1] %tmp153 = icmp eq i32 %over.1, 0 ; <i1> [#uses=1] %bothcond = and i1 %tmp148, %tmp153 ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll index d74fea84aa7..c9c78e1f6d8 100644 --- a/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll +++ b/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll @@ -21,8 +21,8 @@ entry: br i1 false, label %init_orig_buffers.exit, label %cond_true.i29 cond_true.i29: ; preds = %entry - %tmp17.i = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 20), align 8 ; <i32> [#uses=1] - %tmp20.i27 = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 16), align 8 ; <i32> [#uses=1] + %tmp17.i = load i32, i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 20), align 8 ; <i32> [#uses=1] + %tmp20.i27 = load i32, i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 16), align 8 ; <i32> [#uses=1] %tmp8.i.i = select i1 false, i32 1, i32 0 ; <i32> [#uses=1] br label %bb.i8.us.i diff --git a/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll index e86bc1ba5cc..cf98d7f91df 100644 --- a/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll +++ b/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll @@ -4,6 +4,6 @@ define void @main({ i32 }*) { entry: %sret1 = alloca { i32 } ; <{ 
i32 }*> [#uses=1] - load { i32 }* %sret1 ; <{ i32 }>:1 [#uses=0] + load { i32 }, { i32 }* %sret1 ; <{ i32 }>:1 [#uses=0] ret void } diff --git a/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll index adb01127760..520e800b902 100644 --- a/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll +++ b/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll @@ -8,6 +8,6 @@ define i32 @__gcov_close() nounwind { entry: - load i32* getelementptr (%struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4 ; <i32>:0 [#uses=1] + load i32, i32* getelementptr (%struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4 ; <i32>:0 [#uses=1] ret i32 %0 } diff --git a/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll index 4c0c59ccfbc..c581222944d 100644 --- a/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll +++ b/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll @@ -87,7 +87,7 @@ bb394: ; preds = %bb122 bb396: ; preds = %bb394, %bb131, %bb122, %bb122, %bb122, %bb122, %RESUME %stop_link.3 = phi %struct.rec* [ null, %RESUME ], [ %stop_link.3, %bb394 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %link.1, %bb131 ] ; <%struct.rec*> [#uses=7] %headers_seen.1 = phi i32 [ 0, %RESUME ], [ %headers_seen.1, %bb394 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ %headers_seen.1, %bb131 ] ; <i32> [#uses=2] - %link.1 = load %struct.rec** null ; <%struct.rec*> [#uses=2] + %link.1 = load %struct.rec*, %struct.rec** null ; <%struct.rec*> [#uses=2] %1 = icmp eq %struct.rec* %link.1, %hd ; <i1> [#uses=1] br i1 %1, label %bb398, label %bb122 diff --git a/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll index a48f0033acc..a14589fa47d 100644 --- a/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll +++ b/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll @@ -15,6 +15,6 @@ entry: br label %return return: ; preds = %entry - %2 = load i32* %retval ; <i32> [#uses=1] + %2 = load i32, i32* %retval ; <i32> [#uses=1] ret i32 %2 } diff --git a/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll index bc5e6023409..d9ec4d28c5d 100644 --- a/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll +++ b/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll @@ -28,12 +28,12 @@ bb53: ; preds = %bb52 br i1 %phitmp, label %bb55, label %bb52 bb55: ; preds = %bb53 - %4 = load double* @a, align 4 ; <double> [#uses=10] + %4 = load double, double* @a, align 4 ; <double> [#uses=10] %5 = fadd double %4, 0.000000e+00 ; <double> [#uses=16] %6 = fcmp ogt double %k.4, 0.000000e+00 ; <i1> [#uses=1] %.pn404 = fmul double %4, %4 ; <double> [#uses=4] %.pn402 = fmul double %5, %5 ; <double> [#uses=5] - %.pn165.in = load double* @N ; <double> [#uses=5] + %.pn165.in = load double, double* @N ; <double> [#uses=5] %.pn198 = fmul double 0.000000e+00, %5 ; <double> [#uses=1] %.pn185 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1] %.pn147 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll index 377bbd21175..567400318ee 100644 --- a/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll +++ b/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll @@ -42,7 +42,7 @@ bb3: ; preds = %entry %17 = fdiv double %16, %0 %18 = fadd double 0.000000e+00, %17 %19 = call double @acos(double %18) nounwind readonly - %20 = load 
double* null, align 4 + %20 = load double, double* null, align 4 %21 = fmul double %20, 0x401921FB54442D18 %22 = call double @sin(double %19) nounwind readonly %23 = fmul double %22, 0.000000e+00 diff --git a/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll index e90c5b322db..bc7dbd4f695 100644 --- a/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll +++ b/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll @@ -8,14 +8,14 @@ entry: store i32 0, i32* %retval %res = alloca i32 ; <i32*> [#uses=0] %fh = alloca i32 ; <i32*> [#uses=1] - %1 = load i32* %fh ; <i32> [#uses=1] - %2 = load i32* %ptr ; <i32> [#uses=1] + %1 = load i32, i32* %fh ; <i32> [#uses=1] + %2 = load i32, i32* %ptr ; <i32> [#uses=1] %3 = call i32 asm "mov r0, $2; mov r1, $3; swi ${1:a}; mov $0, r0", "=r,i,r,r,~{r0},~{r1}"(i32 107, i32 %1, i32 %2) nounwind ; <i32> [#uses=1] store i32 %3, i32* %retval br label %return return: ; preds = %entry - %4 = load i32* %retval ; <i32> [#uses=1] + %4 = load i32, i32* %retval ; <i32> [#uses=1] ret i32 %4 } diff --git a/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll index f166e7e3252..edeae9b88bc 100644 --- a/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll +++ b/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll @@ -8,11 +8,11 @@ entry: %b = alloca { double, double } ; <{ double, double }*> [#uses=1] store { i32, { double, double }* } %d_arg, { i32, { double, double }* }* %d store i32 %x_arg, i32* %x - %tmp = load i32* %x ; <i32> [#uses=1] + %tmp = load i32, i32* %x ; <i32> [#uses=1] %tmp1 = getelementptr { i32, { double, double }* }, { i32, { double, double }* }* %d, i32 0, i32 1 ; <{ double, double }**> [#uses=1] - %.ptr = load { double, double }** %tmp1 ; <{ double, double }*> [#uses=1] + %.ptr = load { double, double }*, { double, double }** %tmp1 ; <{ double, double }*> [#uses=1] %tmp2 = getelementptr { double, double }, { double, double }* %.ptr, i32 %tmp ; <{ double, double }*> [#uses=1] - %tmp3 = load { double, double }* %tmp2 ; <{ double, double }> [#uses=1] + %tmp3 = load { double, double }, { double, double }* %tmp2 ; <{ double, double }> [#uses=1] store { double, double } %tmp3, { double, double }* %b ret void } diff --git a/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll index 9e32e05b040..949e1072b2b 100644 --- a/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll +++ b/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll @@ -2,7 +2,7 @@ define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) { entry: - %input2 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=2] + %input2 = load <4 x float>, <4 x float>* null, align 16 ; <<4 x float>> [#uses=2] %shuffle7 = shufflevector <4 x float> %input2, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; <<4 x float>> [#uses=1] %mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x float>> [#uses=1] %add2 = fadd <4 x float> %mul1, %input2 ; <<4 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll index 5b1746301f4..f2532d798f8 100644 --- a/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll +++ b/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll @@ -4,7 +4,7 @@ define void @foo(...) 
nounwind { entry: %rr = alloca i32 ; <i32*> [#uses=2] - %0 = load i32* %rr ; <i32> [#uses=1] + %0 = load i32, i32* %rr ; <i32> [#uses=1] %1 = call i32 asm "nop", "=r,0"(i32 %0) nounwind ; <i32> [#uses=1] store i32 %1, i32* %rr br label %return diff --git a/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll index 2bc7df02853..06456cc9ca4 100644 --- a/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll +++ b/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll @@ -20,7 +20,7 @@ bb1: ; preds = %bb bb3: ; preds = %bb1, %bb %iftmp.0.0 = phi i32 [ 0, %bb1 ], [ -1, %bb ] ; <i32> [#uses=1] %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind ; <i32> [#uses=0] - %2 = load %struct.List** null, align 4 ; <%struct.List*> [#uses=2] + %2 = load %struct.List*, %struct.List** null, align 4 ; <%struct.List*> [#uses=2] %phitmp = icmp eq %struct.List* %2, null ; <i1> [#uses=1] br i1 %phitmp, label %bb5, label %bb diff --git a/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll index 98e00230255..17beb3c2594 100644 --- a/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll +++ b/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll @@ -136,7 +136,7 @@ bb138: ; preds = %bb77 br label %bb141 bb139: ; preds = %bb141 - %scevgep441442881 = load i16* undef ; <i16> [#uses=1] + %scevgep441442881 = load i16, i16* undef ; <i16> [#uses=1] %1 = icmp ugt i16 %scevgep441442881, %0 ; <i1> [#uses=1] br i1 %1, label %bb141, label %bb142 diff --git a/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll index 380d43af682..4ab54c2e8fa 100644 --- a/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll +++ b/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll @@ -5,9 +5,9 @@ define void @simplify_unary_real(i8* nocapture %p) nounwind { entry: - %tmp121 = load i64* null, align 4 ; <i64> [#uses=1] + %tmp121 = load i64, i64* null, align 4 ; <i64> [#uses=1] %0 = getelementptr %struct.rtx_def, %struct.rtx_def* null, i32 0, i32 3, i32 3, i32 0 ; <i64*> [#uses=1] - %tmp122 = load i64* %0, align 4 ; <i64> [#uses=1] + %tmp122 = load i64, i64* %0, align 4 ; <i64> [#uses=1] %1 = zext i64 undef to i192 ; <i192> [#uses=2] %2 = zext i64 %tmp121 to i192 ; <i192> [#uses=1] %3 = shl i192 %2, 64 ; <i192> [#uses=2] diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll index 2c892651435..243726c9149 100644 --- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll @@ -35,10 +35,10 @@ bb10: ; preds = %bb9 unreachable bb11: ; preds = %bb9 - %0 = load i32* undef, align 4 ; <i32> [#uses=2] + %0 = load i32, i32* undef, align 4 ; <i32> [#uses=2] %1 = add i32 %0, 1 ; <i32> [#uses=2] store i32 %1, i32* undef, align 4 - %2 = load i32* undef, align 4 ; <i32> [#uses=1] + %2 = load i32, i32* undef, align 4 ; <i32> [#uses=1] store i32 %2, i32* @nn, align 4 store i32 0, i32* @al_len, align 4 store i32 0, i32* @no_mat, align 4 @@ -48,9 +48,9 @@ bb11: ; preds = %bb9 %4 = sitofp i32 undef to double ; <double> [#uses=1] %5 = fdiv double %4, 1.000000e+01 ; <double> [#uses=1] %6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; <i32> [#uses=0] - %7 = load i32* @al_len, align 4 ; <i32> [#uses=1] - %8 = load i32* @no_mat, align 4 ; <i32> [#uses=1] - %9 = load 
i32* @no_mis, align 4 ; <i32> [#uses=1] + %7 = load i32, i32* @al_len, align 4 ; <i32> [#uses=1] + %8 = load i32, i32* @no_mat, align 4 ; <i32> [#uses=1] + %9 = load i32, i32* @no_mis, align 4 ; <i32> [#uses=1] %10 = sub i32 %7, %8 ; <i32> [#uses=1] %11 = sub i32 %10, %9 ; <i32> [#uses=1] %12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; <i32> [#uses=0] diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll index 599f2916da4..17051df96b5 100644 --- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll +++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll @@ -33,17 +33,17 @@ bb10: ; preds = %bb9 unreachable bb11: ; preds = %bb9 - %0 = load i32* undef, align 4 ; <i32> [#uses=3] + %0 = load i32, i32* undef, align 4 ; <i32> [#uses=3] %1 = add i32 %0, 1 ; <i32> [#uses=2] store i32 %1, i32* undef, align 4 - %2 = load i32* undef, align 4 ; <i32> [#uses=2] + %2 = load i32, i32* undef, align 4 ; <i32> [#uses=2] %3 = sub i32 %2, %0 ; <i32> [#uses=1] store i32 0, i32* @no_mat, align 4 store i32 0, i32* @no_mis, align 4 %4 = getelementptr i8, i8* %B, i32 %0 ; <i8*> [#uses=1] tail call void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind %5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; <i32> [#uses=0] - %6 = load i32* @no_mis, align 4 ; <i32> [#uses=1] + %6 = load i32, i32* @no_mis, align 4 ; <i32> [#uses=1] %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; <i32> [#uses=0] %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; <i32> [#uses=0] br i1 undef, label %bb15, label %bb12 diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll index fb6ca109fc5..cf7325ddf89 100644 --- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll +++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll @@ -28,7 +28,7 @@ bb11: ; preds = %bb9 br i1 undef, label %bb15, label %bb12 bb12: ; preds = %bb11 - %0 = load i32** @JJ, align 4 ; <i32*> [#uses=1] + %0 = load i32*, i32** @JJ, align 4 ; <i32*> [#uses=1] br label %bb228.i bb74.i: ; preds = %bb228.i @@ -85,9 +85,9 @@ bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i %fi.5.i = phi i32 [ undef, %bb167.i ], [ %ci.910.i, %bb158.i ], [ undef, %bb160.i ], [ %ci.910.i, %bb161.i ], [ undef, %bb163.i ] ; <i32> [#uses=1] %fj.4.i = phi i32 [ undef, %bb167.i ], [ undef, %bb158.i ], [ %fj.515.i, %bb160.i ], [ undef, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2] %scevgep88.i = getelementptr i32, i32* null, i32 %i.121.i ; <i32*> [#uses=3] - %4 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=2] + %4 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=2] %scevgep89.i = getelementptr i32, i32* %0, i32 %i.121.i ; <i32*> [#uses=3] - %5 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1] + %5 = load i32, i32* %scevgep89.i, align 4 ; <i32> [#uses=1] %ci.10.i = select i1 undef, i32 %pi.316.i, i32 %i.121.i ; <i32> [#uses=0] %cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef ; <i32> [#uses=0] %6 = icmp slt i32 undef, 0 ; <i1> [#uses=3] @@ -95,8 +95,8 @@ bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i %cj.11.i100 = select i1 %6, i32 %fj.4.i, i32 %5 ; <i32> [#uses=1] %c.14.i 
= select i1 %6, i32 0, i32 undef ; <i32> [#uses=2] store i32 %c.14.i, i32* undef, align 4 - %7 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1] - %8 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1] + %7 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=1] + %8 = load i32, i32* %scevgep89.i, align 4 ; <i32> [#uses=1] store i32 %ci.12.i, i32* %scevgep88.i, align 4 store i32 %cj.11.i100, i32* %scevgep89.i, align 4 store i32 %4, i32* undef, align 4 diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll index 0485ab0f136..203608ac1d4 100644 --- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll +++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll @@ -22,7 +22,7 @@ bb6: ; preds = %bb6, %bb5 br i1 undef, label %bb8, label %bb6 bb8: ; preds = %bb6, %bb5 - %0 = load i8** @name1, align 4 ; <i8*> [#uses=0] + %0 = load i8*, i8** @name1, align 4 ; <i8*> [#uses=0] br label %bb15 bb9: ; preds = %bb15 @@ -39,9 +39,9 @@ bb11: ; preds = %bb9 br i1 undef, label %bb15, label %bb12 bb12: ; preds = %bb11 - %3 = load i32** @II, align 4 ; <i32*> [#uses=1] - %4 = load i32* @r, align 4 ; <i32> [#uses=1] - %5 = load i32* @qr, align 4 ; <i32> [#uses=1] + %3 = load i32*, i32** @II, align 4 ; <i32*> [#uses=1] + %4 = load i32, i32* @r, align 4 ; <i32> [#uses=1] + %5 = load i32, i32* @qr, align 4 ; <i32> [#uses=1] br label %bb228.i bb74.i: ; preds = %bb228.i @@ -100,7 +100,7 @@ bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i %ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=1] %cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=1] %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1] - %10 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1] + %10 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=1] br i1 undef, label %bb211.i, label %bb218.i bb211.i: ; preds = %bb168.i diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll index 16f5d1dc150..b3c91ed3fb0 100644 --- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll +++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll @@ -28,7 +28,7 @@ bb11: ; preds = %bb9 br i1 undef, label %bb15, label %bb12 bb12: ; preds = %bb11 - %0 = load i32** @XX, align 4 ; <i32*> [#uses=0] + %0 = load i32*, i32** @XX, align 4 ; <i32*> [#uses=0] br label %bb228.i bb74.i: ; preds = %bb228.i diff --git a/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll index ae826fe6705..55039dd7f57 100644 --- a/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll +++ b/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll @@ -30,9 +30,9 @@ bb11: ; preds = %bb9 br i1 undef, label %bb15, label %bb12 bb12: ; preds = %bb11 - %0 = load i32** @II, align 4 ; <i32*> [#uses=1] - %1 = load i32** @JJ, align 4 ; <i32*> [#uses=1] - %2 = load i32* @qr, align 4 ; <i32> [#uses=1] + %0 = load i32*, i32** @II, align 4 ; <i32*> [#uses=1] + %1 = load i32*, i32** @JJ, align 4 ; <i32*> [#uses=1] + %2 = load i32, i32* @qr, align 4 ; <i32> [#uses=1] br label %bb228.i bb74.i: ; preds = %bb228.i @@ -97,8 +97,8 @@ bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i %ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=2] %cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=2] %c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1] - %6 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1] - %7 = load i32* 
%scevgep89.i, align 4 ; <i32> [#uses=1] + %6 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=1] + %7 = load i32, i32* %scevgep89.i, align 4 ; <i32> [#uses=1] store i32 %ci.12.i, i32* %scevgep88.i, align 4 store i32 %cj.11.i100, i32* %scevgep89.i, align 4 br i1 undef, label %bb211.i, label %bb218.i diff --git a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll index 0c6378a1080..2cb267894a8 100644 --- a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll +++ b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll @@ -17,42 +17,42 @@ entry: bb: ; preds = %entry %1 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1] - %2 = load %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2] + %2 = load %struct.VERTEX*, %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2] %3 = icmp eq %struct.VERTEX* %2, null ; <i1> [#uses=1] br i1 %3, label %bb7, label %bb1.i bb1.i: ; preds = %bb1.i, %bb %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] ; <%struct.VERTEX*> [#uses=3] %4 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] - %5 = load %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2] + %5 = load %struct.VERTEX*, %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2] %6 = icmp eq %struct.VERTEX* %5, null ; <i1> [#uses=1] br i1 %6, label %get_low.exit, label %bb1.i get_low.exit: ; preds = %bb1.i call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind %7 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] - %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1] + %8 = load %struct.VERTEX*, %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1] call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind %9 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1] - %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2] + %10 = load %struct.edge_rec*, %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2] %11 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %12 = load %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1] + %12 = load %struct.edge_rec*, %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1] %13 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1] - %14 = load %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1] + %14 = load %struct.edge_rec*, %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1] %15 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %16 = load %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2] + %16 = load %struct.edge_rec*, %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2] br label %bb.i bb.i: ; preds = %bb4.i, %get_low.exit %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] ; <%struct.edge_rec*> [#uses=2] %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] ; <%struct.edge_rec*> [#uses=3] %17 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %18 = load 
%struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3] + %18 = load %struct.VERTEX*, %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3] %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 ; <i32> [#uses=1] %20 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %21 = load double* %20, align 4 ; <double> [#uses=3] + %21 = load double, double* %20, align 4 ; <double> [#uses=3] %22 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %23 = load double* %22, align 4 ; <double> [#uses=3] + %23 = load double, double* %22, align 4 ; <double> [#uses=3] br label %bb2.i bb1.i1: ; preds = %bb2.i @@ -63,7 +63,7 @@ bb1.i1: ; preds = %bb2.i %28 = or i32 %26, %27 ; <i32> [#uses=1] %29 = inttoptr i32 %28 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %30 = getelementptr %struct.edge_rec, %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %31 = load %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1] + %31 = load %struct.edge_rec*, %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1] %32 = ptrtoint %struct.edge_rec* %31 to i32 ; <i32> [#uses=2] %33 = add i32 %32, 16 ; <i32> [#uses=1] %34 = and i32 %33, 63 ; <i32> [#uses=1] @@ -80,16 +80,16 @@ bb2.i: ; preds = %bb1.i1, %bb.i %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %t1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] %t2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %t1.0.i = load %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2] - %t2.0.i = load %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2] + %t1.0.i = load %struct.VERTEX*, %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2] + %t2.0.i = load %struct.VERTEX*, %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2] %38 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %39 = load double* %38, align 4 ; <double> [#uses=3] + %39 = load double, double* %38, align 4 ; <double> [#uses=3] %40 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %41 = load double* %40, align 4 ; <double> [#uses=3] + %41 = load double, double* %40, align 4 ; <double> [#uses=3] %42 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %43 = load double* %42, align 4 ; <double> [#uses=1] + %43 = load double, double* %42, align 4 ; <double> [#uses=1] %44 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %45 = load double* %44, align 4 ; <double> [#uses=1] + %45 = load double, double* %44, align 4 ; <double> [#uses=1] %46 = fsub double %39, %21 ; <double> [#uses=1] %47 = fsub double %45, %23 ; <double> [#uses=1] %48 = fmul double %46, %47 ; <double> [#uses=1] @@ -105,11 +105,11 @@ bb3.i: ; preds = %bb2.i %55 = xor i32 %54, 32 ; <i32> [#uses=3] %56 = inttoptr i32 %55 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] %57 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %58 = load %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2] + %58 = load %struct.VERTEX*, %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2] %59 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %60 
= load double* %59, align 4 ; <double> [#uses=1] + %60 = load double, double* %59, align 4 ; <double> [#uses=1] %61 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %62 = load double* %61, align 4 ; <double> [#uses=1] + %62 = load double, double* %61, align 4 ; <double> [#uses=1] %63 = fsub double %60, %39 ; <double> [#uses=1] %64 = fsub double %23, %41 ; <double> [#uses=1] %65 = fmul double %63, %64 ; <double> [#uses=1] @@ -122,7 +122,7 @@ bb3.i: ; preds = %bb2.i bb4.i: ; preds = %bb3.i %71 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %72 = load %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1] + %72 = load %struct.edge_rec*, %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1] br label %bb.i bb5.i: ; preds = %bb3.i @@ -132,7 +132,7 @@ bb5.i: ; preds = %bb3.i %76 = or i32 %74, %75 ; <i32> [#uses=1] %77 = inttoptr i32 %76 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %78 = getelementptr %struct.edge_rec, %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %79 = load %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1] + %79 = load %struct.edge_rec*, %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1] %80 = ptrtoint %struct.edge_rec* %79 to i32 ; <i32> [#uses=2] %81 = add i32 %80, 16 ; <i32> [#uses=1] %82 = and i32 %81, 63 ; <i32> [#uses=1] @@ -140,7 +140,7 @@ bb5.i: ; preds = %bb3.i %84 = or i32 %82, %83 ; <i32> [#uses=1] %85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %86 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %87 = load %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1] + %87 = load %struct.VERTEX*, %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1] %88 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6] %89 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4 @@ -161,7 +161,7 @@ bb5.i: ; preds = %bb3.i store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4 %101 = getelementptr %struct.edge_rec, %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4 - %102 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1] + %102 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1] %103 = ptrtoint %struct.edge_rec* %102 to i32 ; <i32> [#uses=2] %104 = add i32 %103, 16 ; <i32> [#uses=1] %105 = and i32 %104, 63 ; <i32> [#uses=1] @@ -169,7 +169,7 @@ bb5.i: ; preds = %bb3.i %107 = or i32 %105, %106 ; <i32> [#uses=1] %108 = inttoptr i32 %107 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %109 = getelementptr %struct.edge_rec, %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %110 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1] + %110 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1] %111 = ptrtoint %struct.edge_rec* %110 to i32 ; <i32> [#uses=2] %112 = add i32 %111, 16 ; <i32> [#uses=1] %113 = and i32 %112, 63 ; <i32> [#uses=1] @@ -177,19 +177,19 @@ bb5.i: ; preds = %bb3.i %115 = or i32 %113, %114 ; <i32> [#uses=1] %116 = inttoptr i32 %115 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %117 = getelementptr %struct.edge_rec, %struct.edge_rec* %116, 
i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %118 = load %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1] + %118 = load %struct.edge_rec*, %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1] %119 = getelementptr %struct.edge_rec, %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %120 = load %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1] + %120 = load %struct.edge_rec*, %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4 store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4 - %121 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1] - %122 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1] + %121 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1] + %122 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4 store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4 %123 = xor i32 %91, 32 ; <i32> [#uses=1] %124 = inttoptr i32 %123 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] %125 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %126 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1] + %126 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1] %127 = ptrtoint %struct.edge_rec* %126 to i32 ; <i32> [#uses=2] %128 = add i32 %127, 16 ; <i32> [#uses=1] %129 = and i32 %128, 63 ; <i32> [#uses=1] @@ -197,7 +197,7 @@ bb5.i: ; preds = %bb3.i %131 = or i32 %129, %130 ; <i32> [#uses=1] %132 = inttoptr i32 %131 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %133 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %134 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1] + %134 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1] %135 = ptrtoint %struct.edge_rec* %134 to i32 ; <i32> [#uses=2] %136 = add i32 %135, 16 ; <i32> [#uses=1] %137 = and i32 %136, 63 ; <i32> [#uses=1] @@ -205,13 +205,13 @@ bb5.i: ; preds = %bb3.i %139 = or i32 %137, %138 ; <i32> [#uses=1] %140 = inttoptr i32 %139 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %141 = getelementptr %struct.edge_rec, %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %142 = load %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1] + %142 = load %struct.edge_rec*, %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1] %143 = getelementptr %struct.edge_rec, %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %144 = load %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1] + %144 = load %struct.edge_rec*, %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4 store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4 - %145 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1] - %146 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2] + %145 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1] + %146 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2] store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4 
store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4 %147 = and i32 %92, 63 ; <i32> [#uses=1] @@ -219,22 +219,22 @@ bb5.i: ; preds = %bb3.i %149 = or i32 %147, %148 ; <i32> [#uses=1] %150 = inttoptr i32 %149 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %151 = getelementptr %struct.edge_rec, %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %152 = load %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1] + %152 = load %struct.edge_rec*, %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1] %153 = ptrtoint %struct.edge_rec* %152 to i32 ; <i32> [#uses=2] %154 = add i32 %153, 16 ; <i32> [#uses=1] %155 = and i32 %154, 63 ; <i32> [#uses=1] %156 = and i32 %153, -64 ; <i32> [#uses=1] %157 = or i32 %155, %156 ; <i32> [#uses=1] %158 = inttoptr i32 %157 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %159 = load %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1] + %159 = load %struct.VERTEX*, %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1] %160 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %161 = load %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1] + %161 = load %struct.VERTEX*, %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1] %162 = getelementptr %struct.edge_rec, %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %163 = load %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1] + %163 = load %struct.VERTEX*, %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1] %164 = icmp eq %struct.VERTEX* %163, %159 ; <i1> [#uses=1] %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 ; <%struct.edge_rec*> [#uses=3] %165 = getelementptr %struct.edge_rec, %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %166 = load %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1] + %166 = load %struct.VERTEX*, %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1] %167 = icmp eq %struct.VERTEX* %166, %161 ; <i1> [#uses=1] %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 ; <%struct.edge_rec*> [#uses=3] br label %bb9.i @@ -244,31 +244,31 @@ bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] ; <%struct.edge_rec*> [#uses=5] %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] ; <%struct.edge_rec*> [#uses=2] %168 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %169 = load %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3] + %169 = load %struct.edge_rec*, %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3] %170 = getelementptr %struct.edge_rec, %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3] - %171 = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4] + %171 = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4] %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 ; <i32> [#uses=3] %173 = xor i32 %172, 32 ; <i32> [#uses=1] %174 = inttoptr i32 %173 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] %175 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3] - %176 = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3] + %176 = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> 
[#uses=3] %177 = ptrtoint %struct.edge_rec* %169 to i32 ; <i32> [#uses=1] %178 = xor i32 %177, 32 ; <i32> [#uses=1] %179 = inttoptr i32 %178 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %180 = getelementptr %struct.edge_rec, %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %181 = load %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2] + %181 = load %struct.VERTEX*, %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2] %182 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; <double*> [#uses=2] - %183 = load double* %182, align 4 ; <double> [#uses=2] + %183 = load double, double* %182, align 4 ; <double> [#uses=2] %184 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; <double*> [#uses=2] - %185 = load double* %184, align 4 ; <double> [#uses=2] + %185 = load double, double* %184, align 4 ; <double> [#uses=2] %186 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %187 = load double* %186, align 4 ; <double> [#uses=1] + %187 = load double, double* %186, align 4 ; <double> [#uses=1] %188 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %189 = load double* %188, align 4 ; <double> [#uses=1] + %189 = load double, double* %188, align 4 ; <double> [#uses=1] %190 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %191 = load double* %190, align 4 ; <double> [#uses=2] + %191 = load double, double* %190, align 4 ; <double> [#uses=2] %192 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %193 = load double* %192, align 4 ; <double> [#uses=2] + %193 = load double, double* %192, align 4 ; <double> [#uses=2] %194 = fsub double %183, %191 ; <double> [#uses=1] %195 = fsub double %189, %193 ; <double> [#uses=1] %196 = fmul double %194, %195 ; <double> [#uses=1] @@ -281,7 +281,7 @@ bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i bb10.i: ; preds = %bb9.i %202 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %avail_edge.promoted25 = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1] + %avail_edge.promoted25 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1] br label %bb12.i bb11.i: ; preds = %bb12.i @@ -292,7 +292,7 @@ bb11.i: ; preds = %bb12.i %207 = or i32 %205, %206 ; <i32> [#uses=1] %208 = inttoptr i32 %207 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %209 = getelementptr %struct.edge_rec, %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %210 = load %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1] + %210 = load %struct.edge_rec*, %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1] %211 = ptrtoint %struct.edge_rec* %210 to i32 ; <i32> [#uses=2] %212 = add i32 %211, 16 ; <i32> [#uses=1] %213 = and i32 %212, 63 ; <i32> [#uses=1] @@ -300,7 +300,7 @@ bb11.i: ; preds = %bb12.i %215 = or i32 %213, %214 ; <i32> [#uses=1] %216 = inttoptr i32 %215 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %217 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %218 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1] + %218 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1] %219 = ptrtoint %struct.edge_rec* %218 to i32 ; <i32> [#uses=2] %220 = add i32 %219, 16 ; <i32> 
[#uses=1] %221 = and i32 %220, 63 ; <i32> [#uses=1] @@ -308,7 +308,7 @@ bb11.i: ; preds = %bb12.i %223 = or i32 %221, %222 ; <i32> [#uses=1] %224 = inttoptr i32 %223 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %225 = getelementptr %struct.edge_rec, %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %226 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1] + %226 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1] %227 = ptrtoint %struct.edge_rec* %226 to i32 ; <i32> [#uses=2] %228 = add i32 %227, 16 ; <i32> [#uses=1] %229 = and i32 %228, 63 ; <i32> [#uses=1] @@ -316,13 +316,13 @@ bb11.i: ; preds = %bb12.i %231 = or i32 %229, %230 ; <i32> [#uses=1] %232 = inttoptr i32 %231 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %233 = getelementptr %struct.edge_rec, %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %234 = load %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1] + %234 = load %struct.edge_rec*, %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1] %235 = getelementptr %struct.edge_rec, %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %236 = load %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1] + %236 = load %struct.edge_rec*, %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4 store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4 - %237 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1] - %238 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1] + %237 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1] + %238 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4 store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4 %239 = xor i32 %203, 32 ; <i32> [#uses=2] @@ -331,7 +331,7 @@ bb11.i: ; preds = %bb12.i %242 = or i32 %241, %206 ; <i32> [#uses=1] %243 = inttoptr i32 %242 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %244 = getelementptr %struct.edge_rec, %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %245 = load %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1] + %245 = load %struct.edge_rec*, %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1] %246 = ptrtoint %struct.edge_rec* %245 to i32 ; <i32> [#uses=2] %247 = add i32 %246, 16 ; <i32> [#uses=1] %248 = and i32 %247, 63 ; <i32> [#uses=1] @@ -340,7 +340,7 @@ bb11.i: ; preds = %bb12.i %251 = inttoptr i32 %250 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %252 = inttoptr i32 %239 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %253 = getelementptr %struct.edge_rec, %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %254 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1] + %254 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1] %255 = ptrtoint %struct.edge_rec* %254 to i32 ; <i32> [#uses=2] %256 = add i32 %255, 16 ; <i32> [#uses=1] %257 = and i32 %256, 63 ; <i32> [#uses=1] @@ -348,7 +348,7 @@ bb11.i: ; preds = %bb12.i %259 = or i32 %257, %258 ; <i32> [#uses=1] %260 = inttoptr i32 %259 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %261 = getelementptr %struct.edge_rec, %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] 
- %262 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1] + %262 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1] %263 = ptrtoint %struct.edge_rec* %262 to i32 ; <i32> [#uses=2] %264 = add i32 %263, 16 ; <i32> [#uses=1] %265 = and i32 %264, 63 ; <i32> [#uses=1] @@ -356,22 +356,22 @@ bb11.i: ; preds = %bb12.i %267 = or i32 %265, %266 ; <i32> [#uses=1] %268 = inttoptr i32 %267 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %269 = getelementptr %struct.edge_rec, %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %270 = load %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1] + %270 = load %struct.edge_rec*, %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1] %271 = getelementptr %struct.edge_rec, %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %272 = load %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1] + %272 = load %struct.edge_rec*, %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4 store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4 - %273 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1] - %274 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1] + %273 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1] + %274 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4 store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4 %275 = inttoptr i32 %206 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] %276 = getelementptr %struct.edge_rec, %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4 %277 = getelementptr %struct.edge_rec, %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %278 = load %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2] - %.pre.i = load double* %182, align 4 ; <double> [#uses=1] - %.pre22.i = load double* %184, align 4 ; <double> [#uses=1] + %278 = load %struct.edge_rec*, %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2] + %.pre.i = load double, double* %182, align 4 ; <double> [#uses=1] + %.pre22.i = load double, double* %184, align 4 ; <double> [#uses=1] br label %bb12.i bb12.i: ; preds = %bb11.i, %bb10.i @@ -392,34 +392,34 @@ bb12.i: ; preds = %bb11.i, %bb10.i %v1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] %v2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] %v3.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %v1.0.i = load %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3] - %v2.0.i = load %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3] - %v3.0.i = load %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3] - %281 = load double* %202, align 4 ; <double> [#uses=3] + %v1.0.i = load %struct.VERTEX*, %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3] + %v2.0.i = load %struct.VERTEX*, %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3] + %v3.0.i = load %struct.VERTEX*, %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3] + %281 = load double, double* %202, align 4 ; 
<double> [#uses=3] %282 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %283 = load double* %282, align 4 ; <double> [#uses=1] + %283 = load double, double* %282, align 4 ; <double> [#uses=1] %284 = fsub double %283, %280 ; <double> [#uses=2] %285 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %286 = load double* %285, align 4 ; <double> [#uses=1] + %286 = load double, double* %285, align 4 ; <double> [#uses=1] %287 = fsub double %286, %279 ; <double> [#uses=2] %288 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %289 = load double* %288, align 4 ; <double> [#uses=1] + %289 = load double, double* %288, align 4 ; <double> [#uses=1] %290 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %291 = load double* %290, align 4 ; <double> [#uses=1] + %291 = load double, double* %290, align 4 ; <double> [#uses=1] %292 = fsub double %291, %280 ; <double> [#uses=2] %293 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %294 = load double* %293, align 4 ; <double> [#uses=1] + %294 = load double, double* %293, align 4 ; <double> [#uses=1] %295 = fsub double %294, %279 ; <double> [#uses=2] %296 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %297 = load double* %296, align 4 ; <double> [#uses=1] + %297 = load double, double* %296, align 4 ; <double> [#uses=1] %298 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %299 = load double* %298, align 4 ; <double> [#uses=1] + %299 = load double, double* %298, align 4 ; <double> [#uses=1] %300 = fsub double %299, %280 ; <double> [#uses=2] %301 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %302 = load double* %301, align 4 ; <double> [#uses=1] + %302 = load double, double* %301, align 4 ; <double> [#uses=1] %303 = fsub double %302, %279 ; <double> [#uses=2] %304 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %305 = load double* %304, align 4 ; <double> [#uses=1] + %305 = load double, double* %304, align 4 ; <double> [#uses=1] %306 = fsub double %289, %281 ; <double> [#uses=1] %307 = fmul double %292, %303 ; <double> [#uses=1] %308 = fmul double %295, %300 ; <double> [#uses=1] @@ -442,8 +442,8 @@ bb12.i: ; preds = %bb11.i, %bb10.i bb13.loopexit.i: ; preds = %bb12.i store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge - %.pre23.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1] - %.pre24.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1] + %.pre23.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1] + %.pre24.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1] br label %bb13.i bb13.i: ; preds = %bb13.loopexit.i, %bb9.i @@ -457,7 +457,7 @@ bb13.i: ; preds = %bb13.loopexit.i, %bb9.i %330 = or i32 %328, %329 ; <i32> [#uses=1] %331 = inttoptr i32 %330 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %332 = getelementptr %struct.edge_rec, %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %333 = load %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1] + %333 = load %struct.edge_rec*, %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> 
[#uses=1] %334 = ptrtoint %struct.edge_rec* %333 to i32 ; <i32> [#uses=2] %335 = add i32 %334, 16 ; <i32> [#uses=1] %336 = and i32 %335, 63 ; <i32> [#uses=1] @@ -466,19 +466,19 @@ bb13.i: ; preds = %bb13.loopexit.i, %bb9.i %339 = xor i32 %338, 32 ; <i32> [#uses=1] %340 = inttoptr i32 %339 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %341 = getelementptr %struct.edge_rec, %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %342 = load %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2] + %342 = load %struct.VERTEX*, %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2] %343 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %344 = load double* %343, align 4 ; <double> [#uses=1] + %344 = load double, double* %343, align 4 ; <double> [#uses=1] %345 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %346 = load double* %345, align 4 ; <double> [#uses=1] + %346 = load double, double* %345, align 4 ; <double> [#uses=1] %347 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %348 = load double* %347, align 4 ; <double> [#uses=1] + %348 = load double, double* %347, align 4 ; <double> [#uses=1] %349 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %350 = load double* %349, align 4 ; <double> [#uses=1] + %350 = load double, double* %349, align 4 ; <double> [#uses=1] %351 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; <double*> [#uses=2] - %352 = load double* %351, align 4 ; <double> [#uses=3] + %352 = load double, double* %351, align 4 ; <double> [#uses=3] %353 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; <double*> [#uses=2] - %354 = load double* %353, align 4 ; <double> [#uses=3] + %354 = load double, double* %353, align 4 ; <double> [#uses=3] %355 = fsub double %344, %352 ; <double> [#uses=1] %356 = fsub double %350, %354 ; <double> [#uses=1] %357 = fmul double %355, %356 ; <double> [#uses=1] @@ -491,7 +491,7 @@ bb13.i: ; preds = %bb13.loopexit.i, %bb9.i bb14.i: ; preds = %bb13.i %363 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %avail_edge.promoted = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1] + %avail_edge.promoted = load %struct.edge_rec*, %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1] br label %bb16.i bb15.i: ; preds = %bb16.i @@ -502,7 +502,7 @@ bb15.i: ; preds = %bb16.i %368 = or i32 %366, %367 ; <i32> [#uses=1] %369 = inttoptr i32 %368 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %370 = getelementptr %struct.edge_rec, %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %371 = load %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1] + %371 = load %struct.edge_rec*, %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1] %372 = ptrtoint %struct.edge_rec* %371 to i32 ; <i32> [#uses=2] %373 = add i32 %372, 16 ; <i32> [#uses=1] %374 = and i32 %373, 63 ; <i32> [#uses=1] @@ -510,7 +510,7 @@ bb15.i: ; preds = %bb16.i %376 = or i32 %374, %375 ; <i32> [#uses=1] %377 = inttoptr i32 %376 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %378 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %379 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1] + %379 = load %struct.edge_rec*, 
%struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1] %380 = ptrtoint %struct.edge_rec* %379 to i32 ; <i32> [#uses=2] %381 = add i32 %380, 16 ; <i32> [#uses=1] %382 = and i32 %381, 63 ; <i32> [#uses=1] @@ -518,7 +518,7 @@ bb15.i: ; preds = %bb16.i %384 = or i32 %382, %383 ; <i32> [#uses=1] %385 = inttoptr i32 %384 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %386 = getelementptr %struct.edge_rec, %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %387 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1] + %387 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1] %388 = ptrtoint %struct.edge_rec* %387 to i32 ; <i32> [#uses=2] %389 = add i32 %388, 16 ; <i32> [#uses=1] %390 = and i32 %389, 63 ; <i32> [#uses=1] @@ -526,13 +526,13 @@ bb15.i: ; preds = %bb16.i %392 = or i32 %390, %391 ; <i32> [#uses=1] %393 = inttoptr i32 %392 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %394 = getelementptr %struct.edge_rec, %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %395 = load %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1] + %395 = load %struct.edge_rec*, %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1] %396 = getelementptr %struct.edge_rec, %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %397 = load %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1] + %397 = load %struct.edge_rec*, %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4 store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4 - %398 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1] - %399 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1] + %398 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1] + %399 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4 store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4 %400 = xor i32 %364, 32 ; <i32> [#uses=2] @@ -541,7 +541,7 @@ bb15.i: ; preds = %bb16.i %403 = or i32 %402, %367 ; <i32> [#uses=1] %404 = inttoptr i32 %403 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %405 = getelementptr %struct.edge_rec, %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %406 = load %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1] + %406 = load %struct.edge_rec*, %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1] %407 = ptrtoint %struct.edge_rec* %406 to i32 ; <i32> [#uses=2] %408 = add i32 %407, 16 ; <i32> [#uses=1] %409 = and i32 %408, 63 ; <i32> [#uses=1] @@ -550,7 +550,7 @@ bb15.i: ; preds = %bb16.i %412 = inttoptr i32 %411 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %413 = inttoptr i32 %400 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %414 = getelementptr %struct.edge_rec, %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %415 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1] + %415 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1] %416 = ptrtoint %struct.edge_rec* %415 to i32 ; <i32> [#uses=2] %417 = add i32 %416, 16 ; <i32> [#uses=1] %418 = and i32 %417, 63 ; <i32> [#uses=1] @@ -558,7 +558,7 @@ bb15.i: ; preds = %bb16.i %420 = or i32 %418, %419 ; <i32> [#uses=1] %421 = inttoptr i32 %420 
to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %422 = getelementptr %struct.edge_rec, %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %423 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1] + %423 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1] %424 = ptrtoint %struct.edge_rec* %423 to i32 ; <i32> [#uses=2] %425 = add i32 %424, 16 ; <i32> [#uses=1] %426 = and i32 %425, 63 ; <i32> [#uses=1] @@ -566,13 +566,13 @@ bb15.i: ; preds = %bb16.i %428 = or i32 %426, %427 ; <i32> [#uses=1] %429 = inttoptr i32 %428 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %430 = getelementptr %struct.edge_rec, %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %431 = load %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1] + %431 = load %struct.edge_rec*, %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1] %432 = getelementptr %struct.edge_rec, %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %433 = load %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1] + %433 = load %struct.edge_rec*, %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4 store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4 - %434 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1] - %435 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1] + %434 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1] + %435 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4 store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4 %436 = inttoptr i32 %367 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] @@ -584,14 +584,14 @@ bb15.i: ; preds = %bb16.i %441 = or i32 %439, %440 ; <i32> [#uses=1] %442 = inttoptr i32 %441 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %443 = getelementptr %struct.edge_rec, %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %444 = load %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1] + %444 = load %struct.edge_rec*, %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1] %445 = ptrtoint %struct.edge_rec* %444 to i32 ; <i32> [#uses=2] %446 = add i32 %445, 16 ; <i32> [#uses=1] %447 = and i32 %446, 63 ; <i32> [#uses=1] %448 = and i32 %445, -64 ; <i32> [#uses=1] %449 = or i32 %447, %448 ; <i32> [#uses=2] - %.pre25.i = load double* %351, align 4 ; <double> [#uses=1] - %.pre26.i = load double* %353, align 4 ; <double> [#uses=1] + %.pre25.i = load double, double* %351, align 4 ; <double> [#uses=1] + %.pre26.i = load double, double* %353, align 4 ; <double> [#uses=1] br label %bb16.i bb16.i: ; preds = %bb15.i, %bb14.i @@ -612,34 +612,34 @@ bb16.i: ; preds = %bb15.i, %bb14.i %v1.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] %v2.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] %v3.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %v1.1.i = load %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3] - %v2.1.i = load %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3] - %v3.1.i = load %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3] - 
%452 = load double* %363, align 4 ; <double> [#uses=3] + %v1.1.i = load %struct.VERTEX*, %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3] + %v2.1.i = load %struct.VERTEX*, %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3] + %v3.1.i = load %struct.VERTEX*, %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3] + %452 = load double, double* %363, align 4 ; <double> [#uses=3] %453 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %454 = load double* %453, align 4 ; <double> [#uses=1] + %454 = load double, double* %453, align 4 ; <double> [#uses=1] %455 = fsub double %454, %451 ; <double> [#uses=2] %456 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %457 = load double* %456, align 4 ; <double> [#uses=1] + %457 = load double, double* %456, align 4 ; <double> [#uses=1] %458 = fsub double %457, %450 ; <double> [#uses=2] %459 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %460 = load double* %459, align 4 ; <double> [#uses=1] + %460 = load double, double* %459, align 4 ; <double> [#uses=1] %461 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %462 = load double* %461, align 4 ; <double> [#uses=1] + %462 = load double, double* %461, align 4 ; <double> [#uses=1] %463 = fsub double %462, %451 ; <double> [#uses=2] %464 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %465 = load double* %464, align 4 ; <double> [#uses=1] + %465 = load double, double* %464, align 4 ; <double> [#uses=1] %466 = fsub double %465, %450 ; <double> [#uses=2] %467 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %468 = load double* %467, align 4 ; <double> [#uses=1] + %468 = load double, double* %467, align 4 ; <double> [#uses=1] %469 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %470 = load double* %469, align 4 ; <double> [#uses=1] + %470 = load double, double* %469, align 4 ; <double> [#uses=1] %471 = fsub double %470, %451 ; <double> [#uses=2] %472 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %473 = load double* %472, align 4 ; <double> [#uses=1] + %473 = load double, double* %472, align 4 ; <double> [#uses=1] %474 = fsub double %473, %450 ; <double> [#uses=2] %475 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %476 = load double* %475, align 4 ; <double> [#uses=1] + %476 = load double, double* %475, align 4 ; <double> [#uses=1] %477 = fsub double %460, %452 ; <double> [#uses=1] %478 = fmul double %463, %474 ; <double> [#uses=1] %479 = fmul double %466, %471 ; <double> [#uses=1] @@ -662,8 +662,8 @@ bb16.i: ; preds = %bb15.i, %bb14.i bb17.loopexit.i: ; preds = %bb16.i store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge - %.pre27.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1] - %.pre28.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1] + %.pre27.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1] + %.pre28.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1] br label %bb17.i bb17.i: ; preds = %bb17.loopexit.i, %bb13.i @@ -674,19 +674,19 @@ bb17.i: ; preds = %bb17.loopexit.i, %bb13.i %498 = xor i32 %497, 
32 ; <i32> [#uses=1] %499 = inttoptr i32 %498 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] %500 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %501 = load %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4] + %501 = load %struct.VERTEX*, %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4] %502 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %503 = load double* %502, align 4 ; <double> [#uses=1] + %503 = load double, double* %502, align 4 ; <double> [#uses=1] %504 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %505 = load double* %504, align 4 ; <double> [#uses=1] + %505 = load double, double* %504, align 4 ; <double> [#uses=1] %506 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %507 = load double* %506, align 4 ; <double> [#uses=2] + %507 = load double, double* %506, align 4 ; <double> [#uses=2] %508 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %509 = load double* %508, align 4 ; <double> [#uses=2] + %509 = load double, double* %508, align 4 ; <double> [#uses=2] %510 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %511 = load double* %510, align 4 ; <double> [#uses=3] + %511 = load double, double* %510, align 4 ; <double> [#uses=3] %512 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %513 = load double* %512, align 4 ; <double> [#uses=3] + %513 = load double, double* %512, align 4 ; <double> [#uses=3] %514 = fsub double %503, %511 ; <double> [#uses=2] %515 = fsub double %509, %513 ; <double> [#uses=1] %516 = fmul double %514, %515 ; <double> [#uses=1] @@ -699,11 +699,11 @@ bb17.i: ; preds = %bb17.loopexit.i, %bb13.i %523 = xor i32 %522, 32 ; <i32> [#uses=1] %524 = inttoptr i32 %523 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %525 = getelementptr %struct.edge_rec, %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %526 = load %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4] + %526 = load %struct.VERTEX*, %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4] %527 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %528 = load double* %527, align 4 ; <double> [#uses=4] + %528 = load double, double* %527, align 4 ; <double> [#uses=4] %529 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %530 = load double* %529, align 4 ; <double> [#uses=4] + %530 = load double, double* %529, align 4 ; <double> [#uses=4] %531 = fsub double %530, %513 ; <double> [#uses=1] %532 = fmul double %514, %531 ; <double> [#uses=1] %533 = fsub double %528, %511 ; <double> [#uses=1] @@ -715,9 +715,9 @@ bb17.i: ; preds = %bb17.loopexit.i, %bb13.i bb21.i: ; preds = %bb17.i %538 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %539 = load %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3] + %539 = load %struct.VERTEX*, %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3] %540 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %541 = load %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3] + %541 = load %struct.VERTEX*, %struct.VERTEX** %540, align 4 ; 
<%struct.VERTEX*> [#uses=3] br i1 %521, label %bb22.i, label %bb24.i bb22.i: ; preds = %bb21.i @@ -725,27 +725,27 @@ bb22.i: ; preds = %bb21.i bb23.i: ; preds = %bb22.i %542 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %543 = load double* %542, align 4 ; <double> [#uses=3] + %543 = load double, double* %542, align 4 ; <double> [#uses=3] %544 = fsub double %507, %528 ; <double> [#uses=2] %545 = fsub double %509, %530 ; <double> [#uses=2] %546 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %547 = load double* %546, align 4 ; <double> [#uses=1] + %547 = load double, double* %546, align 4 ; <double> [#uses=1] %548 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %549 = load double* %548, align 4 ; <double> [#uses=1] + %549 = load double, double* %548, align 4 ; <double> [#uses=1] %550 = fsub double %549, %528 ; <double> [#uses=2] %551 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %552 = load double* %551, align 4 ; <double> [#uses=1] + %552 = load double, double* %551, align 4 ; <double> [#uses=1] %553 = fsub double %552, %530 ; <double> [#uses=2] %554 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %555 = load double* %554, align 4 ; <double> [#uses=1] + %555 = load double, double* %554, align 4 ; <double> [#uses=1] %556 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %557 = load double* %556, align 4 ; <double> [#uses=1] + %557 = load double, double* %556, align 4 ; <double> [#uses=1] %558 = fsub double %557, %528 ; <double> [#uses=2] %559 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %560 = load double* %559, align 4 ; <double> [#uses=1] + %560 = load double, double* %559, align 4 ; <double> [#uses=1] %561 = fsub double %560, %530 ; <double> [#uses=2] %562 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; <double*> [#uses=1] - %563 = load double* %562, align 4 ; <double> [#uses=1] + %563 = load double, double* %562, align 4 ; <double> [#uses=1] %564 = fsub double %547, %543 ; <double> [#uses=1] %565 = fmul double %550, %561 ; <double> [#uses=1] %566 = fmul double %553, %558 ; <double> [#uses=1] @@ -773,7 +773,7 @@ bb24.i: ; preds = %bb23.i, %bb21.i %585 = or i32 %583, %584 ; <i32> [#uses=1] %586 = inttoptr i32 %585 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %587 = getelementptr %struct.edge_rec, %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %588 = load %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1] + %588 = load %struct.edge_rec*, %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1] %589 = ptrtoint %struct.edge_rec* %588 to i32 ; <i32> [#uses=2] %590 = add i32 %589, 16 ; <i32> [#uses=1] %591 = and i32 %590, 63 ; <i32> [#uses=1] @@ -800,7 +800,7 @@ bb24.i: ; preds = %bb23.i, %bb21.i store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4 %608 = getelementptr %struct.edge_rec, %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4 - %609 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] + %609 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] %610 = ptrtoint %struct.edge_rec* %609 to i32 ; <i32> [#uses=2] %611 
= add i32 %610, 16 ; <i32> [#uses=1] %612 = and i32 %611, 63 ; <i32> [#uses=1] @@ -808,7 +808,7 @@ bb24.i: ; preds = %bb23.i, %bb21.i %614 = or i32 %612, %613 ; <i32> [#uses=1] %615 = inttoptr i32 %614 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %616 = getelementptr %struct.edge_rec, %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %617 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] + %617 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] %618 = ptrtoint %struct.edge_rec* %617 to i32 ; <i32> [#uses=2] %619 = add i32 %618, 16 ; <i32> [#uses=1] %620 = and i32 %619, 63 ; <i32> [#uses=1] @@ -816,19 +816,19 @@ bb24.i: ; preds = %bb23.i, %bb21.i %622 = or i32 %620, %621 ; <i32> [#uses=1] %623 = inttoptr i32 %622 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %624 = getelementptr %struct.edge_rec, %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %625 = load %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1] + %625 = load %struct.edge_rec*, %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1] %626 = getelementptr %struct.edge_rec, %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %627 = load %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1] + %627 = load %struct.edge_rec*, %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4 store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4 - %628 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] - %629 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] + %628 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] + %629 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4 store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4 %630 = xor i32 %598, 32 ; <i32> [#uses=2] %631 = inttoptr i32 %630 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %632 = getelementptr %struct.edge_rec, %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %633 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] + %633 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] %634 = ptrtoint %struct.edge_rec* %633 to i32 ; <i32> [#uses=2] %635 = add i32 %634, 16 ; <i32> [#uses=1] %636 = and i32 %635, 63 ; <i32> [#uses=1] @@ -836,7 +836,7 @@ bb24.i: ; preds = %bb23.i, %bb21.i %638 = or i32 %636, %637 ; <i32> [#uses=1] %639 = inttoptr i32 %638 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %640 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %641 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] + %641 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] %642 = ptrtoint %struct.edge_rec* %641 to i32 ; <i32> [#uses=2] %643 = add i32 %642, 16 ; <i32> [#uses=1] %644 = and i32 %643, 63 ; <i32> [#uses=1] @@ -844,13 +844,13 @@ bb24.i: ; preds = %bb23.i, %bb21.i %646 = or i32 %644, %645 ; <i32> [#uses=1] %647 = inttoptr i32 %646 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %648 = getelementptr %struct.edge_rec, %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %649 = load %struct.edge_rec** %648, align 4 ; 
<%struct.edge_rec*> [#uses=1] + %649 = load %struct.edge_rec*, %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1] %650 = getelementptr %struct.edge_rec, %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %651 = load %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1] + %651 = load %struct.edge_rec*, %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4 store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4 - %652 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] - %653 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] + %652 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] + %653 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4 store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4 %654 = add i32 %630, 48 ; <i32> [#uses=1] @@ -859,7 +859,7 @@ bb24.i: ; preds = %bb23.i, %bb21.i %657 = or i32 %655, %656 ; <i32> [#uses=1] %658 = inttoptr i32 %657 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %659 = getelementptr %struct.edge_rec, %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %660 = load %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1] + %660 = load %struct.edge_rec*, %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1] %661 = ptrtoint %struct.edge_rec* %660 to i32 ; <i32> [#uses=2] %662 = add i32 %661, 16 ; <i32> [#uses=1] %663 = and i32 %662, 63 ; <i32> [#uses=1] @@ -875,7 +875,7 @@ bb25.i: ; preds = %bb23.i, %bb22.i %670 = or i32 %668, %669 ; <i32> [#uses=1] %671 = inttoptr i32 %670 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %672 = getelementptr %struct.edge_rec, %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %673 = load %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1] + %673 = load %struct.edge_rec*, %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1] %674 = ptrtoint %struct.edge_rec* %673 to i32 ; <i32> [#uses=2] %675 = add i32 %674, 16 ; <i32> [#uses=1] %676 = and i32 %675, 63 ; <i32> [#uses=1] @@ -902,7 +902,7 @@ bb25.i: ; preds = %bb23.i, %bb22.i store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4 %693 = getelementptr %struct.edge_rec, %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4 - %694 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + %694 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] %695 = ptrtoint %struct.edge_rec* %694 to i32 ; <i32> [#uses=2] %696 = add i32 %695, 16 ; <i32> [#uses=1] %697 = and i32 %696, 63 ; <i32> [#uses=1] @@ -910,7 +910,7 @@ bb25.i: ; preds = %bb23.i, %bb22.i %699 = or i32 %697, %698 ; <i32> [#uses=1] %700 = inttoptr i32 %699 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %701 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %702 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] + %702 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] %703 = ptrtoint %struct.edge_rec* %702 to i32 ; <i32> [#uses=2] %704 = add i32 %703, 16 ; <i32> [#uses=1] %705 = and i32 %704, 63 ; <i32> [#uses=1] @@ -918,19 +918,19 @@ bb25.i: ; preds = %bb23.i, 
%bb22.i %707 = or i32 %705, %706 ; <i32> [#uses=1] %708 = inttoptr i32 %707 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %709 = getelementptr %struct.edge_rec, %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %710 = load %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1] + %710 = load %struct.edge_rec*, %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1] %711 = getelementptr %struct.edge_rec, %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %712 = load %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1] + %712 = load %struct.edge_rec*, %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4 store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4 - %713 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] - %714 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] + %713 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + %714 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4 store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4 %715 = xor i32 %683, 32 ; <i32> [#uses=1] %716 = inttoptr i32 %715 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] %717 = getelementptr %struct.edge_rec, %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %718 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] + %718 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] %719 = ptrtoint %struct.edge_rec* %718 to i32 ; <i32> [#uses=2] %720 = add i32 %719, 16 ; <i32> [#uses=1] %721 = and i32 %720, 63 ; <i32> [#uses=1] @@ -938,7 +938,7 @@ bb25.i: ; preds = %bb23.i, %bb22.i %723 = or i32 %721, %722 ; <i32> [#uses=1] %724 = inttoptr i32 %723 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %725 = getelementptr %struct.edge_rec, %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %726 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] + %726 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] %727 = ptrtoint %struct.edge_rec* %726 to i32 ; <i32> [#uses=2] %728 = add i32 %727, 16 ; <i32> [#uses=1] %729 = and i32 %728, 63 ; <i32> [#uses=1] @@ -946,21 +946,21 @@ bb25.i: ; preds = %bb23.i, %bb22.i %731 = or i32 %729, %730 ; <i32> [#uses=1] %732 = inttoptr i32 %731 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %733 = getelementptr %struct.edge_rec, %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %734 = load %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1] + %734 = load %struct.edge_rec*, %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1] %735 = getelementptr %struct.edge_rec, %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %736 = load %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1] + %736 = load %struct.edge_rec*, %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4 store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4 - %737 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] - %738 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] + %737 = load %struct.edge_rec*, 
%struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] + %738 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4 store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4 - %739 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + %739 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] br label %bb9.i do_merge.exit: ; preds = %bb17.i %740 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %741 = load %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1] + %741 = load %struct.VERTEX*, %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1] %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i ; <i1> [#uses=1] br i1 %742, label %bb5.loopexit, label %bb2 @@ -970,28 +970,28 @@ bb2: ; preds = %bb2, %do_merge.exit %744 = xor i32 %743, 32 ; <i32> [#uses=1] %745 = inttoptr i32 %744 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %746 = getelementptr %struct.edge_rec, %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %747 = load %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3] + %747 = load %struct.edge_rec*, %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3] %748 = getelementptr %struct.edge_rec, %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %749 = load %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1] + %749 = load %struct.VERTEX*, %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1] %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i ; <i1> [#uses=1] br i1 %750, label %bb5.loopexit, label %bb2 bb4: ; preds = %bb5.loopexit, %bb4 %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] ; <%struct.edge_rec*> [#uses=1] %751 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %752 = load %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1] + %752 = load %struct.edge_rec*, %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1] %753 = ptrtoint %struct.edge_rec* %752 to i32 ; <i32> [#uses=1] %754 = xor i32 %753, 32 ; <i32> [#uses=1] %755 = inttoptr i32 %754 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] %756 = getelementptr %struct.edge_rec, %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %757 = load %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1] + %757 = load %struct.VERTEX*, %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1] %758 = icmp eq %struct.VERTEX* %757, %extra ; <i1> [#uses=1] br i1 %758, label %bb6, label %bb4 bb5.loopexit: ; preds = %bb2, %do_merge.exit %ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] ; <%struct.edge_rec*> [#uses=1] %759 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %760 = load %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1] + %760 = load %struct.VERTEX*, %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1] %761 = icmp eq %struct.VERTEX* %760, %extra ; <i1> [#uses=1] br i1 %761, label %bb6, label %bb4 @@ -1003,7 +1003,7 @@ bb6: ; preds = %bb5.loopexit, %bb4 bb7: ; preds = %bb %762 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] - %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> 
[#uses=4] + %763 = load %struct.VERTEX*, %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4] %764 = icmp eq %struct.VERTEX* %763, null ; <i1> [#uses=1] %765 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] %766 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] @@ -1076,14 +1076,14 @@ bb11: ; preds = %bb7 %806 = xor i32 %781, 32 ; <i32> [#uses=1] %807 = inttoptr i32 %806 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %808 = getelementptr %struct.edge_rec, %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %809 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] + %809 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] %810 = ptrtoint %struct.edge_rec* %809 to i32 ; <i32> [#uses=2] %811 = add i32 %810, 16 ; <i32> [#uses=1] %812 = and i32 %811, 63 ; <i32> [#uses=1] %813 = and i32 %810, -64 ; <i32> [#uses=1] %814 = or i32 %812, %813 ; <i32> [#uses=1] %815 = inttoptr i32 %814 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %816 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1] + %816 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1] %817 = ptrtoint %struct.edge_rec* %816 to i32 ; <i32> [#uses=2] %818 = add i32 %817, 16 ; <i32> [#uses=1] %819 = and i32 %818, 63 ; <i32> [#uses=1] @@ -1091,32 +1091,32 @@ bb11: ; preds = %bb7 %821 = or i32 %819, %820 ; <i32> [#uses=1] %822 = inttoptr i32 %821 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %823 = getelementptr %struct.edge_rec, %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %824 = load %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1] + %824 = load %struct.edge_rec*, %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1] %825 = getelementptr %struct.edge_rec, %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %826 = load %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1] + %826 = load %struct.edge_rec*, %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4 store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4 - %827 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] - %828 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1] + %827 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] + %828 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4 store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4 %829 = xor i32 %795, 32 ; <i32> [#uses=3] %830 = inttoptr i32 %829 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %831 = getelementptr %struct.edge_rec, %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] - %832 = load %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1] + %832 = load %struct.VERTEX*, %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1] %833 = and i32 %798, 63 ; <i32> [#uses=1] %834 = and i32 %795, -64 ; <i32> [#uses=1] %835 = or i32 %833, %834 ; <i32> [#uses=1] %836 = inttoptr i32 %835 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %837 = getelementptr %struct.edge_rec, %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %838 = load %struct.edge_rec** %837, align 4 ; 
<%struct.edge_rec*> [#uses=1] + %838 = load %struct.edge_rec*, %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1] %839 = ptrtoint %struct.edge_rec* %838 to i32 ; <i32> [#uses=2] %840 = add i32 %839, 16 ; <i32> [#uses=1] %841 = and i32 %840, 63 ; <i32> [#uses=1] %842 = and i32 %839, -64 ; <i32> [#uses=1] %843 = or i32 %841, %842 ; <i32> [#uses=1] %844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1] + %845 = load %struct.VERTEX*, %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1] %846 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] %847 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7] store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4 @@ -1137,7 +1137,7 @@ bb11: ; preds = %bb7 store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4 %859 = getelementptr %struct.edge_rec, %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4 - %860 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %860 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] %861 = ptrtoint %struct.edge_rec* %860 to i32 ; <i32> [#uses=2] %862 = add i32 %861, 16 ; <i32> [#uses=1] %863 = and i32 %862, 63 ; <i32> [#uses=1] @@ -1145,7 +1145,7 @@ bb11: ; preds = %bb7 %865 = or i32 %863, %864 ; <i32> [#uses=1] %866 = inttoptr i32 %865 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %867 = getelementptr %struct.edge_rec, %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %868 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] + %868 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] %869 = ptrtoint %struct.edge_rec* %868 to i32 ; <i32> [#uses=2] %870 = add i32 %869, 16 ; <i32> [#uses=1] %871 = and i32 %870, 63 ; <i32> [#uses=1] @@ -1153,26 +1153,26 @@ bb11: ; preds = %bb7 %873 = or i32 %871, %872 ; <i32> [#uses=1] %874 = inttoptr i32 %873 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %875 = getelementptr %struct.edge_rec, %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %876 = load %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1] + %876 = load %struct.edge_rec*, %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1] %877 = getelementptr %struct.edge_rec, %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %878 = load %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1] + %878 = load %struct.edge_rec*, %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4 store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4 - %879 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] - %880 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] + %879 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %880 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4 store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4 %881 = xor i32 %849, 32 ; <i32> [#uses=3] %882 = inttoptr i32 %881 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %883 = getelementptr 
%struct.edge_rec, %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6] - %884 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %884 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] %885 = ptrtoint %struct.edge_rec* %884 to i32 ; <i32> [#uses=2] %886 = add i32 %885, 16 ; <i32> [#uses=1] %887 = and i32 %886, 63 ; <i32> [#uses=1] %888 = and i32 %885, -64 ; <i32> [#uses=1] %889 = or i32 %887, %888 ; <i32> [#uses=1] %890 = inttoptr i32 %889 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %891 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] + %891 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] %892 = ptrtoint %struct.edge_rec* %891 to i32 ; <i32> [#uses=2] %893 = add i32 %892, 16 ; <i32> [#uses=1] %894 = and i32 %893, 63 ; <i32> [#uses=1] @@ -1180,27 +1180,27 @@ bb11: ; preds = %bb7 %896 = or i32 %894, %895 ; <i32> [#uses=1] %897 = inttoptr i32 %896 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %898 = getelementptr %struct.edge_rec, %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %899 = load %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1] + %899 = load %struct.edge_rec*, %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1] %900 = getelementptr %struct.edge_rec, %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %901 = load %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1] + %901 = load %struct.edge_rec*, %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4 store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4 - %902 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] - %903 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] + %902 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %903 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4 store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4 %904 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %905 = load double* %904, align 4 ; <double> [#uses=2] + %905 = load double, double* %904, align 4 ; <double> [#uses=2] %906 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %907 = load double* %906, align 4 ; <double> [#uses=2] + %907 = load double, double* %906, align 4 ; <double> [#uses=2] %908 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %909 = load double* %908, align 4 ; <double> [#uses=3] + %909 = load double, double* %908, align 4 ; <double> [#uses=3] %910 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %911 = load double* %910, align 4 ; <double> [#uses=3] + %911 = load double, double* %910, align 4 ; <double> [#uses=3] %912 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; <double*> [#uses=1] - %913 = load double* %912, align 4 ; <double> [#uses=3] + %913 = load double, double* %912, align 4 ; <double> [#uses=3] %914 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; <double*> [#uses=1] - %915 = load double* %914, align 4 ; <double> [#uses=3] + %915 = load double, double* 
%914, align 4 ; <double> [#uses=3] %916 = fsub double %905, %913 ; <double> [#uses=1] %917 = fsub double %911, %915 ; <double> [#uses=1] %918 = fmul double %916, %917 ; <double> [#uses=1] @@ -1228,14 +1228,14 @@ bb14: ; preds = %bb13 %934 = or i32 %932, %933 ; <i32> [#uses=1] %935 = inttoptr i32 %934 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %936 = getelementptr %struct.edge_rec, %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %937 = load %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1] + %937 = load %struct.edge_rec*, %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1] %938 = ptrtoint %struct.edge_rec* %937 to i32 ; <i32> [#uses=2] %939 = add i32 %938, 16 ; <i32> [#uses=1] %940 = and i32 %939, 63 ; <i32> [#uses=1] %941 = and i32 %938, -64 ; <i32> [#uses=1] %942 = or i32 %940, %941 ; <i32> [#uses=1] %943 = inttoptr i32 %942 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %944 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %944 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] %945 = ptrtoint %struct.edge_rec* %944 to i32 ; <i32> [#uses=2] %946 = add i32 %945, 16 ; <i32> [#uses=1] %947 = and i32 %946, 63 ; <i32> [#uses=1] @@ -1243,7 +1243,7 @@ bb14: ; preds = %bb13 %949 = or i32 %947, %948 ; <i32> [#uses=1] %950 = inttoptr i32 %949 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %951 = getelementptr %struct.edge_rec, %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %952 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] + %952 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] %953 = ptrtoint %struct.edge_rec* %952 to i32 ; <i32> [#uses=2] %954 = add i32 %953, 16 ; <i32> [#uses=1] %955 = and i32 %954, 63 ; <i32> [#uses=1] @@ -1251,13 +1251,13 @@ bb14: ; preds = %bb13 %957 = or i32 %955, %956 ; <i32> [#uses=1] %958 = inttoptr i32 %957 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %959 = getelementptr %struct.edge_rec, %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %960 = load %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1] + %960 = load %struct.edge_rec*, %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1] %961 = getelementptr %struct.edge_rec, %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %962 = load %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1] + %962 = load %struct.edge_rec*, %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4 store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4 - %963 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] - %964 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] + %963 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %964 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4 store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4 %965 = add i32 %881, 16 ; <i32> [#uses=1] @@ -1265,14 +1265,14 @@ bb14: ; preds = %bb13 %967 = or i32 %966, %933 ; <i32> [#uses=1] %968 = inttoptr i32 %967 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %969 = getelementptr %struct.edge_rec, %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] - %970 = load 
%struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1] + %970 = load %struct.edge_rec*, %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1] %971 = ptrtoint %struct.edge_rec* %970 to i32 ; <i32> [#uses=2] %972 = add i32 %971, 16 ; <i32> [#uses=1] %973 = and i32 %972, 63 ; <i32> [#uses=1] %974 = and i32 %971, -64 ; <i32> [#uses=1] %975 = or i32 %973, %974 ; <i32> [#uses=1] %976 = inttoptr i32 %975 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %977 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %977 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] %978 = ptrtoint %struct.edge_rec* %977 to i32 ; <i32> [#uses=2] %979 = add i32 %978, 16 ; <i32> [#uses=1] %980 = and i32 %979, 63 ; <i32> [#uses=1] @@ -1280,7 +1280,7 @@ bb14: ; preds = %bb13 %982 = or i32 %980, %981 ; <i32> [#uses=1] %983 = inttoptr i32 %982 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %984 = getelementptr %struct.edge_rec, %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] - %985 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1] + %985 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1] %986 = ptrtoint %struct.edge_rec* %985 to i32 ; <i32> [#uses=2] %987 = add i32 %986, 16 ; <i32> [#uses=1] %988 = and i32 %987, 63 ; <i32> [#uses=1] @@ -1288,17 +1288,17 @@ bb14: ; preds = %bb13 %990 = or i32 %988, %989 ; <i32> [#uses=1] %991 = inttoptr i32 %990 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %992 = getelementptr %struct.edge_rec, %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %993 = load %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1] + %993 = load %struct.edge_rec*, %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1] %994 = getelementptr %struct.edge_rec, %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] - %995 = load %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1] + %995 = load %struct.edge_rec*, %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4 store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4 - %996 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] - %997 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1] + %996 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %997 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1] store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4 store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4 %998 = inttoptr i32 %933 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] - %999 = load %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1] + %999 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1] %1000 = getelementptr %struct.edge_rec, %struct.edge_rec* %998, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4 store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4 diff --git a/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll index d477ba9835b..d746b104baf 100644 --- a/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll @@ -83,7 +83,7 @@ bb45: 
; preds = %bb43.preheader, %cli_calloc.exit54 br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70 bb52: ; preds = %cli_calloc.exit - %0 = load i16* undef, align 4 ; <i16> [#uses=1] + %0 = load i16, i16* undef, align 4 ; <i16> [#uses=1] %1 = icmp eq i16 %0, 0 ; <i1> [#uses=1] %iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; <i8*> [#uses=1] %2 = tail call i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; <i32> [#uses=0] diff --git a/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll index 198efa70e9e..156fd8843bc 100644 --- a/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll +++ b/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll @@ -67,7 +67,7 @@ bb18: ; preds = %bb18, %bb.nph bb22: ; preds = %bb18, %bb17 %0 = getelementptr i8, i8* null, i32 10 ; <i8*> [#uses=1] %1 = bitcast i8* %0 to i16* ; <i16*> [#uses=1] - %2 = load i16* %1, align 2 ; <i16> [#uses=1] + %2 = load i16, i16* %1, align 2 ; <i16> [#uses=1] %3 = add i16 %2, 1 ; <i16> [#uses=1] %4 = zext i16 %3 to i32 ; <i32> [#uses=1] %5 = mul i32 %4, 3 ; <i32> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll index 5003fbdedb2..01591c80362 100644 --- a/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll +++ b/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll @@ -12,7 +12,7 @@ bb: ; preds = %bb, %entry br i1 undef, label %bb28, label %bb bb28: ; preds = %bb - %0 = load double* @a, align 4 ; <double> [#uses=2] + %0 = load double, double* @a, align 4 ; <double> [#uses=2] %1 = fadd double %0, undef ; <double> [#uses=2] br i1 undef, label %bb59, label %bb60 diff --git a/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll index a656c495f79..e277b4cf91a 100644 --- a/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll +++ b/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll @@ -13,17 +13,17 @@ entry: %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] store <4 x i32> %v, <4 x i32>* %v_addr store i32 %f, i32* %f_addr - %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1] - %2 = load i32* %f_addr, align 4 ; <i32> [#uses=1] + %1 = load <4 x i32>, <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1] + %2 = load i32, i32* %f_addr, align 4 ; <i32> [#uses=1] %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1] %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1] %5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1] store <4 x i32> %5, <4 x i32>* %0, align 16 - %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] + %6 = load <4 x i32>, <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] store <4 x i32> %6, <4 x i32>* %retval, align 16 br label %return return: ; preds = %entry - %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1] + %retval1 = load <4 x i32>, <4 x i32>* %retval ; <<4 x i32>> [#uses=1] ret <4 x i32> %retval1 } diff --git a/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll index 574a06cea39..a5e9692a008 100644 --- a/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll +++ b/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll @@ -29,7 +29,7 @@ bb7: ; preds = %bb2 bb8: ; preds = %bb7, %entry %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ] ; <i32> [#uses=3] %scevgep22 = getelementptr %struct.iovec, %struct.iovec* %iov, i32 %2, i32 0; <i8**> 
[#uses=0] - %3 = load i32* %nr_segs, align 4 ; <i32> [#uses=1] + %3 = load i32, i32* %nr_segs, align 4 ; <i32> [#uses=1] %4 = icmp ult i32 %2, %3 ; <i1> [#uses=1] br i1 %4, label %bb, label %bb9 diff --git a/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll index 5bb9b1ecff3..0d258e66b7e 100644 --- a/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll +++ b/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll @@ -9,18 +9,18 @@ target triple = "armv7-apple-darwin9" define %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind { entry: - %t.idx51.val.i = load double* null ; <double> [#uses=1] + %t.idx51.val.i = load double, double* null ; <double> [#uses=1] br i1 undef, label %bb4.i, label %bb.i bb.i: ; preds = %entry unreachable bb4.i: ; preds = %entry - %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2] + %0 = load %struct.tree*, %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2] %.idx45.i = getelementptr %struct.tree, %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1] - %.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1] + %.idx45.val.i = load double, double* %.idx45.i ; <double> [#uses=1] %.idx46.i = getelementptr %struct.tree, %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1] - %.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1] + %.idx46.val.i = load double, double* %.idx46.i ; <double> [#uses=1] %1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2] %2 = fmul double %1, %1 ; <double> [#uses=1] %3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2] diff --git a/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll index 3373ba40186..d6babb6c55d 100644 --- a/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll +++ b/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll @@ -22,14 +22,14 @@ invcont: ; preds = %entry br label %return bb: ; preds = %ppad - %eh_select = load i32* %eh_selector + %eh_select = load i32, i32* %eh_selector store i32 %eh_select, i32* %save_filt.1, align 4 - %eh_value = load i8** %eh_exception + %eh_value = load i8*, i8** %eh_exception store i8* %eh_value, i8** %save_eptr.0, align 4 call void @_ZN1AD1Ev(%struct.A* %a) nounwind - %0 = load i8** %save_eptr.0, align 4 + %0 = load i8*, i8** %save_eptr.0, align 4 store i8* %0, i8** %eh_exception, align 4 - %1 = load i32* %save_filt.1, align 4 + %1 = load i32, i32* %save_filt.1, align 4 store i32 %1, i32* %eh_selector, align 4 br label %Unwind @@ -49,7 +49,7 @@ ppad: ; preds = %lpad br label %bb Unwind: ; preds = %bb - %eh_ptr3 = load i8** %eh_exception + %eh_ptr3 = load i8*, i8** %eh_exception call void @_Unwind_SjLj_Resume(i8* %eh_ptr3) unreachable } @@ -61,7 +61,7 @@ entry: store %struct.A* %this, %struct.A** %this_addr %0 = call i8* @_Znwm(i32 4) %1 = bitcast i8* %0 to i32* - %2 = load %struct.A** %this_addr, align 4 + %2 = load %struct.A*, %struct.A** %this_addr, align 4 %3 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 0 store i32* %1, i32** %3, align 4 br label %return @@ -77,9 +77,9 @@ entry: %this_addr = alloca %struct.A* %"alloca point" = bitcast i32 0 to i32 store %struct.A* %this, %struct.A** %this_addr - %0 = load %struct.A** %this_addr, align 4 + %0 = load %struct.A*, %struct.A** %this_addr, align 4 %1 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0 - %2 = load i32** %1, align 4 + %2 = load i32*, i32** %1, align 4 %3 = bitcast i32* %2 to i8* call void @_ZdlPv(i8* %3) nounwind br label %bb diff --git a/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll 
b/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll index b078ec06dbb..a6d128d9e0c 100644 --- a/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll +++ b/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll @@ -4,7 +4,7 @@ define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind { ;CHECK-LABEL: v2regbug: ;CHECK: vzip.16 - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1> ret <4 x i16> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll index 426bd17590b..4437d37e9f4 100644 --- a/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll +++ b/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll @@ -2,8 +2,8 @@ ; pr4939 define void @test(double* %x, double* %y) nounwind { - %1 = load double* %x - %2 = load double* %y + %1 = load double, double* %x + %2 = load double, double* %y %3 = fsub double -0.000000e+00, %1 %4 = fcmp ugt double %2, %3 br i1 %4, label %bb1, label %bb2 diff --git a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll index 5d3722caafb..de927a8f8b6 100644 --- a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll +++ b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll @@ -13,10 +13,10 @@ declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwin define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind { entry: - %val.i.i = load <4 x float>* undef ; <<4 x float>> [#uses=1] - %val2.i.i = load <4 x float>* null ; <<4 x float>> [#uses=1] + %val.i.i = load <4 x float>, <4 x float>* undef ; <<4 x float>> [#uses=1] + %val2.i.i = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1] %elt3.i.i = getelementptr inbounds %struct.obb, %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1] - %val4.i.i = load <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1] + %val4.i.i = load <4 x float>, <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1] %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] %1 = fadd <4 x float> undef, zeroinitializer ; <<4 x float>> [#uses=1] br label %bb33 diff --git a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll index dd9a6fd12d7..b8a1479fd34 100644 --- a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll +++ b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll @@ -16,13 +16,13 @@ define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) n %tmp3738 = inttoptr i32 %tmp37 to float* %tmp39 = add i32 %1, 24 %tmp3940 = inttoptr i32 %tmp39 to float* - %2 = load float* %lsr.iv2641, align 4 - %3 = load float* %tmp2930, align 4 - %4 = load float* %tmp3132, align 4 - %5 = load float* %tmp3334, align 4 - %6 = load float* %tmp3536, align 4 - %7 = load float* %tmp3738, align 4 - %8 = load float* %tmp3940, align 4 + %2 = load float, float* %lsr.iv2641, align 4 + %3 = load float, float* %tmp2930, align 4 + %4 = load float, float* %tmp3132, align 4 + %5 = load float, float* %tmp3334, align 4 + %6 = load float, float* %tmp3536, align 4 + %7 = load float, float* %tmp3738, align 4 + %8 = load float, float* %tmp3940, align 4 %9 = insertelement <4 x float> undef, float %6, i32 0 %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer %11 = insertelement <4 x float> %10, float %7, i32 1 diff --git 
a/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll index 2ff479b2178..4bbd0470564 100644 --- a/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll +++ b/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll @@ -9,7 +9,7 @@ define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation. entry: %0 = call arm_aapcs_vfpcc %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0] %1 = call arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0] - %val92 = load <4 x float>* null ; <<4 x float>> [#uses=1] + %val92 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1] %2 = call arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0] ret %struct.1* %this } diff --git a/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll b/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll index 224bd019481..4502542809f 100644 --- a/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll +++ b/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll @@ -7,7 +7,7 @@ entry: %out_poly16_t = alloca i16 ; <i16*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] ; CHECK: vldr - %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1] + %0 = load <4 x i16>, <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1] %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1] store i16 %1, i16* %out_poly16_t, align 2 br label %return diff --git a/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll index 465368b0ba8..641036f684b 100644 --- a/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll +++ b/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll @@ -6,8 +6,8 @@ entry: br i1 undef, label %return, label %bb bb: ; preds = %bb, %entry - %0 = load float* undef, align 4 ; <float> [#uses=1] - %1 = load float* null, align 4 ; <float> [#uses=1] + %0 = load float, float* undef, align 4 ; <float> [#uses=1] + %1 = load float, float* null, align 4 ; <float> [#uses=1] %2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1] %3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2] %4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll index 2597b413ec7..154cd65e4ec 100644 --- a/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll +++ b/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll @@ -8,7 +8,7 @@ entry: bb: ; preds = %bb, %entry ; CHECK: vld1.16 {d16[], d17[]} - %0 = load i16* undef, align 2 + %0 = load i16, i16* undef, align 2 %1 = insertelement <8 x i16> undef, i16 %0, i32 2 %2 = insertelement <8 x i16> %1, i16 undef, i32 3 %3 = mul <8 x i16> %2, %2 diff --git a/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll index 38eb0ea2c89..9632c773010 100644 --- a/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll +++ b/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll @@ -6,7 +6,7 @@ target triple = "armv7-eabi" define arm_aapcs_vfpcc void @foo() { entry: - %0 = load float* null, align 4 ; <float> [#uses=2] + %0 = load float, float* null, align 4 ; <float> [#uses=2] %1 = fmul float %0, undef ; <float> [#uses=2] %2 = fmul float 0.000000e+00, %1 ; <float> [#uses=2] %3 = fmul float %0, %1 ; <float> [#uses=1] @@ -18,7 +18,7 @@ entry: %7 
= fsub float %2, undef ; <float> [#uses=1] %8 = fsub float 0.000000e+00, undef ; <float> [#uses=3] %9 = fadd float %2, undef ; <float> [#uses=3] - %10 = load float* undef, align 8 ; <float> [#uses=3] + %10 = load float, float* undef, align 8 ; <float> [#uses=3] %11 = fmul float %8, %10 ; <float> [#uses=1] %12 = fadd float undef, %11 ; <float> [#uses=2] %13 = fmul float undef, undef ; <float> [#uses=1] @@ -30,10 +30,10 @@ entry: %19 = fadd float %18, 0.000000e+00 ; <float> [#uses=1] %20 = fmul float undef, %10 ; <float> [#uses=1] %21 = fadd float %19, %20 ; <float> [#uses=1] - %22 = load float* undef, align 8 ; <float> [#uses=1] + %22 = load float, float* undef, align 8 ; <float> [#uses=1] %23 = fmul float %5, %22 ; <float> [#uses=1] %24 = fadd float %23, undef ; <float> [#uses=1] - %25 = load float* undef, align 8 ; <float> [#uses=2] + %25 = load float, float* undef, align 8 ; <float> [#uses=2] %26 = fmul float %8, %25 ; <float> [#uses=1] %27 = fadd float %24, %26 ; <float> [#uses=1] %28 = fmul float %9, %25 ; <float> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll index 19824b89447..07e910b3e07 100644 --- a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll @@ -14,10 +14,10 @@ entry: bb: ; preds = %entry %0 = getelementptr inbounds %bar, %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2] - %1 = load float* undef, align 4 ; <float> [#uses=1] + %1 = load float, float* undef, align 4 ; <float> [#uses=1] %2 = fsub float 0.000000e+00, undef ; <float> [#uses=2] %3 = fmul float 0.000000e+00, undef ; <float> [#uses=1] - %4 = load float* %0, align 4 ; <float> [#uses=3] + %4 = load float, float* %0, align 4 ; <float> [#uses=3] %5 = fmul float %4, %2 ; <float> [#uses=1] %6 = fsub float %3, %5 ; <float> [#uses=1] %7 = fmul float %4, undef ; <float> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll index ebe261bd9ba..9eddcf71cb3 100644 --- a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll +++ b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll @@ -22,19 +22,19 @@ bb3.i: ; preds = %bb2.i, %bb %0 = getelementptr inbounds %quuz, %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0] %1 = fsub float 0.000000e+00, undef ; <float> [#uses=1] %2 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2] - %3 = load float* %2, align 4 ; <float> [#uses=1] + %3 = load float, float* %2, align 4 ; <float> [#uses=1] %4 = getelementptr inbounds %quuz, %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1] %5 = fsub float %3, undef ; <float> [#uses=2] %6 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2] - %7 = load float* %6, align 4 ; <float> [#uses=1] + %7 = load float, float* %6, align 4 ; <float> [#uses=1] %8 = fsub float %7, undef ; <float> [#uses=1] %9 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2] - %10 = load float* %9, align 4 ; <float> [#uses=1] + %10 = load float, float* %9, align 4 ; <float> [#uses=1] %11 = fsub float %10, undef ; <float> [#uses=2] %12 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2] - %13 = load float* %12, align 4 ; <float> [#uses=1] + %13 = load float, float* %12, align 4 ; <float> [#uses=1] %14 = fsub float %13, undef ; <float> [#uses=1] - %15 = load float* undef, align 4 ; <float> 
[#uses=1] + %15 = load float, float* undef, align 4 ; <float> [#uses=1] %16 = fsub float %15, undef ; <float> [#uses=1] %17 = fmul float %5, %16 ; <float> [#uses=1] %18 = fsub float %17, 0.000000e+00 ; <float> [#uses=5] @@ -52,11 +52,11 @@ bb3.i: ; preds = %bb2.i, %bb %27 = fadd float %26, undef ; <float> [#uses=1] %28 = fadd float %27, undef ; <float> [#uses=1] %29 = call arm_aapcs_vfpcc float @sqrtf(float %28) readnone ; <float> [#uses=1] - %30 = load float* null, align 4 ; <float> [#uses=2] - %31 = load float* %4, align 4 ; <float> [#uses=2] - %32 = load float* %2, align 4 ; <float> [#uses=2] - %33 = load float* null, align 4 ; <float> [#uses=3] - %34 = load float* %6, align 4 ; <float> [#uses=2] + %30 = load float, float* null, align 4 ; <float> [#uses=2] + %31 = load float, float* %4, align 4 ; <float> [#uses=2] + %32 = load float, float* %2, align 4 ; <float> [#uses=2] + %33 = load float, float* null, align 4 ; <float> [#uses=3] + %34 = load float, float* %6, align 4 ; <float> [#uses=2] %35 = fsub float %33, %34 ; <float> [#uses=2] %36 = fmul float %20, %35 ; <float> [#uses=1] %37 = fsub float %36, undef ; <float> [#uses=1] @@ -71,12 +71,12 @@ bb3.i: ; preds = %bb2.i, %bb %46 = fadd float %44, %45 ; <float> [#uses=1] %47 = fmul float %33, %43 ; <float> [#uses=1] %48 = fadd float %46, %47 ; <float> [#uses=2] - %49 = load float* %9, align 4 ; <float> [#uses=2] + %49 = load float, float* %9, align 4 ; <float> [#uses=2] %50 = fsub float %30, %49 ; <float> [#uses=1] - %51 = load float* %12, align 4 ; <float> [#uses=3] + %51 = load float, float* %12, align 4 ; <float> [#uses=3] %52 = fsub float %32, %51 ; <float> [#uses=2] - %53 = load float* undef, align 4 ; <float> [#uses=2] - %54 = load float* %24, align 4 ; <float> [#uses=2] + %53 = load float, float* undef, align 4 ; <float> [#uses=2] + %54 = load float, float* %24, align 4 ; <float> [#uses=2] %55 = fmul float %54, undef ; <float> [#uses=1] %56 = fmul float undef, %52 ; <float> [#uses=1] %57 = fsub float %55, %56 ; <float> [#uses=1] @@ -93,7 +93,7 @@ bb3.i: ; preds = %bb2.i, %bb %68 = fsub float %51, %31 ; <float> [#uses=1] %69 = fsub float %53, %33 ; <float> [#uses=1] %70 = fmul float undef, %67 ; <float> [#uses=1] - %71 = load float* undef, align 4 ; <float> [#uses=2] + %71 = load float, float* undef, align 4 ; <float> [#uses=2] %72 = fmul float %71, %69 ; <float> [#uses=1] %73 = fsub float %70, %72 ; <float> [#uses=1] %74 = fmul float %71, %68 ; <float> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll index 0aff7183464..8a14804dcf8 100644 --- a/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll +++ b/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll @@ -11,7 +11,7 @@ define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) { entry: - %0 = load %bar** undef, align 4 ; <%bar*> [#uses=2] + %0 = load %bar*, %bar** undef, align 4 ; <%bar*> [#uses=2] br i1 false, label %bb85, label %bb bb: ; preds = %entry @@ -43,10 +43,10 @@ bb3.i: ; preds = %bb2.i, %bb %18 = fadd float %17, undef ; <float> [#uses=1] %19 = call arm_aapcs_vfpcc float @sqrtf(float %18) readnone ; <float> [#uses=2] %20 = fcmp ogt float %19, 0x3F1A36E2E0000000 ; <i1> [#uses=1] - %21 = load float* %1, align 4 ; <float> [#uses=2] - %22 = load float* %3, align 4 ; <float> [#uses=2] - %23 = load float* undef, align 4 ; <float> [#uses=2] - %24 = load float* %4, align 4 ; <float> [#uses=2] + %21 = load float, float* %1, align 4 ; <float> 
[#uses=2] + %22 = load float, float* %3, align 4 ; <float> [#uses=2] + %23 = load float, float* undef, align 4 ; <float> [#uses=2] + %24 = load float, float* %4, align 4 ; <float> [#uses=2] %25 = fsub float %23, %24 ; <float> [#uses=2] %26 = fmul float 0.000000e+00, %25 ; <float> [#uses=1] %27 = fsub float %26, undef ; <float> [#uses=1] @@ -59,11 +59,11 @@ bb3.i: ; preds = %bb2.i, %bb %34 = fadd float %32, %33 ; <float> [#uses=1] %35 = fmul float %23, %31 ; <float> [#uses=1] %36 = fadd float %34, %35 ; <float> [#uses=1] - %37 = load float* %6, align 4 ; <float> [#uses=2] - %38 = load float* %7, align 4 ; <float> [#uses=2] + %37 = load float, float* %6, align 4 ; <float> [#uses=2] + %38 = load float, float* %7, align 4 ; <float> [#uses=2] %39 = fsub float %22, %38 ; <float> [#uses=2] - %40 = load float* undef, align 4 ; <float> [#uses=1] - %41 = load float* null, align 4 ; <float> [#uses=2] + %40 = load float, float* undef, align 4 ; <float> [#uses=1] + %41 = load float, float* null, align 4 ; <float> [#uses=2] %42 = fmul float %41, undef ; <float> [#uses=1] %43 = fmul float undef, %39 ; <float> [#uses=1] %44 = fsub float %42, %43 ; <float> [#uses=1] @@ -80,7 +80,7 @@ bb3.i: ; preds = %bb2.i, %bb %55 = fmul float undef, undef ; <float> [#uses=1] %56 = fsub float %54, %55 ; <float> [#uses=1] %57 = fmul float undef, %53 ; <float> [#uses=1] - %58 = load float* undef, align 4 ; <float> [#uses=2] + %58 = load float, float* undef, align 4 ; <float> [#uses=2] %59 = fmul float %58, undef ; <float> [#uses=1] %60 = fsub float %57, %59 ; <float> [#uses=1] %61 = fmul float %58, undef ; <float> [#uses=1] @@ -100,7 +100,7 @@ bb3.i: ; preds = %bb2.i, %bb br i1 %72, label %bb4.i97, label %ccc.exit98 bb4.i97: ; preds = %bb3.i - %73 = load %bar** undef, align 4 ; <%bar*> [#uses=0] + %73 = load %bar*, %bar** undef, align 4 ; <%bar*> [#uses=0] br label %ccc.exit98 ccc.exit98: ; preds = %bb4.i97, %bb3.i diff --git a/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll b/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll index 5de609bdcb5..d21b488bb3a 100644 --- a/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll +++ b/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) { - %1 = load i32* undef ; <i32> [#uses=1] + %1 = load i32, i32* undef ; <i32> [#uses=1] %2 = sub i32 %1, 48 ; <i32> [#uses=1] br i1 undef, label %stack_overflow, label %no_overflow @@ -10,13 +10,13 @@ stack_overflow: ; preds = %0 no_overflow: ; preds = %0 %frame = inttoptr i32 %2 to [17 x i32]* ; <[17 x i32]*> [#uses=4] - %3 = load i32* undef ; <i32> [#uses=1] - %4 = load i32* null ; <i32> [#uses=1] + %3 = load i32, i32* undef ; <i32> [#uses=1] + %4 = load i32, i32* null ; <i32> [#uses=1] %5 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1] %6 = bitcast i32* %5 to [8 x i8]** ; <[8 x i8]**> [#uses=1] - %7 = load [8 x i8]** %6 ; <[8 x i8]*> [#uses=1] + %7 = load [8 x i8]*, [8 x i8]** %6 ; <[8 x i8]*> [#uses=1] %8 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1] - %9 = load i32* %8 ; <i32> [#uses=1] + %9 = load i32, i32* %8 ; <i32> [#uses=1] br i1 undef, label %bci_13, label %bci_4 bci_13: ; preds = %no_overflow @@ -27,13 +27,13 @@ bci_30: ; preds = %bci_13 bci_46: ; preds = %bci_30 %10 = sub i32 %4, %3 ; <i32> [#uses=1] - %11 = load [8 x i8]** null ; <[8 x i8]*> [#uses=1] + %11 = load [8 x i8]*, [8 x i8]** null ; 
<[8 x i8]*> [#uses=1] %callee = bitcast [8 x i8]* %11 to [84 x i8]* ; <[84 x i8]*> [#uses=1] %12 = bitcast i8* undef to i32* ; <i32*> [#uses=1] - %base_pc7 = load i32* %12 ; <i32> [#uses=2] + %base_pc7 = load i32, i32* %12 ; <i32> [#uses=2] %13 = add i32 %base_pc7, 0 ; <i32> [#uses=1] %14 = inttoptr i32 %13 to void ([84 x i8]*, i32, [788 x i8]*)** ; <void ([84 x i8]*, i32, [788 x i8]*)**> [#uses=1] - %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1] + %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)*, void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1] %15 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1] %16 = ptrtoint i32* %15 to i32 ; <i32> [#uses=1] %stack_pointer_addr9 = bitcast i8* undef to i32* ; <i32*> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll index 1bc58f48704..a1923ec2c3e 100644 --- a/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll +++ b/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll @@ -9,10 +9,10 @@ stack_overflow: ; preds = %0 no_overflow: ; preds = %0 %frame = inttoptr i32 %1 to [17 x i32]* ; <[17 x i32]*> [#uses=4] - %2 = load i32* null ; <i32> [#uses=2] + %2 = load i32, i32* null ; <i32> [#uses=2] %3 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1] - %4 = load i32* %3 ; <i32> [#uses=2] - %5 = load [8 x i8]** undef ; <[8 x i8]*> [#uses=2] + %4 = load i32, i32* %3 ; <i32> [#uses=2] + %5 = load [8 x i8]*, [8 x i8]** undef ; <[8 x i8]*> [#uses=2] br i1 undef, label %bci_13, label %bci_4 bci_13: ; preds = %no_overflow diff --git a/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll index 52244aae5f5..d6febe6750e 100644 --- a/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll +++ b/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll @@ -82,9 +82,9 @@ cond_true1369.preheader: ; preds = %cond_true1254 ret void bb1567: ; preds = %cond_true1254 - %tmp1591 = load i64* getelementptr inbounds (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1] + %tmp1591 = load i64, i64* getelementptr inbounds (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1] %tmp1572 = tail call fastcc i32 @FirstOne() ; <i32> [#uses=1] - %tmp1594 = load i32* undef ; <i32> [#uses=1] + %tmp1594 = load i32, i32* undef ; <i32> [#uses=1] %tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8 ; <i8> [#uses=1] %shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64 ; <i64> [#uses=1] %tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6 ; <i64> [#uses=1] @@ -92,7 +92,7 @@ bb1567: ; preds = %cond_true1254 %tmp1596 = and i32 %tmp1595.upgrd.7, 255 ; <i32> [#uses=1] %gep.upgrd.8 = zext i32 %tmp1596 to i64 ; <i64> [#uses=1] %tmp1598 = getelementptr [64 x [256 x i32]], [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1] - %tmp1599 = load i32* %tmp1598 ; <i32> [#uses=1] + %tmp1599 = load i32, i32* %tmp1598 ; <i32> [#uses=1] %tmp1602 = sub i32 0, %tmp1599 ; <i32> [#uses=1] br i1 undef, label %cond_next1637, label %cond_true1607 diff --git a/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll b/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll index 946164321a2..16c49537720 100644 --- a/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll +++ b/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll @@ -16,7 +16,7 @@ entry: %vla10 = alloca i8, i32 undef, align 
1 ; <i8*> [#uses=1] %vla14 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1] %vla18 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1] - %tmp21 = load i32* undef ; <i32> [#uses=1] + %tmp21 = load i32, i32* undef ; <i32> [#uses=1] %0 = mul i32 1, %tmp21 ; <i32> [#uses=1] %vla22 = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1] call void (...)* @zz(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1) diff --git a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll index caa00c1fad3..24469cc3717 100644 --- a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll +++ b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll @@ -21,7 +21,7 @@ entry: br i1 %tst, label %bb.nph96, label %bb3 bb3: ; preds = %entry - %1 = load i64* %0, align 4 ; <i64> [#uses=0] + %1 = load i64, i64* %0, align 4 ; <i64> [#uses=0] ret i8 42 bb.nph96: ; preds = %entry diff --git a/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll b/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll index 587c0afcb71..94d0f4abfb7 100644 --- a/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll +++ b/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll @@ -14,7 +14,7 @@ define <8 x i8> @f2(<8 x i8> %x) nounwind { } define void @f3(<4 x i64>* %xp) nounwind { - %x = load <4 x i64>* %xp + %x = load <4 x i64>, <4 x i64>* %xp %y = shufflevector <4 x i64> %x, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1> store <4 x i64> %y, <4 x i64>* %xp ret void diff --git a/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll index 6ec49be179d..6a6ccf3d0a0 100644 --- a/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -3,7 +3,7 @@ define void @test(float* %fltp, i32 %packedValue, float* %table) nounwind { entry: - %0 = load float* %fltp + %0 = load float, float* %fltp %1 = insertelement <4 x float> undef, float %0, i32 0 %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer %3 = shl i32 %packedValue, 16 @@ -11,28 +11,28 @@ entry: %.sum = add i32 %4, 4 %5 = getelementptr inbounds float, float* %table, i32 %.sum ;CHECK: vldr s - %6 = load float* %5, align 4 + %6 = load float, float* %5, align 4 %tmp11 = insertelement <4 x float> undef, float %6, i32 0 %7 = shl i32 %packedValue, 18 %8 = ashr i32 %7, 30 %.sum12 = add i32 %8, 4 %9 = getelementptr inbounds float, float* %table, i32 %.sum12 ;CHECK: vldr s - %10 = load float* %9, align 4 + %10 = load float, float* %9, align 4 %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1 %11 = shl i32 %packedValue, 20 %12 = ashr i32 %11, 30 %.sum13 = add i32 %12, 4 %13 = getelementptr inbounds float, float* %table, i32 %.sum13 ;CHECK: vldr s - %14 = load float* %13, align 4 + %14 = load float, float* %13, align 4 %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2 %15 = shl i32 %packedValue, 22 %16 = ashr i32 %15, 30 %.sum14 = add i32 %16, 4 %17 = getelementptr inbounds float, float* %table, i32 %.sum14 ;CHECK: vldr s - %18 = load float* %17, align 4 + %18 = load float, float* %17, align 4 %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 %19 = fmul <4 x float> %tmp5, %2 %20 = bitcast float* %fltp to i8* diff --git a/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll index f7ceb6e7e48..f86c3ba9ef6 100644 --- a/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll +++ b/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll @@ -6,7 +6,7 @@ define void @foo(%struct.__int8x8x2_t* nocapture %a, i8* 
%b) nounwind { entry: %0 = bitcast %struct.__int8x8x2_t* %a to i128* ; <i128*> [#uses=1] - %srcval = load i128* %0, align 8 ; <i128> [#uses=2] + %srcval = load i128, i128* %0, align 8 ; <i128> [#uses=2] %tmp6 = trunc i128 %srcval to i64 ; <i64> [#uses=1] %tmp8 = lshr i128 %srcval, 64 ; <i128> [#uses=1] %tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll b/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll index ac442aad65f..a1ab27e6ea3 100644 --- a/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll +++ b/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll @@ -18,10 +18,10 @@ entry: br i1 %1, label %bb, label %return bb: ; preds = %bb445, %entry - %2 = load %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2] + %2 = load %struct.cellbox*, %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2] %3 = getelementptr inbounds %struct.cellbox, %struct.cellbox* %2, i32 0, i32 3 ; <i32*> [#uses=1] store i32 undef, i32* %3, align 4 - %4 = load i32* undef, align 4 ; <i32> [#uses=3] + %4 = load i32, i32* undef, align 4 ; <i32> [#uses=3] %5 = icmp eq i32 undef, 1 ; <i1> [#uses=1] br i1 %5, label %bb10, label %bb445 @@ -29,12 +29,12 @@ bb10: ; preds = %bb br i1 undef, label %bb11, label %bb445 bb11: ; preds = %bb10 - %6 = load %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3] - %7 = load %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1] + %6 = load %struct.tilebox*, %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3] + %7 = load %struct.termbox*, %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1] %8 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %6, i32 0, i32 13 ; <i32*> [#uses=1] - %9 = load i32* %8, align 4 ; <i32> [#uses=3] + %9 = load i32, i32* %8, align 4 ; <i32> [#uses=3] %10 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %6, i32 0, i32 15 ; <i32*> [#uses=1] - %11 = load i32* %10, align 4 ; <i32> [#uses=1] + %11 = load i32, i32* %10, align 4 ; <i32> [#uses=1] br i1 false, label %bb12, label %bb13 bb12: ; preds = %bb11 @@ -77,7 +77,7 @@ bb21: ; preds = %bb13 bb36: ; preds = %bb43.loopexit, %bb36 %termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1] - %30 = load i32* undef, align 4 ; <i32> [#uses=1] + %30 = load i32, i32* undef, align 4 ; <i32> [#uses=1] %31 = sub nsw i32 %30, %9 ; <i32> [#uses=1] %32 = sitofp i32 %31 to double ; <double> [#uses=1] %33 = fdiv double %32, 0.000000e+00 ; <double> [#uses=1] @@ -93,7 +93,7 @@ bb36: ; preds = %bb43.loopexit, %bb3 %40 = add i32 %iftmp.47.0, 0 ; <i32> [#uses=1] store i32 %40, i32* undef, align 4 %41 = getelementptr inbounds %struct.termbox, %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1] - %42 = load %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2] + %42 = load %struct.termbox*, %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2] %43 = icmp eq %struct.termbox* %42, null ; <i1> [#uses=1] br i1 %43, label %bb52.loopexit, label %bb36 @@ -128,7 +128,7 @@ bb248: ; preds = %bb322, %bb.nph485 bb249: ; preds = %bb248 %46 = getelementptr inbounds %struct.cellbox, %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1] - %47 = load %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1] + %47 = load %struct.tilebox*, %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1] %48 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %47, i32 0, i32 11 ; <i32*> [#uses=1] 
store i32 undef, i32* %48, align 4 unreachable diff --git a/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll index c33b16e5dd2..4ca8ef8dc2a 100644 --- a/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll +++ b/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll @@ -35,7 +35,7 @@ entry: %.loc = alloca i32 ; <i32*> [#uses=2] %tmp.i = getelementptr inbounds %"class.llvm::StringInit", %"class.llvm::StringInit"* %this, i32 0, i32 0, i32 4 ; <i8*> [#uses=1] %0 = bitcast i8* %tmp.i to %"struct.llvm::Init"** ; <%"struct.llvm::Init"**> [#uses=1] - %tmp2.i = load %"struct.llvm::Init"** %0 ; <%"struct.llvm::Init"*> [#uses=2] + %tmp2.i = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %0 ; <%"struct.llvm::Init"*> [#uses=2] %1 = icmp eq %"struct.llvm::Init"* %tmp2.i, null ; <i1> [#uses=1] br i1 %1, label %entry.return_crit_edge, label %tmpbb @@ -53,16 +53,16 @@ tmpbb: ; preds = %entry if.then: ; preds = %tmpbb %tmp2.i.i.i.i = getelementptr inbounds %"class.llvm::StringInit", %"class.llvm::StringInit"* %this, i32 0, i32 1, i32 0, i32 0 ; <i8**> [#uses=1] - %tmp3.i.i.i.i = load i8** %tmp2.i.i.i.i ; <i8*> [#uses=2] + %tmp3.i.i.i.i = load i8*, i8** %tmp2.i.i.i.i ; <i8*> [#uses=2] %arrayidx.i.i.i.i = getelementptr inbounds i8, i8* %tmp3.i.i.i.i, i32 -12 ; <i8*> [#uses=1] %tmp.i.i.i = bitcast i8* %arrayidx.i.i.i.i to i32* ; <i32*> [#uses=1] - %tmp2.i.i.i = load i32* %tmp.i.i.i ; <i32> [#uses=1] + %tmp2.i.i.i = load i32, i32* %tmp.i.i.i ; <i32> [#uses=1] %tmp.i5 = getelementptr inbounds %"class.llvm::Record", %"class.llvm::Record"* %R, i32 0, i32 4 ; <%"class.std::vector"*> [#uses=1] %tmp2.i.i = getelementptr inbounds %"class.llvm::Record", %"class.llvm::Record"* %R, i32 0, i32 4, i32 0, i32 4 ; <i8*> [#uses=1] %4 = bitcast i8* %tmp2.i.i to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1] - %tmp3.i.i6 = load %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1] + %tmp3.i.i6 = load %"class.llvm::RecordVal"*, %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1] %tmp5.i.i = bitcast %"class.std::vector"* %tmp.i5 to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1] - %tmp6.i.i = load %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5] + %tmp6.i.i = load %"class.llvm::RecordVal"*, %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5] %sub.ptr.lhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp3.i.i6 to i32 ; <i32> [#uses=1] %sub.ptr.rhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp6.i.i to i32 ; <i32> [#uses=1] %sub.ptr.sub.i.i = sub i32 %sub.ptr.lhs.cast.i.i, %sub.ptr.rhs.cast.i.i ; <i32> [#uses=1] @@ -71,7 +71,7 @@ if.then: ; preds = %tmpbb codeRepl: ; preds = %if.then %targetBlock = call i1 @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs_for.cond.i(i32 %sub.ptr.div.i.i, %"class.llvm::RecordVal"* %tmp6.i.i, i32 %tmp2.i.i.i, i8* %tmp3.i.i.i.i, i32* %.loc) ; <i1> [#uses=1] - %.reload = load i32* %.loc ; <i32> [#uses=3] + %.reload = load i32, i32* %.loc ; <i32> [#uses=3] br i1 %targetBlock, label %for.cond.i.return_crit_edge, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit for.cond.i.return_crit_edge: ; preds = %codeRepl @@ -101,7 +101,7 @@ land.lhs.true.return_crit_edge: ; preds = %land.lhs.true lor.lhs.false: ; preds = %land.lhs.true %tmp.i3 = getelementptr inbounds %"class.llvm::RecordVal", %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1] - %tmp2.i4 = load 
%"struct.llvm::Init"** %tmp.i3 ; <%"struct.llvm::Init"*> [#uses=2] + %tmp2.i4 = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %tmp.i3 ; <%"struct.llvm::Init"*> [#uses=2] %5 = icmp eq %"struct.llvm::Init"* %tmp2.i4, null ; <i1> [#uses=1] br i1 %5, label %lor.lhs.false.if.end_crit_edge, label %tmpbb1 @@ -122,7 +122,7 @@ tmpbb1: ; preds = %lor.lhs.false if.end: ; preds = %.if.end_crit_edge, %lor.lhs.false.if.end_crit_edge, %if.then6.if.end_crit_edge %tmp.i1 = getelementptr inbounds %"class.llvm::RecordVal", %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1] - %tmp2.i2 = load %"struct.llvm::Init"** %tmp.i1 ; <%"struct.llvm::Init"*> [#uses=3] + %tmp2.i2 = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %tmp.i1 ; <%"struct.llvm::Init"*> [#uses=3] %8 = bitcast %"class.llvm::StringInit"* %this to %"struct.llvm::Init"* ; <%"struct.llvm::Init"*> [#uses=1] %cmp19 = icmp eq %"struct.llvm::Init"* %tmp2.i2, %8 ; <i1> [#uses=1] br i1 %cmp19, label %cond.false, label %cond.end @@ -133,9 +133,9 @@ cond.false: ; preds = %if.end cond.end: ; preds = %if.end %9 = bitcast %"struct.llvm::Init"* %tmp2.i2 to %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)***> [#uses=1] - %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1] + %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1] %vfn = getelementptr inbounds %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %10, i32 8 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1] - %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1] + %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1] %call25 = tail call %"struct.llvm::Init"* %11(%"struct.llvm::Init"* %tmp2.i2, %"class.llvm::Record"* %R, %"class.llvm::RecordVal"* %RV, %"class.std::basic_string"* %FieldName) ; <%"struct.llvm::Init"*> [#uses=1] ret %"struct.llvm::Init"* %call25 diff --git 
a/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll index 96bc9c41d57..b341a832042 100644 --- a/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll +++ b/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll @@ -9,7 +9,7 @@ define void @x0(i8* nocapture %buf, i32 %nbytes) nounwind optsize { entry: tail call void @llvm.dbg.value(metadata i8* %buf, i64 0, metadata !0, metadata !{!"0x102"}), !dbg !15 tail call void @llvm.dbg.value(metadata i32 %nbytes, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !16 - %tmp = load i32* @length, !dbg !17 ; <i32> [#uses=3] + %tmp = load i32, i32* @length, !dbg !17 ; <i32> [#uses=3] %cmp = icmp eq i32 %tmp, -1, !dbg !17 ; <i1> [#uses=1] %cmp.not = xor i1 %cmp, true ; <i1> [#uses=1] %cmp3 = icmp ult i32 %tmp, %nbytes, !dbg !17 ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll b/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll index fcabc900afa..c7ef46c89ff 100644 --- a/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll +++ b/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll @@ -7,7 +7,7 @@ define i32 @main(i32 %argc, i8** %argv) nounwind { entry: %0 = shufflevector <2 x i64> undef, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2> ; <<2 x i64>> [#uses=1] store <2 x i64> %0, <2 x i64>* undef, align 16 - %val4723 = load <8 x i16>* undef ; <<8 x i16>> [#uses=1] + %val4723 = load <8 x i16>, <8 x i16>* undef ; <<8 x i16>> [#uses=1] call void @PrintShortX(i8* getelementptr inbounds ([21 x i8]* @.str271, i32 0, i32 0), <8 x i16> %val4723, i32 0) nounwind ret i32 undef } diff --git a/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll index fcc93818b76..d1259d5bdb6 100644 --- a/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll +++ b/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll @@ -30,9 +30,9 @@ declare void @_Unwind_SjLj_Resume(i8*) define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 { entry: %tmp.i = getelementptr inbounds %struct.A, %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1] - %tmp2.i = load i32* %tmp.i ; <i32> [#uses=1] + %tmp2.i = load i32, i32* %tmp.i ; <i32> [#uses=1] %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0] - %tmp3.i = load i32* @d ; <i32> [#uses=1] + %tmp3.i = load i32, i32* @d ; <i32> [#uses=1] %inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1] store i32 %inc.i, i32* @d ret void @@ -59,13 +59,13 @@ try.cont: ; preds = %lpad %1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0] %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0] %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0] - %tmp3.i.i = load i32* @d ; <i32> [#uses=1] + %tmp3.i.i = load i32, i32* @d ; <i32> [#uses=1] %inc.i.i4 = add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1] store i32 %inc.i.i4, i32* @d tail call void @__cxa_end_catch() - %tmp13 = load i32* @d ; <i32> [#uses=1] + %tmp13 = load i32, i32* @d ; <i32> [#uses=1] %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0] - %tmp16 = load i32* @d ; <i32> [#uses=1] + %tmp16 = load i32, i32* @d ; <i32> [#uses=1] %cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1] %conv = zext i1 %cmp to i32 ; <i32> [#uses=1] ret 
i32 %conv diff --git a/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll index 4b47085afd5..13214c52153 100644 --- a/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll +++ b/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll @@ -12,17 +12,17 @@ entry: br label %return bb: ; No predecessors! - %eh_select = load i32* %eh_selector ; <i32> [#uses=1] + %eh_select = load i32, i32* %eh_selector ; <i32> [#uses=1] store i32 %eh_select, i32* %save_filt.936, align 4 - %eh_value = load i8** %eh_exception ; <i8*> [#uses=1] + %eh_value = load i8*, i8** %eh_exception ; <i8*> [#uses=1] store i8* %eh_value, i8** %save_eptr.935, align 4 invoke arm_apcscc void @func3() to label %invcont unwind label %lpad invcont: ; preds = %bb - %tmp6 = load i8** %save_eptr.935, align 4 ; <i8*> [#uses=1] + %tmp6 = load i8*, i8** %save_eptr.935, align 4 ; <i8*> [#uses=1] store i8* %tmp6, i8** %eh_exception, align 4 - %tmp7 = load i32* %save_filt.936, align 4 ; <i32> [#uses=1] + %tmp7 = load i32, i32* %save_filt.936, align 4 ; <i32> [#uses=1] store i32 %tmp7, i32* %eh_selector, align 4 br label %Unwind @@ -38,7 +38,7 @@ lpad: ; preds = %bb cleanup %exn = extractvalue { i8*, i32 } %eh_ptr, 0 store i8* %exn, i8** %eh_exception - %eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1] + %eh_ptr13 = load i8*, i8** %eh_exception ; <i8*> [#uses=1] %eh_select14 = extractvalue { i8*, i32 } %eh_ptr, 1 store i32 %eh_select14, i32* %eh_selector br label %ppad @@ -47,7 +47,7 @@ ppad: br label %bb12 Unwind: - %eh_ptr15 = load i8** %eh_exception + %eh_ptr15 = load i8*, i8** %eh_exception call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr15) unreachable } diff --git a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 7f01cb7b38e..5d30995ebbe 100644 --- a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -13,13 +13,13 @@ entry: bb: ; preds = %entry %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1] - %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1] + %2 = load i32, i32* %1, align 8, !dbg !29 ; <i32> [#uses=1] %3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1] br label %bb2, !dbg !29 bb1: ; preds = %entry %4 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1] - %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1] + %5 = load i32, i32* %4, align 8, !dbg !30 ; <i32> [#uses=1] %6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1] br label %bb2, !dbg !30 @@ -58,11 +58,11 @@ entry: store i32 1, i32* %1, align 8, !dbg !42 %2 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] %3 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1] - %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1] + %4 = load i8*, i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1] store i8* %4, i8** %2, align 8, !dbg !43 %5 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] %6 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1] - %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1] + %7 = load i32, i32* %6, align 8, !dbg !43 ; <i32> [#uses=1] store i32 %7, i32* %5, align 8, !dbg !43 %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0] call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, 
metadata !{!"0x102"}), !dbg !43 diff --git a/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll index 5295a0a4770..09428ce9c33 100644 --- a/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll +++ b/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll @@ -39,29 +39,29 @@ presymmetry.exit: ; preds = %bb28.i %scevgep97.i = getelementptr i32, i32* %in, i32 undef %tmp198410 = or i32 undef, 1 %scevgep.i48 = getelementptr i32, i32* %in, i32 undef - %0 = load i32* %scevgep.i48, align 4 + %0 = load i32, i32* %scevgep.i48, align 4 %1 = add nsw i32 %0, 0 store i32 %1, i32* undef, align 4 %asmtmp.i.i33.i.i.i = tail call %0 asm "smull\09$0, $1, $2, $3", "=&r,=&r,%r,r,~{cc}"(i32 undef, i32 1518500250) nounwind %asmresult1.i.i34.i.i.i = extractvalue %0 %asmtmp.i.i33.i.i.i, 1 %2 = shl i32 %asmresult1.i.i34.i.i.i, 1 - %3 = load i32* null, align 4 - %4 = load i32* undef, align 4 + %3 = load i32, i32* null, align 4 + %4 = load i32, i32* undef, align 4 %5 = sub nsw i32 %3, %4 - %6 = load i32* undef, align 4 - %7 = load i32* null, align 4 + %6 = load i32, i32* undef, align 4 + %7 = load i32, i32* null, align 4 %8 = sub nsw i32 %6, %7 - %9 = load i32* %scevgep97.i, align 4 - %10 = load i32* undef, align 4 + %9 = load i32, i32* %scevgep97.i, align 4 + %10 = load i32, i32* undef, align 4 %11 = sub nsw i32 %9, %10 - %12 = load i32* null, align 4 - %13 = load i32* %scevgep101.i, align 4 + %12 = load i32, i32* null, align 4 + %13 = load i32, i32* %scevgep101.i, align 4 %14 = sub nsw i32 %12, %13 - %15 = load i32* %scevgep.i48, align 4 - %16 = load i32* null, align 4 + %15 = load i32, i32* %scevgep.i48, align 4 + %16 = load i32, i32* null, align 4 %17 = add nsw i32 %16, %15 %18 = sub nsw i32 %15, %16 - %19 = load i32* undef, align 4 + %19 = load i32, i32* undef, align 4 %20 = add nsw i32 %19, %2 %21 = sub nsw i32 %19, %2 %22 = add nsw i32 %14, %5 diff --git a/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll index 1351a26756e..9dfe4da2767 100644 --- a/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll +++ b/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll @@ -12,7 +12,7 @@ define arm_aapcs_vfpcc i32 @main() nounwind { entry: - %0 = load i32* @i, align 4 + %0 = load i32, i32* @i, align 4 switch i32 %0, label %bb2 [ i32 12, label %bb i32 13, label %bb1 diff --git a/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll index c58a79ae6e4..8db166afec6 100644 --- a/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll +++ b/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll @@ -31,9 +31,9 @@ ; OBJ-NEXT: Section: .bss define i32 @main(i32 %argc) nounwind { - %1 = load i32* @sum, align 4 + %1 = load i32, i32* @sum, align 4 %2 = getelementptr [80 x i8], [80 x i8]* @array00, i32 0, i32 %argc - %3 = load i8* %2 + %3 = load i8, i8* %2 %4 = zext i8 %3 to i32 %5 = add i32 %1, %4 ret i32 %5 diff --git a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 67dda672719..5dc07e4730e 100644 --- a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -31,7 +31,7 @@ target triple = "thumbv7-apple-darwin10" define zeroext i8 @get1(i8 zeroext %a) nounwind optsize { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30 - %0 = load i8* @x1, align 4, !dbg !30 + %0 = load i8, i8* @x1, align 4, !dbg !30 tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata 
!11, metadata !{!"0x102"}), !dbg !30 store i8 %a, i8* @x1, align 4, !dbg !30 ret i8 %0, !dbg !31 @@ -42,7 +42,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon define zeroext i8 @get2(i8 zeroext %a) nounwind optsize { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !32 - %0 = load i8* @x2, align 4, !dbg !32 + %0 = load i8, i8* @x2, align 4, !dbg !32 tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !32 store i8 %a, i8* @x2, align 4, !dbg !32 ret i8 %0, !dbg !33 @@ -51,7 +51,7 @@ entry: define zeroext i8 @get3(i8 zeroext %a) nounwind optsize { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !34 - %0 = load i8* @x3, align 4, !dbg !34 + %0 = load i8, i8* @x3, align 4, !dbg !34 tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !34 store i8 %a, i8* @x3, align 4, !dbg !34 ret i8 %0, !dbg !35 @@ -60,7 +60,7 @@ entry: define zeroext i8 @get4(i8 zeroext %a) nounwind optsize { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !36 - %0 = load i8* @x4, align 4, !dbg !36 + %0 = load i8, i8* @x4, align 4, !dbg !36 tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !36 store i8 %a, i8* @x4, align 4, !dbg !36 ret i8 %0, !dbg !37 @@ -69,7 +69,7 @@ entry: define zeroext i8 @get5(i8 zeroext %a) nounwind optsize { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38 - %0 = load i8* @x5, align 4, !dbg !38 + %0 = load i8, i8* @x5, align 4, !dbg !38 tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !38 store i8 %a, i8* @x5, align 4, !dbg !38 ret i8 %0, !dbg !39 diff --git a/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll b/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll index b65c41fb01f..e96641bf667 100644 --- a/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll +++ b/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll @@ -39,10 +39,10 @@ bb134: ; preds = %bb131 unreachable bb135: ; preds = %meshBB396 - %uriHash.1.phi.load = load i32* undef - %.load120 = load i8*** %.SV116 - %.phi24 = load i8* null - %.phi26 = load i8** null + %uriHash.1.phi.load = load i32, i32* undef + %.load120 = load i8**, i8*** %.SV116 + %.phi24 = load i8, i8* null + %.phi26 = load i8*, i8** null store i8 %.phi24, i8* %.phi26, align 1 %0 = getelementptr inbounds i8, i8* %.phi26, i32 1 store i8* %0, i8** %.load120, align 4 @@ -52,7 +52,7 @@ bb135: ; preds = %meshBB396 %1 = mul i32 %uriHash.1.phi.load, 1000003 %2 = xor i32 0, %1 store i32 %2, i32* null - %3 = load i8* null, align 1 + %3 = load i8, i8* null, align 1 %4 = icmp eq i8 %3, 0 store i8* %0, i8** undef br i1 %4, label %meshBB472, label %bb131 diff --git a/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll index 521c9a2c695..c447a1f25b6 100644 --- a/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll +++ b/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll @@ -20,7 +20,7 @@ bb1: ; preds = %entry %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0 store %struct.mo* undef, %struct.mo** %1, align 4 %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5 - %3 = load i64* %2, align 4 + %3 = load i64, i64* %2, align 4 %4 = call i32 @mo_create_nnm(%struct.mo* undef, i64 %3, 
i32** undef) nounwind br i1 undef, label %bb3, label %bb2 diff --git a/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll index 07180d83fee..92bdd19a7b3 100644 --- a/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll +++ b/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll @@ -21,8 +21,8 @@ for.body: ; preds = %_Z14printIsNotZeroi %x = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0 %y = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1 %inc = add i32 %i.022, 1 - %tmp8 = load i32* %x, align 4 - %tmp11 = load i32* %y, align 4 + %tmp8 = load i32, i32* %x, align 4 + %tmp11 = load i32, i32* %y, align 4 %mul = mul nsw i32 %tmp11, %tmp8 %tobool.i14 = icmp eq i32 %mul, 0 br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16 @@ -35,7 +35,7 @@ _Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi _Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17 %b.phi.trans.insert = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3 - %tmp3.pre = load i8* %b.phi.trans.insert, align 1 + %tmp3.pre = load i8, i8* %b.phi.trans.insert, align 1 %phitmp27 = icmp eq i8 %val8, 0 br label %for.body diff --git a/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll b/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll index 305d4cda80c..7f603157c5d 100644 --- a/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll +++ b/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll @@ -21,7 +21,7 @@ bb: ; preds = %entry, %bb %uglygep = getelementptr i8, i8* %src_copy_start6, i32 %tmp %src_copy_start_addr.04 = bitcast i8* %uglygep to float* %dst_copy_start_addr.03 = getelementptr float, float* %dst_copy_start, i32 %j.05 - %1 = load float* %src_copy_start_addr.04, align 4 + %1 = load float, float* %src_copy_start_addr.04, align 4 store float %1, float* %dst_copy_start_addr.03, align 4 %2 = add i32 %j.05, 1 %exitcond = icmp eq i32 %2, %src_width diff --git a/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll index f497fc21c77..54fc9b049b8 100644 --- a/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll +++ b/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll @@ -19,7 +19,7 @@ for.body: ; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}} ; CHECK-NOT: mov r{{[0-9]+}}, #{{[01]}} %arrayidx = getelementptr i32, i32* %A, i32 %0 - %tmp4 = load i32* %arrayidx, align 4 + %tmp4 = load i32, i32* %arrayidx, align 4 %cmp6 = icmp eq i32 %tmp4, %value br i1 %cmp6, label %return, label %for.inc diff --git a/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll index e9a6793a768..8ad654fc8f9 100644 --- a/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll +++ b/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll @@ -7,7 +7,7 @@ target triple = "thumbv7-apple-darwin10.0.0" define void @_Z8TestCasev() nounwind ssp { entry: %a = alloca float, align 4 - %tmp = load float* %a, align 4 + %tmp = load float, float* %a, align 4 call void asm sideeffect "", "w,~{s0},~{s16}"(float %tmp) nounwind, !srcloc !0 ret void } diff --git a/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll index 057c19948c3..132b78e153b 100644 --- a/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll +++ b/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll @@ -22,7 +22,7 @@ entry: %block_count = alloca i32, align 4 %index_cache = alloca i32, align 4 store i32 0, i32* %index_cache, align 4 - 
%tmp = load i32* @G, align 4 + %tmp = load i32, i32* @G, align 4 %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind switch i32 %tmp1, label %bb8 [ i32 0, label %bb @@ -31,7 +31,7 @@ entry: ] bb: - %tmp2 = load i32* @G, align 4 + %tmp2 = load i32, i32* @G, align 4 %tmp4 = icmp eq i32 %tmp2, 0 br i1 %tmp4, label %bb1, label %bb8 @@ -41,8 +41,8 @@ bb1: ; CHECK: blx _Get ; CHECK: umull ; CHECK: blx _foo - %tmp5 = load i32* %block_size, align 4 - %tmp6 = load i32* %block_count, align 4 + %tmp5 = load i32, i32* %block_size, align 4 + %tmp6 = load i32, i32* %block_count, align 4 %tmp7 = call %struct.FF* @Get() nounwind store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4 %tmp10 = zext i32 %tmp6 to i64 diff --git a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index 3edc946825b..cc843471342 100644 --- a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -30,7 +30,7 @@ target triple = "thumbv7-apple-macosx10.7.0" define i32 @get1(i32 %a) nounwind optsize ssp { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30 - %1 = load i32* @x1, align 4, !dbg !31 + %1 = load i32, i32* @x1, align 4, !dbg !31 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !31 store i32 %a, i32* @x1, align 4, !dbg !31 ret i32 %1, !dbg !31 @@ -38,7 +38,7 @@ define i32 @get1(i32 %a) nounwind optsize ssp { define i32 @get2(i32 %a) nounwind optsize ssp { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !32 - %1 = load i32* @x2, align 4, !dbg !33 + %1 = load i32, i32* @x2, align 4, !dbg !33 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !33 store i32 %a, i32* @x2, align 4, !dbg !33 ret i32 %1, !dbg !33 @@ -46,7 +46,7 @@ define i32 @get2(i32 %a) nounwind optsize ssp { define i32 @get3(i32 %a) nounwind optsize ssp { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !34 - %1 = load i32* @x3, align 4, !dbg !35 + %1 = load i32, i32* @x3, align 4, !dbg !35 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !35 store i32 %a, i32* @x3, align 4, !dbg !35 ret i32 %1, !dbg !35 @@ -54,7 +54,7 @@ define i32 @get3(i32 %a) nounwind optsize ssp { define i32 @get4(i32 %a) nounwind optsize ssp { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !36 - %1 = load i32* @x4, align 4, !dbg !37 + %1 = load i32, i32* @x4, align 4, !dbg !37 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !37 store i32 %a, i32* @x4, align 4, !dbg !37 ret i32 %1, !dbg !37 @@ -62,7 +62,7 @@ define i32 @get4(i32 %a) nounwind optsize ssp { define i32 @get5(i32 %a) nounwind optsize ssp { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38 - %1 = load i32* @x5, align 4, !dbg !39 + %1 = load i32, i32* @x5, align 4, !dbg !39 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !39 store i32 %a, i32* @x5, align 4, !dbg !39 ret i32 %1, !dbg !39 diff --git a/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll b/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll index be188ef630f..fb845447950 100644 --- a/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll +++ b/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll 
@@ -32,7 +32,7 @@ define void @t() nounwind { entry: - %tmp = load i64* undef, align 4 + %tmp = load i64, i64* undef, align 4 %tmp5 = udiv i64 %tmp, 30 %tmp13 = and i64 %tmp5, 64739244643450880 %tmp16 = sub i64 0, %tmp13 diff --git a/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll b/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll index 4cea77bd345..d9b38b5e573 100644 --- a/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll +++ b/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll @@ -18,7 +18,7 @@ bb.i: ; preds = %bb5.i %1 = shl nsw i32 %k_addr.0.i, 1 %.sum8.i = add i32 %1, -1 %2 = getelementptr inbounds [256 x i32], [256 x i32]* %heap, i32 0, i32 %.sum8.i - %3 = load i32* %2, align 4 + %3 = load i32, i32* %2, align 4 br i1 false, label %bb5.i, label %bb4.i bb4.i: ; preds = %bb.i diff --git a/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll b/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll index 8fe91028e0c..2561af707d7 100644 --- a/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll +++ b/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll @@ -11,12 +11,12 @@ target triple = "armv7-none-linux-gnueabi" @z2 = common global <4 x i16> zeroinitializer define void @f() { - %1 = load <3 x i16>* @x1 - %2 = load <3 x i16>* @y1 + %1 = load <3 x i16>, <3 x i16>* @x1 + %2 = load <3 x i16>, <3 x i16>* @y1 %3 = sdiv <3 x i16> %1, %2 store <3 x i16> %3, <3 x i16>* @z1 - %4 = load <4 x i16>* @x2 - %5 = load <4 x i16>* @y2 + %4 = load <4 x i16>, <4 x i16>* @x2 + %5 = load <4 x i16>, <4 x i16>* @y2 %6 = sdiv <4 x i16> %4, %5 store <4 x i16> %6, <4 x i16>* @z2 ret void diff --git a/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll b/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll index c6f4a93def1..53e3bed5383 100644 --- a/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll +++ b/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll @@ -4,7 +4,7 @@ ; ARM target specific dag combine created a cycle in DAG. 
define void @t() nounwind ssp { - %1 = load i64* undef, align 4 + %1 = load i64, i64* undef, align 4 %2 = shl i32 5, 0 %3 = zext i32 %2 to i64 %4 = and i64 %1, %3 diff --git a/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll index 297a0ae39d1..9f2fa63a70b 100644 --- a/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll +++ b/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll @@ -9,11 +9,11 @@ L.entry: %2 = mul i32 %0, 6 %3 = getelementptr i8, i8* %1, i32 %2 %4 = bitcast i8* %3 to <3 x i16>* - %5 = load <3 x i16>* %4, align 1 + %5 = load <3 x i16>, <3 x i16>* %4, align 1 %6 = bitcast i16* %sourceA to i8* %7 = getelementptr i8, i8* %6, i32 %2 %8 = bitcast i8* %7 to <3 x i16>* - %9 = load <3 x i16>* %8, align 1 + %9 = load <3 x i16>, <3 x i16>* %8, align 1 %10 = or <3 x i16> %9, %5 store <3 x i16> %10, <3 x i16>* %4, align 1 ret void diff --git a/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll index a707a92c9fa..e7059716c49 100644 --- a/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll +++ b/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll @@ -7,8 +7,8 @@ define void @test_neon_vector_add_2xi8() nounwind { ; CHECK-LABEL: test_neon_vector_add_2xi8: - %1 = load <2 x i8>* @i8_src1 - %2 = load <2 x i8>* @i8_src2 + %1 = load <2 x i8>, <2 x i8>* @i8_src1 + %2 = load <2 x i8>, <2 x i8>* @i8_src2 %3 = add <2 x i8> %1, %2 store <2 x i8> %3, <2 x i8>* @i8_res ret void @@ -16,8 +16,8 @@ define void @test_neon_vector_add_2xi8() nounwind { define void @test_neon_ld_st_volatile_with_ashr_2xi8() { ; CHECK-LABEL: test_neon_ld_st_volatile_with_ashr_2xi8: - %1 = load volatile <2 x i8>* @i8_src1 - %2 = load volatile <2 x i8>* @i8_src2 + %1 = load volatile <2 x i8>, <2 x i8>* @i8_src1 + %2 = load volatile <2 x i8>, <2 x i8>* @i8_src2 %3 = ashr <2 x i8> %1, %2 store volatile <2 x i8> %3, <2 x i8>* @i8_res ret void diff --git a/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll index c1554d848c4..6dc9d4b7025 100644 --- a/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll +++ b/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll @@ -8,7 +8,7 @@ declare <2 x i16> @foo_v2i16(<2 x i16>) nounwind define void @test_neon_call_return_v2i16() { ; CHECK-LABEL: test_neon_call_return_v2i16: - %1 = load <2 x i16>* @src1_v2i16 + %1 = load <2 x i16>, <2 x i16>* @src1_v2i16 %2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind store <2 x i16> %2, <2 x i16>* @res_v2i16 ret void diff --git a/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll index c50461a42d8..1da93bdd7c9 100644 --- a/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll +++ b/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll @@ -4,7 +4,7 @@ define <2 x i32> @test1(<2 x double>* %A) { ; CHECK: test1 ; CHECK: vcvt.s32.f64 ; CHECK: vcvt.s32.f64 - %tmp1 = load <2 x double>* %A + %tmp1 = load <2 x double>, <2 x double>* %A %tmp2 = fptosi <2 x double> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } @@ -13,7 +13,7 @@ define <2 x i32> @test2(<2 x double>* %A) { ; CHECK: test2 ; CHECK: vcvt.u32.f64 ; CHECK: vcvt.u32.f64 - %tmp1 = load <2 x double>* %A + %tmp1 = load <2 x double>, <2 x double>* %A %tmp2 = fptoui <2 x double> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } @@ -22,7 +22,7 @@ define <2 x double> @test3(<2 x i32>* %A) { ; CHECK: 
test3 ; CHECK: vcvt.f64.s32 ; CHECK: vcvt.f64.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = sitofp <2 x i32> %tmp1 to <2 x double> ret <2 x double> %tmp2 } @@ -31,7 +31,7 @@ define <2 x double> @test4(<2 x i32>* %A) { ; CHECK: test4 ; CHECK: vcvt.f64.u32 ; CHECK: vcvt.f64.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = uitofp <2 x i32> %tmp1 to <2 x double> ret <2 x double> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll index eadcbab9fd9..7f4057143a0 100644 --- a/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll +++ b/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll @@ -31,25 +31,25 @@ entry: store double 0.000000e+00, double* null, align 4 %call = tail call double @cos(double %angle) nounwind readnone %call1 = tail call double @sin(double %angle) nounwind readnone - %0 = load double* %V1, align 4 + %0 = load double, double* %V1, align 4 %arrayidx2 = getelementptr inbounds double, double* %V1, i32 1 - %1 = load double* %arrayidx2, align 4 + %1 = load double, double* %arrayidx2, align 4 %mul = fmul double %0, %1 %sub = fsub double 1.000000e+00, %call %mul3 = fmul double %mul, %sub - %2 = load double* undef, align 4 + %2 = load double, double* undef, align 4 %mul5 = fmul double %2, %call1 %add = fadd double %mul3, %mul5 store double %add, double* %arrayidx5.1.i, align 4 - %3 = load double* %V1, align 4 + %3 = load double, double* %V1, align 4 %mul11 = fmul double %3, undef %mul13 = fmul double %mul11, %sub - %4 = load double* %arrayidx2, align 4 + %4 = load double, double* %arrayidx2, align 4 %mul15 = fmul double %4, %call1 %sub16 = fsub double %mul13, %mul15 store double %sub16, double* %arrayidx5.2.i, align 4 - %5 = load double* %V1, align 4 - %6 = load double* %arrayidx2, align 4 + %5 = load double, double* %V1, align 4 + %6 = load double, double* %arrayidx2, align 4 %mul22 = fmul double %5, %6 %mul24 = fmul double %mul22, %sub %sub27 = fsub double %mul24, undef diff --git a/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll index bc496b99f4a..c2cafaac6be 100644 --- a/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll +++ b/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll @@ -15,14 +15,14 @@ define hidden void @foo() { ; CHECK: ldr.w ; CHECK-NOT: ldm entry: - %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1 - %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1 - %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1 - %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1 - %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1 - %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1 - %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1 - %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1 + %tmp13 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1 + %tmp15 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* 
@infoBlock, i32 0, i32 1, i32 0, i32 1), align 1 + %tmp17 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1 + %tmp19 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1 + %tmp = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1 + %tmp3 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1 + %tmp4 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1 + %tmp5 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1 %insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0 %insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1 %insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2 diff --git a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 86b58c8186b..404aca13cdb 100644 --- a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -16,7 +16,7 @@ define void @test_sqrt(<4 x float>* %X) nounwind { ; CHECK: vst1.64 {{.*}} L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -48,7 +48,7 @@ define void @test_cos(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -79,7 +79,7 @@ define void @test_exp(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -110,7 +110,7 @@ define void @test_exp2(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -141,7 +141,7 @@ define void @test_log10(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -172,7 +172,7 @@ define void @test_log(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -203,7 +203,7 @@ define void @test_log2(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -236,7 +236,7 @@ define void @test_pow(<4 x float>* %X) nounwind { L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, 
<4 x float> <float 2., float 2., float 2., float 2.>) store <4 x float> %1, <4 x float>* %X, align 16 @@ -259,7 +259,7 @@ define void @test_powi(<4 x float>* %X) nounwind { L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2) store <4 x float> %1, <4 x float>* %X, align 16 @@ -292,7 +292,7 @@ define void @test_sin(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void @@ -323,7 +323,7 @@ define void @test_floor(<4 x float>* %X) nounwind { ; CHECK: vst1.64 L.entry: - %0 = load <4 x float>* @A, align 16 + %0 = load <4 x float>, <4 x float>* @A, align 16 %1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %0) store <4 x float> %1, <4 x float>* %X, align 16 ret void diff --git a/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll index 0c90f4cf949..0d324404d7b 100644 --- a/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll +++ b/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll @@ -8,9 +8,9 @@ target triple = "thumbv7-apple-darwin10" @x2 = internal global i64 12 define i64 @f() { - %ax = load i32* @x1 + %ax = load i32, i32* @x1 %a = zext i32 %ax to i64 - %b = load i64* @x2 + %b = load i64, i64* @x2 %c = add i64 %a, %b ret i64 %c } diff --git a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll index 9334bf36d80..47b2260d41f 100644 --- a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll +++ b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -15,13 +15,13 @@ for.cond: ; preds = %for.body, %entry for.body: ; preds = %for.cond %v.5 = select i1 undef, i32 undef, i32 0 - %0 = load i8* undef, align 1 + %0 = load i8, i8* undef, align 1 %conv88 = zext i8 %0 to i32 %sub89 = sub nsw i32 0, %conv88 %v.8 = select i1 undef, i32 undef, i32 %sub89 - %1 = load i8* null, align 1 + %1 = load i8, i8* null, align 1 %conv108 = zext i8 %1 to i32 - %2 = load i8* undef, align 1 + %2 = load i8, i8* undef, align 1 %conv110 = zext i8 %2 to i32 %sub111 = sub nsw i32 %conv108, %conv110 %cmp112 = icmp slt i32 %sub111, 0 diff --git a/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll index ddb76326947..40d1f628aaa 100644 --- a/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll +++ b/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll @@ -22,13 +22,13 @@ bb: store i32 %b, i32* %tmp1, align 4 store i8* %d, i8** %tmp2, align 4 store i1 false, i1* %tmp3 - %tmp7 = load i8** %c + %tmp7 = load i8*, i8** %c %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null) to label %bb11 unwind label %bb15 bb11: ; preds = %bb store %0* %tmp10, %0** %myException, align 4 - %tmp12 = load %0** %myException, align 4 + %tmp12 = load %0*, %0** %myException, align 4 %tmp13 = bitcast %0* %tmp12 to i8* invoke void @objc_exception_throw(i8* %tmp13) noreturn to label %bb14 unwind label %bb15 diff --git a/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll index 0f1c452b867..3f827f8e702 100644 --- a/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll +++ b/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll @@ -18,7 +18,7 @@ bb3: ; preds = %bb4, %bb2 br i1 %tmp, label %bb4, label %bb67 bb4: ; preds = %bb3 - 
%tmp5 = load <4 x i32>* undef, align 16 + %tmp5 = load <4 x i32>, <4 x i32>* undef, align 16 %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float> @@ -41,9 +41,9 @@ bb4: ; preds = %bb3 %tmp24 = trunc i128 %tmp23 to i64 %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0 %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1 - %tmp27 = load float* undef, align 4 + %tmp27 = load float, float* undef, align 4 %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3 - %tmp29 = load <4 x i32>* undef, align 16 + %tmp29 = load <4 x i32>, <4 x i32>* undef, align 16 %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float> @@ -52,10 +52,10 @@ bb4: ; preds = %bb3 %tmp35 = fmul <4 x float> %tmp34, undef %tmp36 = fmul <4 x float> %tmp35, undef %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind - %tmp38 = load float* undef, align 4 + %tmp38 = load float, float* undef, align 4 %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0 %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind - %tmp41 = load float* undef, align 4 + %tmp41 = load float, float* undef, align 4 %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3 %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer %tmp44 = fmul <4 x float> %tmp33, %tmp43 @@ -64,10 +64,10 @@ bb4: ; preds = %bb3 %tmp47 = fmul <4 x float> %tmp46, %tmp36 %tmp48 = fadd <4 x float> undef, %tmp47 %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind - %tmp50 = load float* undef, align 4 + %tmp50 = load float, float* undef, align 4 %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3 %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind - %tmp54 = load float* %tmp52, align 4 + %tmp54 = load float, float* %tmp52, align 4 %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3 %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22 %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind diff --git a/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll index 61623ec1b6a..b70b7f6f3b2 100644 --- a/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll +++ b/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -7,7 +7,7 @@ target triple = "armv7-none-linux-eabi" ; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE. 
define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 { bb: - %tmp = load <2 x float>* undef, align 8 + %tmp = load <2 x float>, <2 x float>* undef, align 8 %tmp2 = extractelement <2 x float> %tmp, i32 0 %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0 %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1 diff --git a/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll index a9e2ebb7fe1..2484f0d42ed 100644 --- a/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll +++ b/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll @@ -56,9 +56,9 @@ bb3: ; preds = %bb2 %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float> %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %tmp42 = load <4 x float>* null, align 16 + %tmp42 = load <4 x float>, <4 x float>* null, align 16 %tmp43 = fmul <4 x float> %tmp42, %tmp41 - %tmp44 = load <4 x float>* undef, align 16 + %tmp44 = load <4 x float>, <4 x float>* undef, align 16 %tmp45 = fadd <4 x float> undef, %tmp43 %tmp46 = fadd <4 x float> undef, %tmp45 %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64> @@ -76,7 +76,7 @@ bb3: ; preds = %bb2 %tmp59 = fmul <4 x float> undef, %tmp58 %tmp60 = fadd <4 x float> %tmp59, undef %tmp61 = fadd <4 x float> %tmp60, zeroinitializer - %tmp62 = load void (i8*, i8*)** undef, align 4 + %tmp62 = load void (i8*, i8*)*, void (i8*, i8*)** undef, align 4 call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind %tmp63 = bitcast <4 x float> %tmp46 to i128 %tmp64 = bitcast <4 x float> %tmp54 to i128 @@ -96,7 +96,7 @@ bb3: ; preds = %bb2 call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind %tmp79 = bitcast i8* %tmp78 to i512* - %tmp80 = load i512* %tmp79, align 16 + %tmp80 = load i512, i512* %tmp79, align 16 %tmp81 = lshr i512 %tmp80, 128 %tmp82 = trunc i512 %tmp80 to i128 %tmp83 = trunc i512 %tmp81 to i128 diff --git a/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll index 6c7aaad7c69..2495b306ab0 100644 --- a/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll +++ b/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll @@ -10,7 +10,7 @@ target triple = "armv7-none-linux-gnueabi" @foo = external global %0, align 16 define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind { - %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16 + %4 = load <4 x float>, <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16 %5 = extractelement <4 x float> %4, i32 0 %6 = extractelement <4 x float> %4, i32 1 %7 = extractelement <4 x float> %4, i32 2 diff --git a/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll index 6206cd74d58..6fb760c4bcc 100644 --- a/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll +++ b/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll @@ -7,7 +7,7 @@ define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind { entry: ; CHECK: vst1.32 - %0 = load <3 x i16> * %srcA, align 8 + %0 = load <3 x i16> , <3 x i16> * %srcA, align 8 %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef> store <2 x i16> %1, <2 x i16> * %dst, align 4 ret void diff --git a/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll 
b/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll index a288015d601..6f92613fa1f 100644 --- a/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll +++ b/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll @@ -27,13 +27,13 @@ define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind { entry: store volatile i32 65540, i32* %p1, align 4 - %0 = load volatile i32* %p2, align 4 + %0 = load volatile i32, i32* %p2, align 4 ret i32 %0 } define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind { entry: store i32 65540, i32* %p1, align 4 - %0 = load i32* %p2, align 4 + %0 = load i32, i32* %p2, align 4 ret i32 %0 } diff --git a/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll index 70e30793455..6e0b828ad24 100644 --- a/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll +++ b/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll @@ -129,7 +129,7 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable %45 = fmul <4 x float> undef, undef %46 = fmul <4 x float> %45, %43 %47 = fmul <4 x float> undef, %44 - %48 = load <4 x float>* undef, align 8 + %48 = load <4 x float>, <4 x float>* undef, align 8 %49 = bitcast <4 x float> %48 to <2 x i64> %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1> %51 = bitcast <1 x i64> %50 to <2 x float> diff --git a/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll b/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll index bdcd1b6ad4b..576dff4d001 100644 --- a/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll +++ b/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll @@ -6,7 +6,7 @@ target triple = "armv7-none-linux-gnueabi" define void @test_hi_char8() noinline { entry: - %0 = load <4 x i8>* undef, align 1 + %0 = load <4 x i8>, <4 x i8>* undef, align 1 store <4 x i8> %0, <4 x i8>* null, align 4 ret void } diff --git a/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll index a7108253cb6..285a431a6ec 100644 --- a/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll +++ b/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll @@ -14,7 +14,7 @@ define void @test_v2i8tov2i32() { ; CHECK-LABEL: test_v2i8tov2i32: - %i8val = load <2 x i8>* @var_v2i8 + %i8val = load <2 x i8>, <2 x i8>* @var_v2i8 %i32val = sext <2 x i8> %i8val to <2 x i32> store <2 x i32> %i32val, <2 x i32>* @var_v2i32 @@ -28,7 +28,7 @@ define void @test_v2i8tov2i32() { define void @test_v2i8tov2i64() { ; CHECK-LABEL: test_v2i8tov2i64: - %i8val = load <2 x i8>* @var_v2i8 + %i8val = load <2 x i8>, <2 x i8>* @var_v2i8 %i64val = sext <2 x i8> %i8val to <2 x i64> store <2 x i64> %i64val, <2 x i64>* @var_v2i64 @@ -46,7 +46,7 @@ define void @test_v2i8tov2i64() { define void @test_v4i8tov4i16() { ; CHECK-LABEL: test_v4i8tov4i16: - %i8val = load <4 x i8>* @var_v4i8 + %i8val = load <4 x i8>, <4 x i8>* @var_v4i8 %i16val = sext <4 x i8> %i8val to <4 x i16> store <4 x i16> %i16val, <4 x i16>* @var_v4i16 @@ -61,7 +61,7 @@ define void @test_v4i8tov4i16() { define void @test_v4i8tov4i32() { ; CHECK-LABEL: test_v4i8tov4i32: - %i8val = load <4 x i8>* @var_v4i8 + %i8val = load <4 x i8>, <4 x i8>* @var_v4i8 %i16val = sext <4 x i8> %i8val to <4 x i32> store <4 x i32> %i16val, <4 x i32>* @var_v4i32 @@ -75,7 +75,7 @@ define void @test_v4i8tov4i32() { define void @test_v2i16tov2i32() { ; CHECK-LABEL: test_v2i16tov2i32: - %i16val = load <2 x i16>* @var_v2i16 + %i16val = load <2 x i16>, <2 x i16>* @var_v2i16 %i32val = sext <2 x i16> %i16val to <2 x i32> store <2 
x i32> %i32val, <2 x i32>* @var_v2i32 @@ -90,7 +90,7 @@ define void @test_v2i16tov2i32() { define void @test_v2i16tov2i64() { ; CHECK-LABEL: test_v2i16tov2i64: - %i16val = load <2 x i16>* @var_v2i16 + %i16val = load <2 x i16>, <2 x i16>* @var_v2i16 %i64val = sext <2 x i16> %i16val to <2 x i64> store <2 x i64> %i64val, <2 x i64>* @var_v2i64 diff --git a/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll index e8d4fb22a59..3a851d68f0a 100644 --- a/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll +++ b/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll @@ -15,7 +15,7 @@ define void @sextload_v4i8_c(<4 x i8>* %v) nounwind { ;CHECK-LABEL: sextload_v4i8_c: entry: - %0 = load <4 x i8>* %v, align 8 + %0 = load <4 x i8>, <4 x i8>* %v, align 8 %v0 = sext <4 x i8> %0 to <4 x i32> ;CHECK: vmull %v1 = mul <4 x i32> %v0, <i32 3, i32 3, i32 3, i32 3> @@ -28,7 +28,7 @@ entry: define void @sextload_v2i8_c(<2 x i8>* %v) nounwind { ;CHECK-LABEL: sextload_v2i8_c: entry: - %0 = load <2 x i8>* %v, align 8 + %0 = load <2 x i8>, <2 x i8>* %v, align 8 %v0 = sext <2 x i8> %0 to <2 x i64> ;CHECK: vmull %v1 = mul <2 x i64> %v0, <i64 3, i64 3> @@ -41,7 +41,7 @@ entry: define void @sextload_v2i16_c(<2 x i16>* %v) nounwind { ;CHECK-LABEL: sextload_v2i16_c: entry: - %0 = load <2 x i16>* %v, align 8 + %0 = load <2 x i16>, <2 x i16>* %v, align 8 %v0 = sext <2 x i16> %0 to <2 x i64> ;CHECK: vmull %v1 = mul <2 x i64> %v0, <i64 3, i64 3> @@ -56,10 +56,10 @@ entry: define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind { ;CHECK-LABEL: sextload_v4i8_v: entry: - %0 = load <4 x i8>* %v, align 8 + %0 = load <4 x i8>, <4 x i8>* %v, align 8 %v0 = sext <4 x i8> %0 to <4 x i32> - %1 = load <4 x i8>* %p, align 8 + %1 = load <4 x i8>, <4 x i8>* %p, align 8 %v2 = sext <4 x i8> %1 to <4 x i32> ;CHECK: vmull %v1 = mul <4 x i32> %v0, %v2 @@ -72,10 +72,10 @@ entry: define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind { ;CHECK-LABEL: sextload_v2i8_v: entry: - %0 = load <2 x i8>* %v, align 8 + %0 = load <2 x i8>, <2 x i8>* %v, align 8 %v0 = sext <2 x i8> %0 to <2 x i64> - %1 = load <2 x i8>* %p, align 8 + %1 = load <2 x i8>, <2 x i8>* %p, align 8 %v2 = sext <2 x i8> %1 to <2 x i64> ;CHECK: vmull %v1 = mul <2 x i64> %v0, %v2 @@ -88,10 +88,10 @@ entry: define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind { ;CHECK-LABEL: sextload_v2i16_v: entry: - %0 = load <2 x i16>* %v, align 8 + %0 = load <2 x i16>, <2 x i16>* %v, align 8 %v0 = sext <2 x i16> %0 to <2 x i64> - %1 = load <2 x i16>* %p, align 8 + %1 = load <2 x i16>, <2 x i16>* %p, align 8 %v2 = sext <2 x i16> %1 to <2 x i64> ;CHECK: vmull %v1 = mul <2 x i64> %v0, %v2 @@ -106,10 +106,10 @@ entry: define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind { ;CHECK-LABEL: sextload_v4i8_vs: entry: - %0 = load <4 x i8>* %v, align 8 + %0 = load <4 x i8>, <4 x i8>* %v, align 8 %v0 = sext <4 x i8> %0 to <4 x i32> - %1 = load <4 x i16>* %p, align 8 + %1 = load <4 x i16>, <4 x i16>* %p, align 8 %v2 = sext <4 x i16> %1 to <4 x i32> ;CHECK: vmull %v1 = mul <4 x i32> %v0, %v2 @@ -122,10 +122,10 @@ entry: define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind { ;CHECK-LABEL: sextload_v2i8_vs: entry: - %0 = load <2 x i8>* %v, align 8 + %0 = load <2 x i8>, <2 x i8>* %v, align 8 %v0 = sext <2 x i8> %0 to <2 x i64> - %1 = load <2 x i16>* %p, align 8 + %1 = load <2 x i16>, <2 x i16>* %p, align 8 %v2 = sext <2 x i16> %1 to <2 x i64> ;CHECK: vmull %v1 = mul <2 x i64> %v0, %v2 @@ -138,10 +138,10 @@ 
entry: define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind { ;CHECK-LABEL: sextload_v2i16_vs: entry: - %0 = load <2 x i16>* %v, align 8 + %0 = load <2 x i16>, <2 x i16>* %v, align 8 %v0 = sext <2 x i16> %0 to <2 x i64> - %1 = load <2 x i32>* %p, align 8 + %1 = load <2 x i32>, <2 x i32>* %p, align 8 %v2 = sext <2 x i32> %1 to <2 x i64> ;CHECK: vmull %v1 = mul <2 x i64> %v0, %v2 diff --git a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index bfbd07889ee..b5bdc1b9dfa 100644 --- a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -45,7 +45,7 @@ declare void @f(double); define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %val) nounwind { entry: %a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0 - %0 = load double* %a + %0 = load double, double* %a call void (double)* @f(double %0) ret void } diff --git a/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll b/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll index 694025af0d7..203ba4db361 100644 --- a/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll +++ b/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll @@ -13,7 +13,7 @@ declare i32 @printf(i8*, ...) define void @test_byval_usage_scheduling(i32 %n1, i32 %n2, %struct_t* byval %val) nounwind { entry: %a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0 - %0 = load double* %a + %0 = load double, double* %a %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %0) ret void } diff --git a/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll b/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll index f6cd742efd2..4c1f2a741e4 100644 --- a/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll +++ b/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll @@ -6,11 +6,11 @@ ;CHECK-LABEL: foo: define i32 @foo(i32* %a) nounwind optsize { entry: - %0 = load i32* %a, align 4 + %0 = load i32, i32* %a, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 1 - %1 = load i32* %arrayidx1, align 4 + %1 = load i32, i32* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 2 - %2 = load i32* %arrayidx2, align 4 + %2 = load i32, i32* %arrayidx2, align 4 %add.ptr = getelementptr inbounds i32, i32* %a, i32 3 ;Make sure we do not have a duplicated register in the front of the reg list ;EXPECTED: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}}, diff --git a/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll b/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll index 9378e4c790a..6c8b0ff2de1 100644 --- a/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll +++ b/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll @@ -9,9 +9,9 @@ define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i6 entry: ; Load %source - %s0 = load <8 x i64> * %source, align 64 + %s0 = load <8 x i64> , <8 x i64> * %source, align 64 %arrayidx64 = getelementptr inbounds <8 x i64>, <8 x i64> * %source, i32 6 - %s120 = load <8 x i64> * %arrayidx64, align 64 + %s120 = load <8 x i64> , <8 x i64> * %arrayidx64, align 64 %s122 = bitcast <8 x i64> %s120 to i512 %data.i.i677.48.extract.shift = lshr i512 %s122, 384 %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64 @@ -33,9 +33,9 @@ entry: %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7 ; Load %secondSource - %s1 = load <8 x i64> * 
%secondSource, align 64 + %s1 = load <8 x i64> , <8 x i64> * %secondSource, align 64 %arrayidx67 = getelementptr inbounds <8 x i64>, <8 x i64> * %secondSource, i32 6 - %s121 = load <8 x i64> * %arrayidx67, align 64 + %s121 = load <8 x i64> , <8 x i64> * %arrayidx67, align 64 %s131 = bitcast <8 x i64> %s121 to i512 %data.i1.i676.48.extract.shift = lshr i512 %s131, 384 %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64 @@ -64,7 +64,7 @@ entry: %arrayidx72 = getelementptr inbounds <8 x i64>, <8 x i64> * %dest, i32 6 store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64 %arrayidx78 = getelementptr inbounds <8 x i64>, <8 x i64> * %secondSource, i32 7 - %s141 = load <8 x i64> * %arrayidx78, align 64 + %s141 = load <8 x i64> , <8 x i64> * %arrayidx78, align 64 %s151 = bitcast <8 x i64> %s141 to i512 %data.i1.i649.32.extract.shift = lshr i512 %s151, 256 %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64 diff --git a/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll b/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll index 63605ed3e04..aabbfae8b87 100644 --- a/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll +++ b/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll @@ -8,45 +8,45 @@ entry: %sub19 = sub i32 %add, %Width %sub20 = add i32 %sub19, -1 %arrayidx21 = getelementptr inbounds i8, i8* %call1, i32 %sub20 - %0 = load i8* %arrayidx21, align 1 + %0 = load i8, i8* %arrayidx21, align 1 %conv22 = zext i8 %0 to i32 %arrayidx25 = getelementptr inbounds i8, i8* %call1, i32 %sub19 - %1 = load i8* %arrayidx25, align 1 + %1 = load i8, i8* %arrayidx25, align 1 %conv26 = zext i8 %1 to i32 %mul23189 = add i32 %conv26, %conv22 %add30 = add i32 %sub19, 1 %arrayidx31 = getelementptr inbounds i8, i8* %call1, i32 %add30 - %2 = load i8* %arrayidx31, align 1 + %2 = load i8, i8* %arrayidx31, align 1 %conv32 = zext i8 %2 to i32 ; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1] ; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1] %add28190 = add i32 %mul23189, %conv32 %sub35 = add i32 %add, -1 %arrayidx36 = getelementptr inbounds i8, i8* %call1, i32 %sub35 - %3 = load i8* %arrayidx36, align 1 + %3 = load i8, i8* %arrayidx36, align 1 %conv37 = zext i8 %3 to i32 %add34191 = add i32 %add28190, %conv37 %arrayidx40 = getelementptr inbounds i8, i8* %call1, i32 %add - %4 = load i8* %arrayidx40, align 1 + %4 = load i8, i8* %arrayidx40, align 1 %conv41 = zext i8 %4 to i32 %mul42 = mul nsw i32 %conv41, 255 %add44 = add i32 %add, 1 %arrayidx45 = getelementptr inbounds i8, i8* %call1, i32 %add44 - %5 = load i8* %arrayidx45, align 1 + %5 = load i8, i8* %arrayidx45, align 1 %conv46 = zext i8 %5 to i32 ; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1] ; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1] %add49 = add i32 %add, %Width %sub50 = add i32 %add49, -1 %arrayidx51 = getelementptr inbounds i8, i8* %call1, i32 %sub50 - %6 = load i8* %arrayidx51, align 1 + %6 = load i8, i8* %arrayidx51, align 1 %conv52 = zext i8 %6 to i32 %arrayidx56 = getelementptr inbounds i8, i8* %call1, i32 %add49 - %7 = load i8* %arrayidx56, align 1 + %7 = load i8, i8* %arrayidx56, align 1 %conv57 = zext i8 %7 to i32 %add61 = add i32 %add49, 1 %arrayidx62 = getelementptr inbounds i8, i8* %call1, i32 %add61 - %8 = load i8* %arrayidx62, align 1 + %8 = load i8, i8* %arrayidx62, align 1 %conv63 = zext i8 %8 to i32 ; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1] ; CHECK-NEXT: ldrb{{[.w]*}} r{{[0-9]*}}, [r{{[0-9]*}}, #1] diff --git 
a/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll b/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll index 0130f7ab68f..617271264b4 100644 --- a/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll +++ b/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll @@ -10,7 +10,7 @@ define arm_aapcscc void @f2(i8 signext %a) #0 { entry: %a.addr = alloca i8, align 1 store i8 %a, i8* %a.addr, align 1 - %0 = load i8* %a.addr, align 1 + %0 = load i8, i8* %a.addr, align 1 %conv = sext i8 %0 to i32 %shr = ashr i32 %conv, 56 %conv1 = trunc i32 %shr to i8 diff --git a/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll index 05a4ef05e95..979df3072fb 100644 --- a/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll +++ b/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll @@ -17,7 +17,7 @@ entry: ; CHECK: vorr q9, q9, q10 ; CHECK: vst1.32 {d18, d19}, [r0] vector.body: - %wide.load = load <4 x i32>* undef, align 4 + %wide.load = load <4 x i32>, <4 x i32>* undef, align 4 %0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936> %1 = sub <4 x i32> %wide.load, zeroinitializer %2 = and <4 x i32> %1, <i32 16711680, i32 16711680, i32 16711680, i32 16711680> diff --git a/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll index 6c0fbd00bd1..dc7f3081e03 100644 --- a/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll +++ b/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll @@ -38,13 +38,13 @@ entry: define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind { ; CHECK: vtbx4: ; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use> - %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 - %tmp7 = load <8 x i8>* %C + %tmp7 = load <8 x i8>, <8 x i8>* %C %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8) ret <8 x i8> %tmp8 diff --git a/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll b/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll index 202138c9adc..2efd91f503e 100644 --- a/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll +++ b/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll @@ -12,10 +12,10 @@ define i32 @test1(i32* %p) { %4 = getelementptr inbounds i32, i32* %p, i32 4 ; CHECK-NEXT: ldm [[NEWBASE]], - %5 = load i32* %1, align 4 - %6 = load i32* %2, align 4 - %7 = load i32* %3, align 4 - %8 = load i32* %4, align 4 + %5 = load i32, i32* %1, align 4 + %6 = load i32, i32* %2, align 4 + %7 = load i32, i32* %3, align 4 + %8 = load i32, i32* %4, align 4 %9 = add nsw i32 %5, %6 %10 = add nsw i32 %9, %7 @@ -36,10 +36,10 @@ define i32 @test2(i32* %p) { %4 = getelementptr inbounds i32, i32* %p, i32 5 ; CHECK-NEXT: ldm [[NEWBASE]], - %5 = load i32* %1, align 4 - %6 = load i32* %2, align 4 - %7 = load i32* %3, align 4 - %8 = load i32* %4, align 4 + %5 = load i32, i32* %1, align 4 + %6 = load i32, i32* %2, align 4 + %7 = load i32, i32* %3, align 4 + %8 = load 
i32, i32* %4, align 4 %9 = add nsw i32 %5, %6 %10 = add nsw i32 %9, %7 diff --git a/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll b/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll index db4346e3f5d..3f7d625244b 100644 --- a/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll +++ b/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll @@ -13,12 +13,12 @@ define void @MergeLoadStoreBaseIndexOffset(i32* %a, i8* %b, i8* %c, i32 %n) { %.08 = phi i8* [ %b, %0 ], [ %10, %1 ] %.0 = phi i32* [ %a, %0 ], [ %2, %1 ] %2 = getelementptr inbounds i32, i32* %.0, i32 1 - %3 = load i32* %.0, align 1 + %3 = load i32, i32* %.0, align 1 %4 = getelementptr inbounds i8, i8* %c, i32 %3 - %5 = load i8* %4, align 1 + %5 = load i8, i8* %4, align 1 %6 = add i32 %3, 1 %7 = getelementptr inbounds i8, i8* %c, i32 %6 - %8 = load i8* %7, align 1 + %8 = load i8, i8* %7, align 1 store i8 %5, i8* %.08, align 1 %9 = getelementptr inbounds i8, i8* %.08, i32 1 store i8 %8, i8* %9, align 1 @@ -45,13 +45,13 @@ define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) { %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] %2 = getelementptr inbounds i8, i8* %.0, i32 1 - %3 = load i8* %.0, align 1 + %3 = load i8, i8* %.0, align 1 %4 = sext i8 %3 to i32 %5 = getelementptr inbounds i8, i8* %c, i32 %4 - %6 = load i8* %5, align 1 + %6 = load i8, i8* %5, align 1 %7 = add i32 %4, 1 %8 = getelementptr inbounds i8, i8* %c, i32 %7 - %9 = load i8* %8, align 1 + %9 = load i8, i8* %8, align 1 store i8 %6, i8* %.08, align 1 %10 = getelementptr inbounds i8, i8* %.08, i32 1 store i8 %9, i8* %10, align 1 @@ -77,14 +77,14 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) { %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] %2 = getelementptr inbounds i8, i8* %.0, i32 1 - %3 = load i8* %.0, align 1 + %3 = load i8, i8* %.0, align 1 %4 = sext i8 %3 to i32 %5 = getelementptr inbounds i8, i8* %c, i32 %4 - %6 = load i8* %5, align 1 + %6 = load i8, i8* %5, align 1 %7 = add i8 %3, 1 %wrap.4 = sext i8 %7 to i32 %8 = getelementptr inbounds i8, i8* %c, i32 %wrap.4 - %9 = load i8* %8, align 1 + %9 = load i8, i8* %8, align 1 store i8 %6, i8* %.08, align 1 %10 = getelementptr inbounds i8, i8* %.08, i32 1 store i8 %9, i8* %10, align 1 diff --git a/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll b/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll index a29aaadcc9f..a314259e499 100644 --- a/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll +++ b/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll @@ -10,10 +10,10 @@ entry: %i.addr = alloca i32, align 4 %buffer = alloca [4096 x i8], align 1 store i32 %i, i32* %i.addr, align 4 - %0 = load i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 %rem = urem i32 %0, 4096 %arrayidx = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 %rem - %1 = load volatile i8* %arrayidx, align 1 + %1 = load volatile i8, i8* %arrayidx, align 1 ret i8 %1 } diff --git a/llvm/test/CodeGen/ARM/Windows/dllimport.ll b/llvm/test/CodeGen/ARM/Windows/dllimport.ll index bc737bd4182..6786be3322e 100644 --- a/llvm/test/CodeGen/ARM/Windows/dllimport.ll +++ b/llvm/test/CodeGen/ARM/Windows/dllimport.ll @@ -8,7 +8,7 @@ declare dllimport arm_aapcs_vfpcc i32 @external() declare arm_aapcs_vfpcc i32 @internal() define arm_aapcs_vfpcc i32 @get_var() { - %1 = load i32* @var, align 4 + %1 = load i32, i32* @var, align 4 ret i32 %1 } @@ -20,7 +20,7 @@ define arm_aapcs_vfpcc i32 @get_var() { ; CHECK: bx lr define 
arm_aapcs_vfpcc i32 @get_ext() { - %1 = load i32* @ext, align 4 + %1 = load i32, i32* @ext, align 4 ret i32 %1 } diff --git a/llvm/test/CodeGen/ARM/Windows/frame-register.ll b/llvm/test/CodeGen/ARM/Windows/frame-register.ll index 31167d7352e..7ecfc1a7171 100644 --- a/llvm/test/CodeGen/ARM/Windows/frame-register.ll +++ b/llvm/test/CodeGen/ARM/Windows/frame-register.ll @@ -8,12 +8,12 @@ entry: %i.addr = alloca i32, align 4 %j = alloca i32, align 4 store i32 %i, i32* %i.addr, align 4 - %0 = load i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 %add = add nsw i32 %0, 1 store i32 %add, i32* %j, align 4 - %1 = load i32* %j, align 4 + %1 = load i32, i32* %j, align 4 call void @callee(i32 %1) - %2 = load i32* %j, align 4 + %2 = load i32, i32* %j, align 4 %add1 = add nsw i32 %2, 1 ret i32 %add1 } diff --git a/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll b/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll index 3ae6428d3a6..c21aee087cf 100644 --- a/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll +++ b/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll @@ -10,8 +10,8 @@ ; Function Attrs: nounwind optsize readonly define i32 @relocation(i32 %j, i32 %k) { entry: - %0 = load i32* @i, align 4 - %1 = load i32* @j, align 4 + %0 = load i32, i32* @i, align 4 + %1 = load i32, i32* @j, align 4 %add = add nsw i32 %1, %0 ret i32 %add } diff --git a/llvm/test/CodeGen/ARM/Windows/pic.ll b/llvm/test/CodeGen/ARM/Windows/pic.ll index 28d371f4521..9ef7c35c553 100644 --- a/llvm/test/CodeGen/ARM/Windows/pic.ll +++ b/llvm/test/CodeGen/ARM/Windows/pic.ll @@ -5,7 +5,7 @@ define arm_aapcs_vfpcc i8 @return_external() { entry: - %0 = load i8* @external, align 1 + %0 = load i8, i8* @external, align 1 ret i8 %0 } diff --git a/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll b/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll index 814c8b7ff59..d66e93ad34e 100644 --- a/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll +++ b/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll @@ -12,7 +12,7 @@ entry: %0 = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 0 call arm_aapcs_vfpcc void @initialise(i8* %0) %arrayidx = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 %offset - %1 = load i8* %arrayidx, align 1 + %1 = load i8, i8* %arrayidx, align 1 ret i8 %1 } diff --git a/llvm/test/CodeGen/ARM/Windows/vla.ll b/llvm/test/CodeGen/ARM/Windows/vla.ll index 13180cd65a2..1c0632e25e5 100644 --- a/llvm/test/CodeGen/ARM/Windows/vla.ll +++ b/llvm/test/CodeGen/ARM/Windows/vla.ll @@ -9,7 +9,7 @@ define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) { entry: %vla = alloca i8, i32 %sz, align 1 %arrayidx = getelementptr inbounds i8, i8* %vla, i32 %idx - %0 = load volatile i8* %arrayidx, align 1 + %0 = load volatile i8, i8* %arrayidx, align 1 ret i8 %0 } diff --git a/llvm/test/CodeGen/ARM/a15-partial-update.ll b/llvm/test/CodeGen/ARM/a15-partial-update.ll index 71b95eef88d..576eb7a2439 100644 --- a/llvm/test/CodeGen/ARM/a15-partial-update.ll +++ b/llvm/test/CodeGen/ARM/a15-partial-update.ll @@ -10,7 +10,7 @@ define <2 x float> @t1(float* %A, <2 x float> %B) { ; generated. 
; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}}, - %tmp2 = load float* %A, align 4 + %tmp2 = load float, float* %A, align 4 %tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1 ret <2 x float> %tmp3 } @@ -29,7 +29,7 @@ loop: %newcount = add i32 %oldcount, 1 %p1 = getelementptr <4 x i8>, <4 x i8> *%in, i32 %newcount %p2 = getelementptr <4 x i8>, <4 x i8> *%out, i32 %newcount - %tmp1 = load <4 x i8> *%p1, align 4 + %tmp1 = load <4 x i8> , <4 x i8> *%p1, align 4 store <4 x i8> %tmp1, <4 x i8> *%p2 %cmp = icmp eq i32 %newcount, %n br i1 %cmp, label %loop, label %ret diff --git a/llvm/test/CodeGen/ARM/addrmode.ll b/llvm/test/CodeGen/ARM/addrmode.ll index 8fd1da791f1..52bb9a20662 100644 --- a/llvm/test/CodeGen/ARM/addrmode.ll +++ b/llvm/test/CodeGen/ARM/addrmode.ll @@ -4,14 +4,14 @@ define i32 @t1(i32 %a) { %b = mul i32 %a, 9 %c = inttoptr i32 %b to i32* - %d = load i32* %c + %d = load i32, i32* %c ret i32 %d } define i32 @t2(i32 %a) { %b = mul i32 %a, -7 %c = inttoptr i32 %b to i32* - %d = load i32* %c + %d = load i32, i32* %c ret i32 %d } diff --git a/llvm/test/CodeGen/ARM/aliases.ll b/llvm/test/CodeGen/ARM/aliases.ll index 5a737ad995a..c24d0d23a60 100644 --- a/llvm/test/CodeGen/ARM/aliases.ll +++ b/llvm/test/CodeGen/ARM/aliases.ll @@ -33,9 +33,9 @@ define i32 @foo_f() { define i32 @test() { entry: - %tmp = load i32* @foo1 - %tmp1 = load i32* @foo2 - %tmp0 = load i32* @bar_i + %tmp = load i32, i32* @foo1 + %tmp1 = load i32, i32* @foo2 + %tmp0 = load i32, i32* @bar_i %tmp2 = call i32 @foo_f() %tmp3 = add i32 %tmp, %tmp2 %tmp4 = call %FunTy* @bar_f() diff --git a/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll b/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll index 5ad87191efe..600fb6aa44b 100644 --- a/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -31,9 +31,9 @@ entry: ; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] %retval = alloca <16 x float>, align 16 - %0 = load <16 x float>* @T3_retval, align 16 + %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval - %1 = load <16 x float>* %retval + %1 = load <16 x float>, <16 x float>* %retval store <16 x float> %1, <16 x float>* %agg.result, align 16 ret void } @@ -66,9 +66,9 @@ entry: ; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! 
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] %retval = alloca <16 x float>, align 16 - %0 = load <16 x float>* @T3_retval, align 16 + %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval - %1 = load <16 x float>* %retval + %1 = load <16 x float>, <16 x float>* %retval store <16 x float> %1, <16 x float>* %agg.result, align 16 ret void } diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll index 3b5161ee4e6..7d7fae95bfc 100644 --- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -22,7 +22,7 @@ tailrecurse: ; preds = %sw.bb, %entry %acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ] %lsr.iv24 = bitcast %struct.Foo* %lsr.iv2 to i8** %scevgep5 = getelementptr i8*, i8** %lsr.iv24, i32 -1 - %tmp2 = load i8** %scevgep5 + %tmp2 = load i8*, i8** %scevgep5 %0 = ptrtoint i8* %tmp2 to i32 ; ARM: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 @@ -90,7 +90,7 @@ sw.epilog: ; preds = %tailrecurse.switch define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly { entry: %0 = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1, i32 0 - %1 = load i8* %0, align 1 + %1 = load i8, i8* %0, align 1 %2 = zext i8 %1 to i32 ; ARM: ands ; THUMB: ands @@ -104,7 +104,7 @@ entry: bb: ; preds = %entry ; V8-NEXT: %bb %5 = getelementptr inbounds %struct.S, %struct.S* %y, i32 0, i32 1, i32 0 - %6 = load i8* %5, align 1 + %6 = load i8, i8* %5, align 1 %7 = zext i8 %6 to i32 ; ARM: andsne ; THUMB: ands diff --git a/llvm/test/CodeGen/ARM/arm-modifier.ll b/llvm/test/CodeGen/ARM/arm-modifier.ll index 580f7e7a90c..67d468e8abd 100644 --- a/llvm/test/CodeGen/ARM/arm-modifier.ll +++ b/llvm/test/CodeGen/ARM/arm-modifier.ll @@ -6,8 +6,8 @@ entry: %scale2.addr = alloca float, align 4 store float %scale, float* %scale.addr, align 4 store float %scale2, float* %scale2.addr, align 4 - %tmp = load float* %scale.addr, align 4 - %tmp1 = load float* %scale2.addr, align 4 + %tmp = load float, float* %scale.addr, align 4 + %tmp1 = load float, float* %scale2.addr, align 4 call void asm sideeffect "vmul.f32 q0, q0, ${0:y} \0A\09vmul.f32 q1, q1, ${0:y} \0A\09vmul.f32 q1, q0, ${1:y} \0A\09", "w,w,~{q0},~{q1}"(float %tmp, float %tmp1) nounwind ret i32 0 } @@ -49,8 +49,8 @@ entry: ; CHECK: stm {{lr|r[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}} ; CHECK: adds {{lr|r[0-9]+}}, [[REG1]] ; CHECK: ldm {{lr|r[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}} -%tmp = load i64* @f3_var, align 4 -%tmp1 = load i64* @f3_var2, align 4 +%tmp = load i64, i64* @f3_var, align 4 +%tmp1 = load i64, i64* @f3_var2, align 4 %0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind store i64 %0, i64* @f3_var, align 4 %1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** @f3_ptr) nounwind diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll index 0c0769f1b14..7510d6ccdc3 100644 --- a/llvm/test/CodeGen/ARM/atomic-64bit.ll +++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll @@ -214,7 +214,7 @@ define i64 @test8(i64* %ptr) { ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: dmb {{ish$}} - %r = load atomic i64* %ptr seq_cst, align 8 + %r = load atomic i64, i64* %ptr seq_cst, align 8 ret i64 %r } diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index af13dfc80d2..5db81781a7f 100644 --- 
a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -44,7 +44,7 @@ define i32 @test2(i32* %ptr) { ; THUMBM-LABEL: test2 ; THUMBM: ldr ; THUMBM: dmb sy - %val = load atomic i32* %ptr seq_cst, align 4 + %val = load atomic i32, i32* %ptr seq_cst, align 4 ret i32 %val } @@ -76,7 +76,7 @@ define void @test3(i8* %ptr1, i8* %ptr2) { ; ARMV6-NOT: mcr ; THUMBM-LABEL: test3 ; THUMBM-NOT: dmb sy - %val = load atomic i8* %ptr1 unordered, align 1 + %val = load atomic i8, i8* %ptr1 unordered, align 1 store atomic i8 %val, i8* %ptr2 unordered, align 1 ret void } @@ -87,7 +87,7 @@ define void @test4(i8* %ptr1, i8* %ptr2) { ; THUMBONE: ___sync_lock_test_and_set_1 ; ARMV6-LABEL: test4 ; THUMBM-LABEL: test4 - %val = load atomic i8* %ptr1 seq_cst, align 1 + %val = load atomic i8, i8* %ptr1 seq_cst, align 1 store atomic i8 %val, i8* %ptr2 seq_cst, align 1 ret void } @@ -95,7 +95,7 @@ define void @test4(i8* %ptr1, i8* %ptr2) { define i64 @test_old_load_64bit(i64* %p) { ; ARMV4-LABEL: test_old_load_64bit ; ARMV4: ___sync_val_compare_and_swap_8 - %1 = load atomic i64* %p seq_cst, align 8 + %1 = load atomic i64, i64* %p seq_cst, align 8 ret i64 %1 } diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll index 1ac86485c55..db32bffdd5d 100644 --- a/llvm/test/CodeGen/ARM/atomic-op.ll +++ b/llvm/test/CodeGen/ARM/atomic-op.ll @@ -25,7 +25,7 @@ entry: store i32 3855, i32* %ort store i32 3855, i32* %xort store i32 4, i32* %temp - %tmp = load i32* %temp + %tmp = load i32, i32* %temp ; CHECK: ldrex ; CHECK: add ; CHECK: strex @@ -308,8 +308,8 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) { define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind { ; CHECK-LABEL: load_load_add_acquire - %val1 = load atomic i32* %mem1 acquire, align 4 - %val2 = load atomic i32* %mem2 acquire, align 4 + %val1 = load atomic i32, i32* %mem1 acquire, align 4 + %val2 = load atomic i32, i32* %mem2 acquire, align 4 %tmp = add i32 %val1, %val2 ; CHECK: ldr {{r[0-9]}}, [r0] @@ -353,7 +353,7 @@ define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) { define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) { ; CHECK-LABEL: load_fence_store_monotonic - %val = load atomic i32* %mem1 monotonic, align 4 + %val = load atomic i32, i32* %mem1 monotonic, align 4 fence seq_cst store atomic i32 %val, i32* %mem2 monotonic, align 4 diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll index 6ba1352fb18..db5007b0758 100644 --- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll @@ -1166,7 +1166,7 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { define i8 @test_atomic_load_monotonic_i8() nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_i8: - %val = load atomic i8* @var8 monotonic, align 1 + %val = load atomic i8, i8* @var8 monotonic, align 1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 @@ -1183,7 +1183,7 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to i8* - %val = load atomic i8* %addr monotonic, align 1 + %val = load atomic i8, i8* %addr monotonic, align 1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-LE: ldrb r0, [r0, r2] @@ -1196,7 +1196,7 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { define i8 @test_atomic_load_acquire_i8() nounwind { ; CHECK-LABEL: 
test_atomic_load_acquire_i8: - %val = load atomic i8* @var8 acquire, align 1 + %val = load atomic i8, i8* @var8 acquire, align 1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 @@ -1213,7 +1213,7 @@ define i8 @test_atomic_load_acquire_i8() nounwind { define i8 @test_atomic_load_seq_cst_i8() nounwind { ; CHECK-LABEL: test_atomic_load_seq_cst_i8: - %val = load atomic i8* @var8 seq_cst, align 1 + %val = load atomic i8, i8* @var8 seq_cst, align 1 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 @@ -1230,7 +1230,7 @@ define i8 @test_atomic_load_seq_cst_i8() nounwind { define i16 @test_atomic_load_monotonic_i16() nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_i16: - %val = load atomic i16* @var16 monotonic, align 2 + %val = load atomic i16, i16* @var16 monotonic, align 2 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 @@ -1251,7 +1251,7 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to i32* - %val = load atomic i32* %addr monotonic, align 4 + %val = load atomic i32, i32* %addr monotonic, align 4 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK-LE: ldr r0, [r0, r2] @@ -1264,7 +1264,7 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind define i64 @test_atomic_load_seq_cst_i64() nounwind { ; CHECK-LABEL: test_atomic_load_seq_cst_i64: - %val = load atomic i64* @var64 seq_cst, align 8 + %val = load atomic i64, i64* @var64 seq_cst, align 8 ; CHECK-NOT: dmb ; CHECK-NOT: mcr ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 @@ -1399,7 +1399,7 @@ define i32 @not.barriers(i32* %var, i1 %cond) { ; CHECK-LABEL: not.barriers: br i1 %cond, label %atomic_ver, label %simple_ver simple_ver: - %oldval = load i32* %var + %oldval = load i32, i32* %var %newval = add nsw i32 %oldval, -1 store i32 %newval, i32* %var br label %somewhere diff --git a/llvm/test/CodeGen/ARM/available_externally.ll b/llvm/test/CodeGen/ARM/available_externally.ll index d925b5c7770..055074738e5 100644 --- a/llvm/test/CodeGen/ARM/available_externally.ll +++ b/llvm/test/CodeGen/ARM/available_externally.ll @@ -5,7 +5,7 @@ @B = external hidden constant i32 define i32 @t1() { - %tmp = load i32* @A + %tmp = load i32, i32* @A store i32 %tmp, i32* @B ret i32 %tmp } diff --git a/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 7bdcb7990d2..c3de07e03b6 100644 --- a/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -30,13 +30,13 @@ while.body: ; CHECK-NOT: muls %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ] %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ] - %0 = load i32* %ptr1.addr.09, align 4 + %0 = load i32, i32* %ptr1.addr.09, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 1 - %1 = load i32* %arrayidx1, align 4 + %1 = load i32, i32* %arrayidx1, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 2 - %2 = load i32* %arrayidx3, align 4 + %2 = load i32, i32* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 3 - %3 = load i32* %arrayidx4, align 4 + %3 = load i32, i32* %arrayidx4, align 4 %add.ptr = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 4 %mul = mul i32 %1, %0 %mul5 = mul i32 %mul, %2 @@ -64,13 +64,13 @@ while.body: ; CHECK: muls %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ] %ptr2.addr.08 = phi 
i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ] - %0 = load i32* %ptr1.addr.09, align 4 + %0 = load i32, i32* %ptr1.addr.09, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 1 - %1 = load i32* %arrayidx1, align 4 + %1 = load i32, i32* %arrayidx1, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 2 - %2 = load i32* %arrayidx3, align 4 + %2 = load i32, i32* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 3 - %3 = load i32* %arrayidx4, align 4 + %3 = load i32, i32* %arrayidx4, align 4 %add.ptr = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 4 %mul = mul i32 %1, %0 %mul5 = mul i32 %mul, %2 @@ -92,7 +92,7 @@ entry: ; CHECK: vmrs APSR_nzcv, fpscr ; CHECK: if.then ; CHECK-NOT: movs - %0 = load double* %q, align 4 + %0 = load double, double* %q, align 4 %cmp = fcmp olt double %0, 1.000000e+01 %incdec.ptr1 = getelementptr inbounds i32, i32* %p, i32 1 br i1 %cmp, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll index bce09da7618..0661960d1ae 100644 --- a/llvm/test/CodeGen/ARM/bfi.ll +++ b/llvm/test/CodeGen/ARM/bfi.ll @@ -9,7 +9,7 @@ entry: ; CHECK: f1 ; CHECK: mov r2, #10 ; CHECK: bfi r1, r2, #22, #4 - %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] + %0 = load i32, i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1] %1 = and i32 %0, -62914561 ; <i32> [#uses=1] %2 = or i32 %1, 41943040 ; <i32> [#uses=1] store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4 diff --git a/llvm/test/CodeGen/ARM/bfx.ll b/llvm/test/CodeGen/ARM/bfx.ll index b2161e67fbe..edb0c1a5a54 100644 --- a/llvm/test/CodeGen/ARM/bfx.ll +++ b/llvm/test/CodeGen/ARM/bfx.ll @@ -42,12 +42,12 @@ entry: %shr2 = and i32 %and1, 255 %shr4 = lshr i32 %x, 24 %arrayidx = getelementptr inbounds i32, i32* %ctx, i32 %shr4 - %0 = load i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx, align 4 %arrayidx5 = getelementptr inbounds i32, i32* %ctx, i32 %shr2 - %1 = load i32* %arrayidx5, align 4 + %1 = load i32, i32* %arrayidx5, align 4 %add = add i32 %1, %0 %arrayidx6 = getelementptr inbounds i32, i32* %ctx, i32 %shr - %2 = load i32* %arrayidx6, align 4 + %2 = load i32, i32* %arrayidx6, align 4 %add7 = add i32 %add, %2 ret i32 %add7 } diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll index 427d2e73142..b5a840a48f7 100644 --- a/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll +++ b/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll @@ -19,7 +19,7 @@ define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) { ; CHECK-LABEL: conv_i64_to_v8i8: ; CHECK: vrev64.8 %v = bitcast i64 %val to <8 x i8> - %w = load <8 x i8>* @v8i8 + %w = load <8 x i8>, <8 x i8>* @v8i8 %a = add <8 x i8> %v, %w store <8 x i8> %a, <8 x i8>* %store ret void @@ -28,8 +28,8 @@ define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) { define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) { ; CHECK-LABEL: conv_v8i8_to_i64: ; CHECK: vrev64.8 - %v = load <8 x i8>* %load - %w = load <8 x i8>* @v8i8 + %v = load <8 x i8>, <8 x i8>* %load + %w = load <8 x i8>, <8 x i8>* @v8i8 %a = add <8 x i8> %v, %w %f = bitcast <8 x i8> %a to i64 call void @conv_i64_to_v8i8( i64 %f, <8 x i8>* %store ) @@ -40,7 +40,7 @@ define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) { ; CHECK-LABEL: conv_i64_to_v4i16: ; CHECK: vrev64.16 %v = bitcast i64 %val to <4 x i16> - %w = load <4 x i16>* @v4i16 + %w = load <4 x i16>, <4 x i16>* @v4i16 %a 
= add <4 x i16> %v, %w store <4 x i16> %a, <4 x i16>* %store ret void @@ -49,8 +49,8 @@ define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) { define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) { ; CHECK-LABEL: conv_v4i16_to_i64: ; CHECK: vrev64.16 - %v = load <4 x i16>* %load - %w = load <4 x i16>* @v4i16 + %v = load <4 x i16>, <4 x i16>* %load + %w = load <4 x i16>, <4 x i16>* @v4i16 %a = add <4 x i16> %v, %w %f = bitcast <4 x i16> %a to i64 call void @conv_i64_to_v4i16( i64 %f, <4 x i16>* %store ) @@ -61,7 +61,7 @@ define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) { ; CHECK-LABEL: conv_i64_to_v2i32: ; CHECK: vrev64.32 %v = bitcast i64 %val to <2 x i32> - %w = load <2 x i32>* @v2i32 + %w = load <2 x i32>, <2 x i32>* @v2i32 %a = add <2 x i32> %v, %w store <2 x i32> %a, <2 x i32>* %store ret void @@ -70,8 +70,8 @@ define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) { define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) { ; CHECK-LABEL: conv_v2i32_to_i64: ; CHECK: vrev64.32 - %v = load <2 x i32>* %load - %w = load <2 x i32>* @v2i32 + %v = load <2 x i32>, <2 x i32>* %load + %w = load <2 x i32>, <2 x i32>* @v2i32 %a = add <2 x i32> %v, %w %f = bitcast <2 x i32> %a to i64 call void @conv_i64_to_v2i32( i64 %f, <2 x i32>* %store ) @@ -82,7 +82,7 @@ define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) { ; CHECK-LABEL: conv_i64_to_v2f32: ; CHECK: vrev64.32 %v = bitcast i64 %val to <2 x float> - %w = load <2 x float>* @v2f32 + %w = load <2 x float>, <2 x float>* @v2f32 %a = fadd <2 x float> %v, %w store <2 x float> %a, <2 x float>* %store ret void @@ -91,8 +91,8 @@ define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) { define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) { ; CHECK-LABEL: conv_v2f32_to_i64: ; CHECK: vrev64.32 - %v = load <2 x float>* %load - %w = load <2 x float>* @v2f32 + %v = load <2 x float>, <2 x float>* %load + %w = load <2 x float>, <2 x float>* @v2f32 %a = fadd <2 x float> %v, %w %f = bitcast <2 x float> %a to i64 call void @conv_i64_to_v2f32( i64 %f, <2 x float>* %store ) @@ -103,7 +103,7 @@ define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) { ; CHECK-LABEL: conv_f64_to_v8i8: ; CHECK: vrev64.8 %v = bitcast double %val to <8 x i8> - %w = load <8 x i8>* @v8i8 + %w = load <8 x i8>, <8 x i8>* @v8i8 %a = add <8 x i8> %v, %w store <8 x i8> %a, <8 x i8>* %store ret void @@ -112,8 +112,8 @@ define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) { define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) { ; CHECK-LABEL: conv_v8i8_to_f64: ; CHECK: vrev64.8 - %v = load <8 x i8>* %load - %w = load <8 x i8>* @v8i8 + %v = load <8 x i8>, <8 x i8>* %load + %w = load <8 x i8>, <8 x i8>* @v8i8 %a = add <8 x i8> %v, %w %f = bitcast <8 x i8> %a to double call void @conv_f64_to_v8i8( double %f, <8 x i8>* %store ) @@ -124,7 +124,7 @@ define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) { ; CHECK-LABEL: conv_f64_to_v4i16: ; CHECK: vrev64.16 %v = bitcast double %val to <4 x i16> - %w = load <4 x i16>* @v4i16 + %w = load <4 x i16>, <4 x i16>* @v4i16 %a = add <4 x i16> %v, %w store <4 x i16> %a, <4 x i16>* %store ret void @@ -133,8 +133,8 @@ define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) { define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) { ; CHECK-LABEL: conv_v4i16_to_f64: ; CHECK: vrev64.16 - %v = load <4 x i16>* %load - %w = load <4 x i16>* @v4i16 + %v = load <4 x i16>, <4 x i16>* %load + %w = load <4 x i16>, 
<4 x i16>* @v4i16 %a = add <4 x i16> %v, %w %f = bitcast <4 x i16> %a to double call void @conv_f64_to_v4i16( double %f, <4 x i16>* %store ) @@ -145,7 +145,7 @@ define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) { ; CHECK-LABEL: conv_f64_to_v2i32: ; CHECK: vrev64.32 %v = bitcast double %val to <2 x i32> - %w = load <2 x i32>* @v2i32 + %w = load <2 x i32>, <2 x i32>* @v2i32 %a = add <2 x i32> %v, %w store <2 x i32> %a, <2 x i32>* %store ret void @@ -154,8 +154,8 @@ define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) { define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) { ; CHECK-LABEL: conv_v2i32_to_f64: ; CHECK: vrev64.32 - %v = load <2 x i32>* %load - %w = load <2 x i32>* @v2i32 + %v = load <2 x i32>, <2 x i32>* %load + %w = load <2 x i32>, <2 x i32>* @v2i32 %a = add <2 x i32> %v, %w %f = bitcast <2 x i32> %a to double call void @conv_f64_to_v2i32( double %f, <2 x i32>* %store ) @@ -166,7 +166,7 @@ define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) { ; CHECK-LABEL: conv_f64_to_v2f32: ; CHECK: vrev64.32 %v = bitcast double %val to <2 x float> - %w = load <2 x float>* @v2f32 + %w = load <2 x float>, <2 x float>* @v2f32 %a = fadd <2 x float> %v, %w store <2 x float> %a, <2 x float>* %store ret void @@ -175,8 +175,8 @@ define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) { define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) { ; CHECK-LABEL: conv_v2f32_to_f64: ; CHECK: vrev64.32 - %v = load <2 x float>* %load - %w = load <2 x float>* @v2f32 + %v = load <2 x float>, <2 x float>* %load + %w = load <2 x float>, <2 x float>* @v2f32 %a = fadd <2 x float> %v, %w %f = bitcast <2 x float> %a to double call void @conv_f64_to_v2f32( double %f, <2 x float>* %store ) @@ -190,7 +190,7 @@ define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) { ; CHECK-LABEL: conv_i128_to_v16i8: ; CHECK: vrev32.8 %v = bitcast i128 %val to <16 x i8> - %w = load <16 x i8>* @v16i8 + %w = load <16 x i8>, <16 x i8>* @v16i8 %a = add <16 x i8> %v, %w store <16 x i8> %a, <16 x i8>* %store ret void @@ -199,8 +199,8 @@ define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) { define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) { ; CHECK-LABEL: conv_v16i8_to_i128: ; CHECK: vrev32.8 - %v = load <16 x i8>* %load - %w = load <16 x i8>* @v16i8 + %v = load <16 x i8>, <16 x i8>* %load + %w = load <16 x i8>, <16 x i8>* @v16i8 %a = add <16 x i8> %v, %w %f = bitcast <16 x i8> %a to i128 call void @conv_i128_to_v16i8( i128 %f, <16 x i8>* %store ) @@ -211,7 +211,7 @@ define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) { ; CHECK-LABEL: conv_i128_to_v8i16: ; CHECK: vrev32.16 %v = bitcast i128 %val to <8 x i16> - %w = load <8 x i16>* @v8i16 + %w = load <8 x i16>, <8 x i16>* @v8i16 %a = add <8 x i16> %v, %w store <8 x i16> %a, <8 x i16>* %store ret void @@ -220,8 +220,8 @@ define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) { define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) { ; CHECK-LABEL: conv_v8i16_to_i128: ; CHECK: vrev32.16 - %v = load <8 x i16>* %load - %w = load <8 x i16>* @v8i16 + %v = load <8 x i16>, <8 x i16>* %load + %w = load <8 x i16>, <8 x i16>* @v8i16 %a = add <8 x i16> %v, %w %f = bitcast <8 x i16> %a to i128 call void @conv_i128_to_v8i16( i128 %f, <8 x i16>* %store ) @@ -232,7 +232,7 @@ define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) { ; CHECK-LABEL: conv_i128_to_v4i32: ; CHECK: vrev64.32 %v = bitcast i128 %val to <4 x i32> - %w = load <4 x 
i32>* @v4i32 + %w = load <4 x i32>, <4 x i32>* @v4i32 %a = add <4 x i32> %v, %w store <4 x i32> %a, <4 x i32>* %store ret void @@ -241,8 +241,8 @@ define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) { define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) { ; CHECK-LABEL: conv_v4i32_to_i128: ; CHECK: vrev64.32 - %v = load <4 x i32>* %load - %w = load <4 x i32>* @v4i32 + %v = load <4 x i32>, <4 x i32>* %load + %w = load <4 x i32>, <4 x i32>* @v4i32 %a = add <4 x i32> %v, %w %f = bitcast <4 x i32> %a to i128 call void @conv_i128_to_v4i32( i128 %f, <4 x i32>* %store ) @@ -253,7 +253,7 @@ define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) { ; CHECK-LABEL: conv_i128_to_v4f32: ; CHECK: vrev64.32 %v = bitcast i128 %val to <4 x float> - %w = load <4 x float>* @v4f32 + %w = load <4 x float>, <4 x float>* @v4f32 %a = fadd <4 x float> %v, %w store <4 x float> %a, <4 x float>* %store ret void @@ -262,8 +262,8 @@ define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) { define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) { ; CHECK-LABEL: conv_v4f32_to_i128: ; CHECK: vrev64.32 - %v = load <4 x float>* %load - %w = load <4 x float>* @v4f32 + %v = load <4 x float>, <4 x float>* %load + %w = load <4 x float>, <4 x float>* @v4f32 %a = fadd <4 x float> %v, %w %f = bitcast <4 x float> %a to i128 call void @conv_i128_to_v4f32( i128 %f, <4 x float>* %store ) @@ -274,7 +274,7 @@ define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) { ; CHECK-LABEL: conv_f128_to_v2f64: ; CHECK: vrev64.32 %v = bitcast fp128 %val to <2 x double> - %w = load <2 x double>* @v2f64 + %w = load <2 x double>, <2 x double>* @v2f64 %a = fadd <2 x double> %v, %w store <2 x double> %a, <2 x double>* %store ret void @@ -283,8 +283,8 @@ define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) { define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) { ; CHECK-LABEL: conv_v2f64_to_f128: ; CHECK: vrev64.32 - %v = load <2 x double>* %load - %w = load <2 x double>* @v2f64 + %v = load <2 x double>, <2 x double>* %load + %w = load <2 x double>, <2 x double>* @v2f64 %a = fadd <2 x double> %v, %w %f = bitcast <2 x double> %a to fp128 call void @conv_f128_to_v2f64( fp128 %f, <2 x double>* %store ) @@ -295,7 +295,7 @@ define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) { ; CHECK-LABEL: conv_f128_to_v16i8: ; CHECK: vrev32.8 %v = bitcast fp128 %val to <16 x i8> - %w = load <16 x i8>* @v16i8 + %w = load <16 x i8>, <16 x i8>* @v16i8 %a = add <16 x i8> %v, %w store <16 x i8> %a, <16 x i8>* %store ret void @@ -304,8 +304,8 @@ define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) { define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) { ; CHECK-LABEL: conv_v16i8_to_f128: ; CHECK: vrev32.8 - %v = load <16 x i8>* %load - %w = load <16 x i8>* @v16i8 + %v = load <16 x i8>, <16 x i8>* %load + %w = load <16 x i8>, <16 x i8>* @v16i8 %a = add <16 x i8> %v, %w %f = bitcast <16 x i8> %a to fp128 call void @conv_f128_to_v16i8( fp128 %f, <16 x i8>* %store ) @@ -316,7 +316,7 @@ define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) { ; CHECK-LABEL: conv_f128_to_v8i16: ; CHECK: vrev32.16 %v = bitcast fp128 %val to <8 x i16> - %w = load <8 x i16>* @v8i16 + %w = load <8 x i16>, <8 x i16>* @v8i16 %a = add <8 x i16> %v, %w store <8 x i16> %a, <8 x i16>* %store ret void @@ -325,8 +325,8 @@ define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) { define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 
x i16>* %store ) { ; CHECK-LABEL: conv_v8i16_to_f128: ; CHECK: vrev32.16 - %v = load <8 x i16>* %load - %w = load <8 x i16>* @v8i16 + %v = load <8 x i16>, <8 x i16>* %load + %w = load <8 x i16>, <8 x i16>* @v8i16 %a = add <8 x i16> %v, %w %f = bitcast <8 x i16> %a to fp128 call void @conv_f128_to_v8i16( fp128 %f, <8 x i16>* %store ) @@ -337,7 +337,7 @@ define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) { ; CHECK-LABEL: conv_f128_to_v4f32: ; CHECK: vrev64.32 %v = bitcast fp128 %val to <4 x float> - %w = load <4 x float>* @v4f32 + %w = load <4 x float>, <4 x float>* @v4f32 %a = fadd <4 x float> %v, %w store <4 x float> %a, <4 x float>* %store ret void @@ -346,8 +346,8 @@ define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) { define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) { ; CHECK-LABEL: conv_v4f32_to_f128: ; CHECK: vrev64.32 - %v = load <4 x float>* %load - %w = load <4 x float>* @v4f32 + %v = load <4 x float>, <4 x float>* %load + %w = load <4 x float>, <4 x float>* @v4f32 %a = fadd <4 x float> %v, %w %f = bitcast <4 x float> %a to fp128 call void @conv_f128_to_v4f32( fp128 %f, <4 x float>* %store ) diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll b/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll index 1498356eb97..1e35305bdba 100644 --- a/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll +++ b/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll @@ -14,7 +14,7 @@ define void @vector_ext_2i8_to_2i64( <2 x i8>* %loadaddr, <2 x i64>* %storeaddr ; CHECK-NEXT: vrev64.32 [[QREG]], [[QREG]] ; CHECK-NEXT: vst1.64 {[[REG]], {{d[0-9]+}}}, [r1] ; CHECK-NEXT: bx lr - %1 = load <2 x i8>* %loadaddr + %1 = load <2 x i8>, <2 x i8>* %loadaddr %2 = zext <2 x i8> %1 to <2 x i64> store <2 x i64> %2, <2 x i64>* %storeaddr ret void @@ -33,7 +33,7 @@ define void @vector_ext_2i16_to_2i64( <2 x i16>* %loadaddr, <2 x i64>* %storeadd ; CHECK-NEXT: vrev64.32 [[QREG]], [[QREG]] ; CHECK-NEXT: vst1.64 {[[REG]], {{d[0-9]+}}}, [r1] ; CHECK-NEXT: bx lr - %1 = load <2 x i16>* %loadaddr + %1 = load <2 x i16>, <2 x i16>* %loadaddr %2 = zext <2 x i16> %1 to <2 x i64> store <2 x i64> %2, <2 x i64>* %storeaddr ret void @@ -49,7 +49,7 @@ define void @vector_ext_2i8_to_2i32( <2 x i8>* %loadaddr, <2 x i32>* %storeaddr ; CHECK-NEXT: vrev64.32 [[REG]], [[REG]] ; CHECK-NEXT: vstr [[REG]], [r1] ; CHECK-NEXT: bx lr - %1 = load <2 x i8>* %loadaddr + %1 = load <2 x i8>, <2 x i8>* %loadaddr %2 = zext <2 x i8> %1 to <2 x i32> store <2 x i32> %2, <2 x i32>* %storeaddr ret void @@ -63,7 +63,7 @@ define void @vector_ext_2i16_to_2i32( <2 x i16>* %loadaddr, <2 x i32>* %storeadd ; CHECK-NEXT: vrev64.32 [[REG]], [[REG]] ; CHECK-NEXT: vstr [[REG]], [r1] ; CHECK-NEXT: bx lr - %1 = load <2 x i16>* %loadaddr + %1 = load <2 x i16>, <2 x i16>* %loadaddr %2 = zext <2 x i16> %1 to <2 x i32> store <2 x i32> %2, <2 x i32>* %storeaddr ret void @@ -80,7 +80,7 @@ define void @vector_ext_2i8_to_2i16( <2 x i8>* %loadaddr, <2 x i16>* %storeaddr ; CHECK-NEXT: vrev32.16 [[REG]], {{d[0-9]+}} ; CHECK-NEXT: vst1.32 {[[REG]][0]}, [r1:32] ; CHECK-NEXT: bx lr - %1 = load <2 x i8>* %loadaddr + %1 = load <2 x i8>, <2 x i8>* %loadaddr %2 = zext <2 x i8> %1 to <2 x i16> store <2 x i16> %2, <2 x i16>* %storeaddr ret void @@ -95,7 +95,7 @@ define void @vector_ext_4i8_to_4i32( <4 x i8>* %loadaddr, <4 x i32>* %storeaddr ; CHECK-NEXT: vrev64.32 [[QREG]], [[QREG]] ; CHECK-NEXT: vst1.64 {[[REG]], {{d[0-9]+}}}, [r1] ; CHECK-NEXT: bx lr - %1 = load <4 x i8>* %loadaddr + %1 = load <4 x i8>, <4 x i8>* %loadaddr %2 = zext 
<4 x i8> %1 to <4 x i32> store <4 x i32> %2, <4 x i32>* %storeaddr ret void @@ -109,7 +109,7 @@ define void @vector_ext_4i8_to_4i16( <4 x i8>* %loadaddr, <4 x i16>* %storeaddr ; CHECK-NEXT: vrev64.16 [[REG]], [[REG]] ; CHECK-NEXT: vstr [[REG]], [r1] ; CHECK-NEXT: bx lr - %1 = load <4 x i8>* %loadaddr + %1 = load <4 x i8>, <4 x i8>* %loadaddr %2 = zext <4 x i8> %1 to <4 x i16> store <4 x i16> %2, <4 x i16>* %storeaddr ret void diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll b/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll index 65147ad5d3f..cbfc46ed255 100644 --- a/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll +++ b/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll @@ -6,7 +6,7 @@ define void @vector_trunc_store_2i64_to_2i16( <2 x i64>* %loadaddr, <2 x i16>* % ; CHECK: vrev32.16 [[REG]], [[REG]] ; CHECK: vuzp.16 [[REG]], [[REG2:d[0-9]+]] ; CHECK: vrev32.16 [[REG]], [[REG2]] - %1 = load <2 x i64>* %loadaddr + %1 = load <2 x i64>, <2 x i64>* %loadaddr %2 = trunc <2 x i64> %1 to <2 x i16> store <2 x i16> %2, <2 x i16>* %storeaddr ret void @@ -18,7 +18,7 @@ define void @vector_trunc_store_4i32_to_4i8( <4 x i32>* %loadaddr, <4 x i8>* %st ; CHECK: vrev16.8 [[REG]], [[REG]] ; CHECK: vuzp.8 [[REG]], [[REG2:d[0-9]+]] ; CHECK: vrev32.8 [[REG]], [[REG2]] - %1 = load <4 x i32>* %loadaddr + %1 = load <4 x i32>, <4 x i32>* %loadaddr %2 = trunc <4 x i32> %1 to <4 x i8> store <4 x i8> %2, <4 x i8>* %storeaddr ret void diff --git a/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll b/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll index 614bfc0a5b3..f83e0864100 100644 --- a/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll +++ b/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll @@ -6,7 +6,7 @@ define double @fn() { ; CHECK: ldr r0, [sp] ; CHECK: ldr r1, [sp, #4] %r = alloca double, align 8 - %1 = load double* %r, align 8 + %1 = load double, double* %r, align 8 ret double %1 } diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll b/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll index d01b0a7c974..54bda66c54a 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll @@ -7,7 +7,7 @@ define void @test_i64_f64(double* %p, i64* %q) { ; SOFT: vadd.f64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.f64 d0 - %1 = load double* %p + %1 = load double, double* %p %2 = fadd double %1, %1 %3 = call i64 @test_i64_f64_helper(double %2) %4 = add i64 %3, %3 @@ -23,7 +23,7 @@ define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) { ; SOFT: vadd.i64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.i64 d0 - %1 = load <1 x i64>* %p + %1 = load <1 x i64>, <1 x i64>* %p %2 = add <1 x i64> %1, %1 %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2) %4 = add i64 %3, %3 @@ -39,7 +39,7 @@ define void @test_i64_v2f32(<2 x float>* %p, i64* %q) { ; SOFT: vrev64.32 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.32 d0 - %1 = load <2 x float>* %p + %1 = load <2 x float>, <2 x float>* %p %2 = fadd <2 x float> %1, %1 %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2) %4 = add i64 %3, %3 @@ -55,7 +55,7 @@ define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) { ; SOFT: vrev64.32 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.32 d0 - %1 = load <2 x i32>* %p + %1 = load <2 x i32>, <2 x i32>* %p %2 = add <2 x i32> %1, %1 %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2) %4 = add i64 %3, %3 @@ -71,7 +71,7 @@ define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) { ; SOFT: vrev64.16 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; 
HARD: vrev64.16 d0 - %1 = load <4 x i16>* %p + %1 = load <4 x i16>, <4 x i16>* %p %2 = add <4 x i16> %1, %1 %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2) %4 = add i64 %3, %3 @@ -87,7 +87,7 @@ define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) { ; SOFT: vrev64.8 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.8 d0 - %1 = load <8 x i8>* %p + %1 = load <8 x i8>, <8 x i8>* %p %2 = add <8 x i8> %1, %1 %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2) %4 = add i64 %3, %3 @@ -102,7 +102,7 @@ declare double @test_f64_i64_helper(i64 %p) define void @test_f64_i64(i64* %p, double* %q) { ; CHECK: adds r1 ; CHECK: adc r0 - %1 = load i64* %p + %1 = load i64, i64* %p %2 = add i64 %1, %1 %3 = call double @test_f64_i64_helper(i64 %2) %4 = fadd double %3, %3 @@ -119,7 +119,7 @@ define void @test_f64_v1i64(<1 x i64>* %p, double* %q) { ; SOFT: vadd.i64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.i64 d0 - %1 = load <1 x i64>* %p + %1 = load <1 x i64>, <1 x i64>* %p %2 = add <1 x i64> %1, %1 %3 = call double @test_f64_v1i64_helper(<1 x i64> %2) %4 = fadd double %3, %3 @@ -136,7 +136,7 @@ define void @test_f64_v2f32(<2 x float>* %p, double* %q) { ; SOFT: vrev64.32 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.32 d0 - %1 = load <2 x float>* %p + %1 = load <2 x float>, <2 x float>* %p %2 = fadd <2 x float> %1, %1 %3 = call double @test_f64_v2f32_helper(<2 x float> %2) %4 = fadd double %3, %3 @@ -153,7 +153,7 @@ define void @test_f64_v2i32(<2 x i32>* %p, double* %q) { ; SOFT: vrev64.32 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.32 d0 - %1 = load <2 x i32>* %p + %1 = load <2 x i32>, <2 x i32>* %p %2 = add <2 x i32> %1, %1 %3 = call double @test_f64_v2i32_helper(<2 x i32> %2) %4 = fadd double %3, %3 @@ -170,7 +170,7 @@ define void @test_f64_v4i16(<4 x i16>* %p, double* %q) { ; SOFT: vrev64.16 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.16 d0 - %1 = load <4 x i16>* %p + %1 = load <4 x i16>, <4 x i16>* %p %2 = add <4 x i16> %1, %1 %3 = call double @test_f64_v4i16_helper(<4 x i16> %2) %4 = fadd double %3, %3 @@ -187,7 +187,7 @@ define void @test_f64_v8i8(<8 x i8>* %p, double* %q) { ; SOFT: vrev64.8 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.8 d0 - %1 = load <8 x i8>* %p + %1 = load <8 x i8>, <8 x i8>* %p %2 = add <8 x i8> %1, %1 %3 = call double @test_f64_v8i8_helper(<8 x i8> %2) %4 = fadd double %3, %3 @@ -203,7 +203,7 @@ declare <1 x i64> @test_v1i64_i64_helper(i64 %p) define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) { ; CHECK: adds r1 ; CHECK: adc r0 - %1 = load i64* %p + %1 = load i64, i64* %p %2 = add i64 %1, %1 %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2) %4 = add <1 x i64> %3, %3 @@ -220,7 +220,7 @@ define void @test_v1i64_f64(double* %p, <1 x i64>* %q) { ; SOFT: vadd.f64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.f64 d0 - %1 = load double* %p + %1 = load double, double* %p %2 = fadd double %1, %1 %3 = call <1 x i64> @test_v1i64_f64_helper(double %2) %4 = add <1 x i64> %3, %3 @@ -237,7 +237,7 @@ define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) { ; HARD: vrev64.32 d0 ; SOFT: vadd.f32 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] - %1 = load <2 x float>* %p + %1 = load <2 x float>, <2 x float>* %p %2 = fadd <2 x float> %1, %1 %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2) %4 = add <1 x i64> %3, %3 @@ -255,7 +255,7 @@ define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) { ; SOFT: vadd.i32 [[REG:d[0-9]+]] ; SOFT: vrev64.32 [[REG]] ; SOFT: vmov r1, r0, [[REG]] - %1 = 
load <2 x i32>* %p + %1 = load <2 x i32>, <2 x i32>* %p %2 = add <2 x i32> %1, %1 %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2) %4 = add <1 x i64> %3, %3 @@ -272,7 +272,7 @@ define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) { ; SOFT: vrev64.16 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.16 d0 - %1 = load <4 x i16>* %p + %1 = load <4 x i16>, <4 x i16>* %p %2 = add <4 x i16> %1, %1 %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2) %4 = add <1 x i64> %3, %3 @@ -289,7 +289,7 @@ define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) { ; SOFT: vrev64.8 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.8 d0 - %1 = load <8 x i8>* %p + %1 = load <8 x i8>, <8 x i8>* %p %2 = add <8 x i8> %1, %1 %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2) %4 = add <1 x i64> %3, %3 @@ -305,7 +305,7 @@ declare <2 x float> @test_v2f32_i64_helper(i64 %p) define void @test_v2f32_i64(i64* %p, <2 x float>* %q) { ; CHECK: adds r1 ; CHECK: adc r0 - %1 = load i64* %p + %1 = load i64, i64* %p %2 = add i64 %1, %1 %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2) %4 = fadd <2 x float> %3, %3 @@ -322,7 +322,7 @@ define void @test_v2f32_f64(double* %p, <2 x float>* %q) { ; SOFT: vadd.f64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.f64 d0 - %1 = load double* %p + %1 = load double, double* %p %2 = fadd double %1, %1 %3 = call <2 x float> @test_v2f32_f64_helper(double %2) %4 = fadd <2 x float> %3, %3 @@ -339,7 +339,7 @@ define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) { ; SOFT: vadd.i64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.i64 d0 - %1 = load <1 x i64>* %p + %1 = load <1 x i64>, <1 x i64>* %p %2 = add <1 x i64> %1, %1 %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2) %4 = fadd <2 x float> %3, %3 @@ -357,7 +357,7 @@ define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) { ; SOFT: vadd.i32 [[REG:d[0-9]+]] ; SOFT: vrev64.32 [[REG]] ; SOFT: vmov r1, r0, [[REG]] - %1 = load <2 x i32>* %p + %1 = load <2 x i32>, <2 x i32>* %p %2 = add <2 x i32> %1, %1 %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2) %4 = fadd <2 x float> %3, %3 @@ -374,7 +374,7 @@ define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) { ; SOFT: vrev64.16 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.16 d0 - %1 = load <4 x i16>* %p + %1 = load <4 x i16>, <4 x i16>* %p %2 = add <4 x i16> %1, %1 %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2) %4 = fadd <2 x float> %3, %3 @@ -391,7 +391,7 @@ define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) { ; SOFT: vrev64.8 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.8 d0 - %1 = load <8 x i8>* %p + %1 = load <8 x i8>, <8 x i8>* %p %2 = add <8 x i8> %1, %1 %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2) %4 = fadd <2 x float> %3, %3 @@ -407,7 +407,7 @@ declare <2 x i32> @test_v2i32_i64_helper(i64 %p) define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) { ; CHECK: adds r1 ; CHECK: adc r0 - %1 = load i64* %p + %1 = load i64, i64* %p %2 = add i64 %1, %1 %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2) %4 = add <2 x i32> %3, %3 @@ -424,7 +424,7 @@ define void @test_v2i32_f64(double* %p, <2 x i32>* %q) { ; SOFT: vadd.f64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.f64 d0 - %1 = load double* %p + %1 = load double, double* %p %2 = fadd double %1, %1 %3 = call <2 x i32> @test_v2i32_f64_helper(double %2) %4 = add <2 x i32> %3, %3 @@ -441,7 +441,7 @@ define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) { ; SOFT: 
vadd.i64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.i64 d0 - %1 = load <1 x i64>* %p + %1 = load <1 x i64>, <1 x i64>* %p %2 = add <1 x i64> %1, %1 %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2) %4 = add <2 x i32> %3, %3 @@ -460,7 +460,7 @@ define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) { ; SOFT: vadd.f32 [[REG:d[0-9]+]] ; SOFT: vrev64.32 [[REG]] ; SOFT: vmov r1, r0, [[REG]] - %1 = load <2 x float>* %p + %1 = load <2 x float>, <2 x float>* %p %2 = fadd <2 x float> %1, %1 %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2) %4 = add <2 x i32> %3, %3 @@ -477,7 +477,7 @@ define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) { ; SOFT: vrev64.16 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.16 d0 - %1 = load <4 x i16>* %p + %1 = load <4 x i16>, <4 x i16>* %p %2 = add <4 x i16> %1, %1 %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2) %4 = add <2 x i32> %3, %3 @@ -494,7 +494,7 @@ define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) { ; SOFT: vrev64.8 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.8 d0 - %1 = load <8 x i8>* %p + %1 = load <8 x i8>, <8 x i8>* %p %2 = add <8 x i8> %1, %1 %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2) %4 = add <2 x i32> %3, %3 @@ -510,7 +510,7 @@ declare <4 x i16> @test_v4i16_i64_helper(i64 %p) define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) { ; CHECK: adds r1 ; CHECK: adc r0 - %1 = load i64* %p + %1 = load i64, i64* %p %2 = add i64 %1, %1 %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2) %4 = add <4 x i16> %3, %3 @@ -527,7 +527,7 @@ define void @test_v4i16_f64(double* %p, <4 x i16>* %q) { ; SOFT: vadd.f64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.f64 d0 - %1 = load double* %p + %1 = load double, double* %p %2 = fadd double %1, %1 %3 = call <4 x i16> @test_v4i16_f64_helper(double %2) %4 = add <4 x i16> %3, %3 @@ -544,7 +544,7 @@ define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) { ; SOFT: vadd.i64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.i64 d0 - %1 = load <1 x i64>* %p + %1 = load <1 x i64>, <1 x i64>* %p %2 = add <1 x i64> %1, %1 %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2) %4 = add <4 x i16> %3, %3 @@ -563,7 +563,7 @@ define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) { ; SOFT: vadd.f32 [[REG:d[0-9]+]] ; SOFT: vrev64.32 [[REG]] ; SOFT: vmov r1, r0, [[REG]] - %1 = load <2 x float>* %p + %1 = load <2 x float>, <2 x float>* %p %2 = fadd <2 x float> %1, %1 %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2) %4 = add <4 x i16> %3, %3 @@ -582,7 +582,7 @@ define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) { ; SOFT: vadd.i32 [[REG:d[0-9]+]] ; SOFT: vrev64.32 [[REG]] ; SOFT: vmov r1, r0, [[REG]] - %1 = load <2 x i32>* %p + %1 = load <2 x i32>, <2 x i32>* %p %2 = add <2 x i32> %1, %1 %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2) %4 = add <4 x i16> %3, %3 @@ -599,7 +599,7 @@ define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) { ; SOFT: vrev64.8 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.8 d0 - %1 = load <8 x i8>* %p + %1 = load <8 x i8>, <8 x i8>* %p %2 = add <8 x i8> %1, %1 %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2) %4 = add <4 x i16> %3, %3 @@ -615,7 +615,7 @@ declare <8 x i8> @test_v8i8_i64_helper(i64 %p) define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) { ; CHECK: adds r1 ; CHECK: adc r0 - %1 = load i64* %p + %1 = load i64, i64* %p %2 = add i64 %1, %1 %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2) %4 = add <8 x i8> %3, %3 @@ 
-632,7 +632,7 @@ define void @test_v8i8_f64(double* %p, <8 x i8>* %q) { ; SOFT: vadd.f64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.f64 d0 - %1 = load double* %p + %1 = load double, double* %p %2 = fadd double %1, %1 %3 = call <8 x i8> @test_v8i8_f64_helper(double %2) %4 = add <8 x i8> %3, %3 @@ -649,7 +649,7 @@ define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) { ; SOFT: vadd.i64 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vadd.i64 d0 - %1 = load <1 x i64>* %p + %1 = load <1 x i64>, <1 x i64>* %p %2 = add <1 x i64> %1, %1 %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2) %4 = add <8 x i8> %3, %3 @@ -666,7 +666,7 @@ define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) { ; SOFT: vrev64.32 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.32 d0 - %1 = load <2 x float>* %p + %1 = load <2 x float>, <2 x float>* %p %2 = fadd <2 x float> %1, %1 %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2) %4 = add <8 x i8> %3, %3 @@ -683,7 +683,7 @@ define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) { ; SOFT: vrev64.32 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.32 d0 - %1 = load <2 x i32>* %p + %1 = load <2 x i32>, <2 x i32>* %p %2 = add <2 x i32> %1, %1 %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2) %4 = add <8 x i8> %3, %3 @@ -700,7 +700,7 @@ define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) { ; SOFT: vrev64.16 [[REG:d[0-9]+]] ; SOFT: vmov r1, r0, [[REG]] ; HARD: vrev64.16 d0 - %1 = load <4 x i16>* %p + %1 = load <4 x i16>, <4 x i16>* %p %2 = add <4 x i16> %1, %1 %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2) %4 = add <8 x i8> %3, %3 @@ -720,7 +720,7 @@ define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) { ; SOFT: vmov r3, r2, [[REG2]] ; HARD: vadd.f64 d1 ; HARD: vadd.f64 d0 - %1 = load <2 x double>* %p + %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x double> %1, %1 %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2) %4 = fadd fp128 %3, %3 @@ -735,7 +735,7 @@ define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vadd.i64 q0 - %1 = load <2 x i64>* %p + %1 = load <2 x i64>, <2 x i64>* %p %2 = add <2 x i64> %1, %1 %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2) %4 = fadd fp128 %3, %3 @@ -750,7 +750,7 @@ define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x float>* %p + %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2) %4 = fadd fp128 %3, %3 @@ -765,7 +765,7 @@ define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x i32>* %p + %1 = load <4 x i32>, <4 x i32>* %p %2 = add <4 x i32> %1, %1 %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2) %4 = fadd fp128 %3, %3 @@ -780,7 +780,7 @@ define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.16 q0 - %1 = load <8 x i16>* %p + %1 = load <8 x i16>, <8 x i16>* %p %2 = add <8 x i16> %1, %1 %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2) %4 = fadd fp128 %3, %3 @@ -795,7 +795,7 @@ define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.8 q0 - %1 = load <16 x i8>* %p + %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2) %4 = fadd fp128 %3, %3 @@ -807,7 +807,7 @@ define void 
@test_f128_v16i8(<16 x i8>* %p, fp128* %q) { ; CHECK-LABEL: test_v2f64_f128: declare <2 x double> @test_v2f64_f128_helper(fp128 %p) define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) { - %1 = load fp128* %p + %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2) %4 = fadd <2 x double> %3, %3 @@ -824,7 +824,7 @@ define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vadd.i64 q0 - %1 = load <2 x i64>* %p + %1 = load <2 x i64>, <2 x i64>* %p %2 = add <2 x i64> %1, %1 %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2) %4 = fadd <2 x double> %3, %3 @@ -840,7 +840,7 @@ define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x float>* %p + %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2) %4 = fadd <2 x double> %3, %3 @@ -856,7 +856,7 @@ define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x i32>* %p + %1 = load <4 x i32>, <4 x i32>* %p %2 = add <4 x i32> %1, %1 %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2) %4 = fadd <2 x double> %3, %3 @@ -872,7 +872,7 @@ define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.16 q0 - %1 = load <8 x i16>* %p + %1 = load <8 x i16>, <8 x i16>* %p %2 = add <8 x i16> %1, %1 %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2) %4 = fadd <2 x double> %3, %3 @@ -888,7 +888,7 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.8 q0 - %1 = load <16 x i8>* %p + %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2) %4 = fadd <2 x double> %3, %3 @@ -901,7 +901,7 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) { ; CHECK-LABEL: test_v2i64_f128: declare <2 x i64> @test_v2i64_f128_helper(fp128 %p) define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) { - %1 = load fp128* %p + %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2) %4 = add <2 x i64> %3, %3 @@ -918,7 +918,7 @@ define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) { ; SOFT: vmov r3, r2, [[REG2]] ; HARD: vadd.f64 d1 ; HARD: vadd.f64 d0 - %1 = load <2 x double>* %p + %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x double> %1, %1 %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2) %4 = add <2 x i64> %3, %3 @@ -934,7 +934,7 @@ define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x float>* %p + %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2) %4 = add <2 x i64> %3, %3 @@ -950,7 +950,7 @@ define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x i32>* %p + %1 = load <4 x i32>, <4 x i32>* %p %2 = add <4 x i32> %1, %1 %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2) %4 = add <2 x i64> %3, %3 @@ -966,7 +966,7 @@ define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.16 q0 - %1 = load <8 x i16>* %p + %1 = load <8 
x i16>, <8 x i16>* %p %2 = add <8 x i16> %1, %1 %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2) %4 = add <2 x i64> %3, %3 @@ -982,7 +982,7 @@ define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.8 q0 - %1 = load <16 x i8>* %p + %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2) %4 = add <2 x i64> %3, %3 @@ -995,7 +995,7 @@ define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) { ; CHECK-LABEL: test_v4f32_f128: declare <4 x float> @test_v4f32_f128_helper(fp128 %p) define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) { - %1 = load fp128* %p + %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2) %4 = fadd <4 x float> %3, %3 @@ -1012,7 +1012,7 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) { ; SOFT: vmov r3, r2 ; HARD: vadd.f64 d1 ; HARD: vadd.f64 d0 - %1 = load <2 x double>* %p + %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x double> %1, %1 %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2) %4 = fadd <4 x float> %3, %3 @@ -1028,7 +1028,7 @@ define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vadd.i64 q0 - %1 = load <2 x i64>* %p + %1 = load <2 x i64>, <2 x i64>* %p %2 = add <2 x i64> %1, %1 %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2) %4 = fadd <4 x float> %3, %3 @@ -1044,7 +1044,7 @@ define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x i32>* %p + %1 = load <4 x i32>, <4 x i32>* %p %2 = add <4 x i32> %1, %1 %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2) %4 = fadd <4 x float> %3, %3 @@ -1060,7 +1060,7 @@ define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.16 q0 - %1 = load <8 x i16>* %p + %1 = load <8 x i16>, <8 x i16>* %p %2 = add <8 x i16> %1, %1 %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2) %4 = fadd <4 x float> %3, %3 @@ -1076,7 +1076,7 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.8 q0 - %1 = load <16 x i8>* %p + %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2) %4 = fadd <4 x float> %3, %3 @@ -1089,7 +1089,7 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) { ; CHECK-LABEL: test_v4i32_f128: declare <4 x i32> @test_v4i32_f128_helper(fp128 %p) define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) { - %1 = load fp128* %p + %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2) %4 = add <4 x i32> %3, %3 @@ -1106,7 +1106,7 @@ define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) { ; SOFT: vmov r3, r2 ; HARD: vadd.f64 d1 ; HARD: vadd.f64 d0 - %1 = load <2 x double>* %p + %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x double> %1, %1 %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2) %4 = add <4 x i32> %3, %3 @@ -1122,7 +1122,7 @@ define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vadd.i64 q0 - %1 = load <2 x i64>* %p + %1 = load <2 x i64>, <2 x i64>* %p %2 = add <2 x i64> %1, %1 %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2) %4 = add <4 x i32> %3, %3 @@ -1138,7 +1138,7 @@ define void 
@test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x float>* %p + %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2) %4 = add <4 x i32> %3, %3 @@ -1154,7 +1154,7 @@ define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.16 q0 - %1 = load <8 x i16>* %p + %1 = load <8 x i16>, <8 x i16>* %p %2 = add <8 x i16> %1, %1 %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2) %4 = add <4 x i32> %3, %3 @@ -1170,7 +1170,7 @@ define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.8 q0 - %1 = load <16 x i8>* %p + %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2) %4 = add <4 x i32> %3, %3 @@ -1183,7 +1183,7 @@ define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) { ; CHECK-LABEL: test_v8i16_f128: declare <8 x i16> @test_v8i16_f128_helper(fp128 %p) define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) { - %1 = load fp128* %p + %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2) %4 = add <8 x i16> %3, %3 @@ -1200,7 +1200,7 @@ define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) { ; SOFT: vmov r3, r2 ; HARD: vadd.f64 d1 ; HARD: vadd.f64 d0 - %1 = load <2 x double>* %p + %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x double> %1, %1 %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2) %4 = add <8 x i16> %3, %3 @@ -1216,7 +1216,7 @@ define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vadd.i64 q0 - %1 = load <2 x i64>* %p + %1 = load <2 x i64>, <2 x i64>* %p %2 = add <2 x i64> %1, %1 %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2) %4 = add <8 x i16> %3, %3 @@ -1232,7 +1232,7 @@ define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x float>* %p + %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2) %4 = add <8 x i16> %3, %3 @@ -1248,7 +1248,7 @@ define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.32 q0 - %1 = load <4 x i32>* %p + %1 = load <4 x i32>, <4 x i32>* %p %2 = add <4 x i32> %1, %1 %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2) %4 = add <8 x i16> %3, %3 @@ -1264,7 +1264,7 @@ define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) { ; SOFT: vmov r1, r0 ; SOFT: vmov r3, r2 ; HARD: vrev64.8 q0 - %1 = load <16 x i8>* %p + %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2) %4 = add <8 x i16> %3, %3 @@ -1277,7 +1277,7 @@ define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) { ; CHECK-LABEL: test_v16i8_f128: declare <16 x i8> @test_v16i8_f128_helper(fp128 %p) define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) { - %1 = load fp128* %p + %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2) %4 = add <16 x i8> %3, %3 @@ -1294,7 +1294,7 @@ define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) { ; SOFT: vmov r3, r2 ; HARD: vadd.f64 d1 ; HARD: vadd.f64 d0 - %1 = load <2 x double>* %p + %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x 
double> %1, %1
 %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
 %4 = add <16 x i8> %3, %3
@@ -1310,7 +1310,7 @@ define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vadd.i64 q0
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
 %2 = add <2 x i64> %1, %1
 %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
 %4 = add <16 x i8> %3, %3
@@ -1326,7 +1326,7 @@ define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
 %2 = fadd <4 x float> %1, %1
 %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
 %4 = add <16 x i8> %3, %3
@@ -1342,7 +1342,7 @@ define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
 %2 = add <4 x i32> %1, %1
 %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
 %4 = add <16 x i8> %3, %3
@@ -1358,7 +1358,7 @@ define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.16 q0
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
 %2 = add <8 x i16> %1, %1
 %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
 %4 = add <16 x i8> %3, %3
diff --git a/llvm/test/CodeGen/ARM/bswap16.ll b/llvm/test/CodeGen/ARM/bswap16.ll
index 70c62d294ee..dc0e468b72d 100644
--- a/llvm/test/CodeGen/ARM/bswap16.ll
+++ b/llvm/test/CodeGen/ARM/bswap16.ll
@@ -4,7 +4,7 @@
 define void @test1(i16* nocapture %data) {
 entry:
- %0 = load i16* %data, align 2
+ %0 = load i16, i16* %data, align 2
 %1 = tail call i16 @llvm.bswap.i16(i16 %0)
 store i16 %1, i16* %data, align 2
 ret void
@@ -30,7 +30,7 @@ entry:
 define i16 @test3(i16* nocapture %data) {
 entry:
- %0 = load i16* %data, align 2
+ %0 = load i16, i16* %data, align 2
 %1 = tail call i16 @llvm.bswap.i16(i16 %0)
 ret i16 %1
diff --git a/llvm/test/CodeGen/ARM/call-tc.ll b/llvm/test/CodeGen/ARM/call-tc.ll
index a35fd747646..b2b6aaec813 100644
--- a/llvm/test/CodeGen/ARM/call-tc.ll
+++ b/llvm/test/CodeGen/ARM/call-tc.ll
@@ -24,7 +24,7 @@ define void @t2() {
 ; CHECKT2D: ldr
 ; CHECKT2D-NEXT: ldr
 ; CHECKT2D-NEXT: bx r0
- %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp = load i32 ()*, i32 ()** @t ; <i32 ()*> [#uses=1]
 %tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
 ret void
 }
@@ -153,7 +153,7 @@ define i32 @t9() nounwind {
 ; CHECKT2D: b.w ___divsi3
 %lock = alloca %class.MutexLock, align 1
 %1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
- %2 = load i32* @x, align 4
+ %2 = load i32, i32* @x, align 4
 %3 = sdiv i32 1000, %2
 %4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
 ret i32 %3
@@ -170,7 +170,7 @@ define float @libcall_tc_test2(float* nocapture %a, float %b) {
 ; CHECKT2D-LABEL: libcall_tc_test2:
 ; CHECKT2D: blx _floorf
 ; CHECKT2D: b.w _truncf
- %1 = load float* %a, align 4
+ %1 = load float, float* %a, align 4
 %call = tail call float @floorf(float %1)
 store float %call, float* %a, align 4
 %call1 = tail call float @truncf(float %b)
diff --git a/llvm/test/CodeGen/ARM/call.ll b/llvm/test/CodeGen/ARM/call.ll
index 97827bc6205..87252a91e1b 100644
--- a/llvm/test/CodeGen/ARM/call.ll
+++ b/llvm/test/CodeGen/ARM/call.ll
@@ -20,7 +20,7 @@ define void @f() {
 define void @g.upgrd.1() {
 ; CHECKV4: mov lr, pc
 ; CHECKV5: blx
- %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp = load i32 ()*, i32 ()** 
@t ; <i32 ()*> [#uses=1] %tmp.upgrd.2 = call i32 %tmp( ) ; <i32> [#uses=0] ret void } @@ -30,10 +30,10 @@ define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind { ; CHECKV4: bx r{{.*}} BB0: %5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1] - %t35 = load volatile i32* %5 ; <i32> [#uses=1] + %t35 = load volatile i32, i32* %5 ; <i32> [#uses=1] %6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1] %7 = getelementptr i32*, i32** %6, i32 86 ; <i32**> [#uses=1] - %8 = load i32** %7 ; <i32*> [#uses=1] + %8 = load i32*, i32** %7 ; <i32*> [#uses=1] %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1] %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1] ret i32* %10 diff --git a/llvm/test/CodeGen/ARM/call_nolink.ll b/llvm/test/CodeGen/ARM/call_nolink.ll index 93be566212f..0cd5bcd086c 100644 --- a/llvm/test/CodeGen/ARM/call_nolink.ll +++ b/llvm/test/CodeGen/ARM/call_nolink.ll @@ -23,31 +23,31 @@ bb115.i.i.bb115.i.i_crit_edge: ; preds = %bb115.i.i bb115.i.i: ; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot %i_addr.3210.0.i.i = phi i32 [ %tmp166.i.i, %bb115.i.i.bb115.i.i_crit_edge ], [ 0, %newFuncRoot ] ; <i32> [#uses=7] %tmp124.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 0 ; <i32*> [#uses=1] - %tmp125.i.i = load i32* %tmp124.i.i ; <i32> [#uses=1] + %tmp125.i.i = load i32, i32* %tmp124.i.i ; <i32> [#uses=1] %tmp126.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp125.i.i ; <i32*> [#uses=1] - %tmp127.i.i = load i32* %tmp126.i.i ; <i32> [#uses=1] + %tmp127.i.i = load i32, i32* %tmp126.i.i ; <i32> [#uses=1] %tmp131.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 1 ; <i32*> [#uses=1] - %tmp132.i.i = load i32* %tmp131.i.i ; <i32> [#uses=1] + %tmp132.i.i = load i32, i32* %tmp131.i.i ; <i32> [#uses=1] %tmp133.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp132.i.i ; <i32*> [#uses=1] - %tmp134.i.i = load i32* %tmp133.i.i ; <i32> [#uses=1] + %tmp134.i.i = load i32, i32* %tmp133.i.i ; <i32> [#uses=1] %tmp138.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 2 ; <i32*> [#uses=1] - %tmp139.i.i = load i32* %tmp138.i.i ; <i32> [#uses=1] + %tmp139.i.i = load i32, i32* %tmp138.i.i ; <i32> [#uses=1] %tmp140.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp139.i.i ; <i32*> [#uses=1] - %tmp141.i.i = load i32* %tmp140.i.i ; <i32> [#uses=1] + %tmp141.i.i = load i32, i32* %tmp140.i.i ; <i32> [#uses=1] %tmp143.i.i = add i32 %i_addr.3210.0.i.i, 12 ; <i32> [#uses=1] %tmp146.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 0 ; <i32*> [#uses=1] - %tmp147.i.i = load i32* %tmp146.i.i ; <i32> [#uses=1] + %tmp147.i.i = load i32, i32* %tmp146.i.i ; <i32> [#uses=1] %tmp149.i.i = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 %tmp147.i.i, i32 0 ; <i32 (i32, i32, i32)**> [#uses=1] - %tmp150.i.i = load i32 (i32, i32, i32)** %tmp149.i.i ; <i32 (i32, i32, i32)*> [#uses=1] + %tmp150.i.i = load i32 (i32, i32, i32)*, i32 (i32, i32, i32)** %tmp149.i.i ; <i32 (i32, i32, i32)*> [#uses=1] %tmp154.i.i = tail call i32 %tmp150.i.i( i32 %tmp127.i.i, i32 %tmp134.i.i, i32 %tmp141.i.i ) ; <i32> [#uses=1] %tmp155.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp143.i.i ; <i32*> [#uses=1] 
store i32 %tmp154.i.i, i32* %tmp155.i.i %tmp159.i.i = getelementptr [2 x i32], [2 x i32]* @counter, i32 0, i32 %i_addr.3210.0.i.i ; <i32*> [#uses=2] - %tmp160.i.i = load i32* %tmp159.i.i ; <i32> [#uses=1] + %tmp160.i.i = load i32, i32* %tmp159.i.i ; <i32> [#uses=1] %tmp161.i.i = add i32 %tmp160.i.i, 1 ; <i32> [#uses=1] store i32 %tmp161.i.i, i32* %tmp159.i.i %tmp166.i.i = add i32 %i_addr.3210.0.i.i, 1 ; <i32> [#uses=2] - %tmp168.i.i = load i32* @numi ; <i32> [#uses=1] + %tmp168.i.i = load i32, i32* @numi ; <i32> [#uses=1] icmp slt i32 %tmp166.i.i, %tmp168.i.i ; <i1>:0 [#uses=1] br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub } diff --git a/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll b/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll index 4e5fb5e5c60..4f2b66d54dc 100644 --- a/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll +++ b/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll @@ -28,7 +28,7 @@ for.cond1: ; preds = %for.end9, %for.cond for.body2: ; preds = %for.cond1 store i32 %storemerge11, i32* @b, align 4, !dbg !26 tail call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !28 - %0 = load i64* @a, align 8, !dbg !29 + %0 = load i64, i64* @a, align 8, !dbg !29 %xor = xor i64 %0, %e.1.ph, !dbg !29 %conv3 = trunc i64 %xor to i32, !dbg !29 tail call void @llvm.dbg.value(metadata i32 %conv3, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !29 @@ -44,7 +44,7 @@ land.end: ; preds = %land.rhs, %for.body %1 = phi i1 [ false, %for.body2 ], [ %tobool5, %land.rhs ] %land.ext = zext i1 %1 to i32 %call6 = tail call i32 bitcast (i32 (...)* @fn2 to i32 (i32, i32*)*)(i32 %land.ext, i32* null) #3 - %2 = load i32* @b, align 4, !dbg !26 + %2 = load i32, i32* @b, align 4, !dbg !26 %inc8 = add nsw i32 %2, 1, !dbg !26 %phitmp = and i64 %xor, 4294967295, !dbg !26 br label %for.cond1.outer, !dbg !26 @@ -52,7 +52,7 @@ land.end: ; preds = %land.rhs, %for.body for.cond1.outer: ; preds = %land.end, %for.cond1.preheader %storemerge11.ph = phi i32 [ %inc8, %land.end ], [ 0, %for.cond1.preheader ] %e.1.ph = phi i64 [ %phitmp, %land.end ], [ 0, %for.cond1.preheader ] - %3 = load i32* @d, align 4, !dbg !31 + %3 = load i32, i32* @d, align 4, !dbg !31 %tobool10 = icmp eq i32 %3, 0, !dbg !31 br label %for.cond1 diff --git a/llvm/test/CodeGen/ARM/coalesce-subregs.ll b/llvm/test/CodeGen/ARM/coalesce-subregs.ll index 5cc3ecab366..72fefeacfc5 100644 --- a/llvm/test/CodeGen/ARM/coalesce-subregs.ll +++ b/llvm/test/CodeGen/ARM/coalesce-subregs.ll @@ -86,22 +86,22 @@ declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounw define void @f3(float* %p, float* %q) nounwind ssp { entry: %arrayidx = getelementptr inbounds float, float* %p, i32 3 - %0 = load float* %arrayidx, align 4 + %0 = load float, float* %arrayidx, align 4 %vecins = insertelement <2 x float> undef, float %0, i32 1 %tobool = icmp eq float* %q, null br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry - %1 = load float* %q, align 4 + %1 = load float, float* %q, align 4 %arrayidx2 = getelementptr inbounds float, float* %q, i32 1 - %2 = load float* %arrayidx2, align 4 + %2 = load float, float* %arrayidx2, align 4 %add = fadd float %1, %2 %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0 br label %if.end if.else: ; preds = %entry %arrayidx4 = getelementptr inbounds float, float* %p, i32 2 - %3 = load float* %arrayidx4, align 4 + %3 = load float, float* %arrayidx4, align 4 %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0 br label 
%if.end @@ -129,9 +129,9 @@ entry: br i1 %tobool, label %if.end, label %if.then if.then: ; preds = %entry - %1 = load float* %q, align 4 + %1 = load float, float* %q, align 4 %arrayidx1 = getelementptr inbounds float, float* %q, i32 1 - %2 = load float* %arrayidx1, align 4 + %2 = load float, float* %arrayidx1, align 4 %add = fadd float %1, %2 %vecins = insertelement <2 x float> %vld1, float %add, i32 1 br label %if.end @@ -165,12 +165,12 @@ entry: if.then: ; preds = %entry %arrayidx = getelementptr inbounds float, float* %q, i32 1 - %1 = load float* %arrayidx, align 4 + %1 = load float, float* %arrayidx, align 4 %add4 = fadd float %vecext, %1 - %2 = load float* %q, align 4 + %2 = load float, float* %q, align 4 %add6 = fadd float %vecext1, %2 %arrayidx7 = getelementptr inbounds float, float* %q, i32 2 - %3 = load float* %arrayidx7, align 4 + %3 = load float, float* %arrayidx7, align 4 %add8 = fadd float %vecext2, %3 br label %if.end @@ -231,7 +231,7 @@ bb3: ; preds = %bb12, %bb br i1 undef, label %bb10, label %bb12 bb10: ; preds = %bb3 - %tmp11 = load <4 x float>* undef, align 8 + %tmp11 = load <4 x float>, <4 x float>* undef, align 8 br label %bb12 bb12: ; preds = %bb10, %bb3 @@ -333,7 +333,7 @@ for.body: ; preds = %for.end, %entry br i1 undef, label %for.body29, label %for.end for.body29: ; preds = %for.body29, %for.body - %0 = load <2 x double>* null, align 1 + %0 = load <2 x double>, <2 x double>* null, align 1 %splat40 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer %mul41 = fmul <2 x double> undef, %splat40 %add42 = fadd <2 x double> undef, %mul41 diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll index 9f27eee3f76..bf5cf52d8b5 100644 --- a/llvm/test/CodeGen/ARM/code-placement.ll +++ b/llvm/test/CodeGen/ARM/code-placement.ll @@ -19,7 +19,7 @@ bb: %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] %1 = getelementptr inbounds %struct.list_head, %struct.list_head* %list_addr.05, i32 0, i32 0 - %2 = load %struct.list_head** %1, align 4 + %2 = load %struct.list_head*, %struct.list_head** %1, align 4 store %struct.list_head* %next.04, %struct.list_head** %1, align 4 %3 = icmp eq %struct.list_head* %2, null br i1 %3, label %bb2, label %bb @@ -46,7 +46,7 @@ bb1: ; preds = %bb2.preheader, %bb1 %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1] %tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1] %scevgep = getelementptr i32, i32* %src, i32 %tmp17 ; <i32*> [#uses=1] - %1 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %1 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1] %2 = add nsw i32 %1, %sum.08 ; <i32> [#uses=2] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] %exitcond = icmp eq i32 %indvar.next, %size ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/commute-movcc.ll b/llvm/test/CodeGen/ARM/commute-movcc.ll index 60025762be2..2978d317ad7 100644 --- a/llvm/test/CodeGen/ARM/commute-movcc.ll +++ b/llvm/test/CodeGen/ARM/commute-movcc.ll @@ -32,7 +32,7 @@ for.body: ; preds = %entry, %if.end8 %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ] %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ] %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.012 - %0 = load i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx, align 4 %mul = mul i32 %0, %0 %sub = add nsw i32 %i.012, -5 %cmp2 = icmp eq i32 %sub, %Pref diff --git a/llvm/test/CodeGen/ARM/compare-call.ll 
b/llvm/test/CodeGen/ARM/compare-call.ll
index 61034b3c8cb..d4bd92b8baa 100644
--- a/llvm/test/CodeGen/ARM/compare-call.ll
+++ b/llvm/test/CodeGen/ARM/compare-call.ll
@@ -2,9 +2,9 @@
 define void @test3(float* %glob, i32 %X) {
 entry:
- %tmp = load float* %glob ; <float> [#uses=1]
+ %tmp = load float, float* %glob ; <float> [#uses=1]
 %tmp2 = getelementptr float, float* %glob, i32 2 ; <float*> [#uses=1]
- %tmp3 = load float* %tmp2 ; <float> [#uses=1]
+ %tmp3 = load float, float* %tmp2 ; <float> [#uses=1]
 %tmp.upgrd.1 = fcmp ogt float %tmp, %tmp3 ; <i1> [#uses=1]
 br i1 %tmp.upgrd.1, label %cond_true, label %UnifiedReturnBlock
diff --git a/llvm/test/CodeGen/ARM/copy-paired-reg.ll b/llvm/test/CodeGen/ARM/copy-paired-reg.ll
index 17a4461c682..453fac4b150 100644
--- a/llvm/test/CodeGen/ARM/copy-paired-reg.ll
+++ b/llvm/test/CodeGen/ARM/copy-paired-reg.ll
@@ -11,7 +11,7 @@ define void @f() {
 store atomic i64 0, i64* %c seq_cst, align 8
 store atomic i64 0, i64* %d seq_cst, align 8
- %e = load atomic i64* %d seq_cst, align 8
+ %e = load atomic i64, i64* %d seq_cst, align 8
 ret void
 }
diff --git a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
index 96b6bb6dd1d..287c081ac5e 100644
--- a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
+++ b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
@@ -38,7 +38,7 @@ for.body: ; preds = %for.body, %for.body
 %arrayidx22 = getelementptr i8, i8* %green, i32 %i.031
 %arrayidx25 = getelementptr i8, i8* %blue, i32 %i.031
 %arrayidx28 = getelementptr i8, i8* %alpha, i32 %i.031
- %tmp12 = load float* %arrayidx11, align 4
+ %tmp12 = load float, float* %arrayidx11, align 4
 tail call fastcc void @sample_3d_nearest(i8* %tObj, i8* undef, float undef, float %tmp12, float undef, i8* %arrayidx19, i8* %arrayidx22, i8* %arrayidx25, i8* %arrayidx28)
 %0 = add i32 %i.031, 1
 %exitcond = icmp eq i32 %0, %n
diff --git a/llvm/test/CodeGen/ARM/crash.ll b/llvm/test/CodeGen/ARM/crash.ll
index 8e367011dfe..3b01d8113b9 100644
--- a/llvm/test/CodeGen/ARM/crash.ll
+++ b/llvm/test/CodeGen/ARM/crash.ll
@@ -5,7 +5,7 @@
 define void @func() nounwind {
 entry:
- %tmp = load i32* undef, align 4
+ %tmp = load i32, i32* undef, align 4
 br label %bb1
 bb1:
diff --git a/llvm/test/CodeGen/ARM/cse-ldrlit.ll b/llvm/test/CodeGen/ARM/cse-ldrlit.ll
index 3f5d4c2e3c2..e76e47eea30 100644
--- a/llvm/test/CodeGen/ARM/cse-ldrlit.ll
+++ b/llvm/test/CodeGen/ARM/cse-ldrlit.ll
@@ -9,7 +9,7 @@ declare void @bar(i32*)
 define void @foo() {
- %flag = load i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1)
+ %flag = load i32, i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1)
 %tst = icmp eq i32 %flag, 0
 br i1 %tst, label %true, label %false
 true:
diff --git a/llvm/test/CodeGen/ARM/cse-libcalls.ll b/llvm/test/CodeGen/ARM/cse-libcalls.ll
index 4f5b7592c84..1255ec5a78f 100644
--- a/llvm/test/CodeGen/ARM/cse-libcalls.ll
+++ b/llvm/test/CodeGen/ARM/cse-libcalls.ll
@@ -10,7 +10,7 @@ target triple = "i386-apple-darwin8"
 define double @u_f_nonbon(double %lambda) nounwind {
 entry:
- %tmp19.i.i = load double* null, align 4 ; <double> [#uses=2]
+ %tmp19.i.i = load double, double* null, align 4 ; <double> [#uses=2]
 %tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1]
 %dielectric.0.i = select i1 %tmp6.i, double 1.000000e+00, double %tmp19.i.i ; <double> [#uses=1]
 %tmp10.i4 = fdiv double 0x4074C2D71F36262D, %dielectric.0.i ; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
index 8950abdef6a..98a2ce973ea 
100644 --- a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll +++ b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll @@ -5,7 +5,7 @@ define float @f(<4 x i16>* nocapture %in) { ; CHECK: vldr ; CHECK: vmovl.u16 ; CHECK-NOT: vand - %1 = load <4 x i16>* %in + %1 = load <4 x i16>, <4 x i16>* %in ; CHECK: vcvt.f32.u32 %2 = uitofp <4 x i16> %1 to <4 x float> %3 = extractelement <4 x float> %2, i32 0 @@ -21,7 +21,7 @@ define float @f(<4 x i16>* nocapture %in) { define float @g(<4 x i16>* nocapture %in) { ; CHECK: vldr - %1 = load <4 x i16>* %in + %1 = load <4 x i16>, <4 x i16>* %in ; CHECK-NOT: uxth %2 = extractelement <4 x i16> %1, i32 0 ; CHECK: vcvt.f32.u32 diff --git a/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll b/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll index 1addf639bfe..1f814e70d54 100644 --- a/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll +++ b/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll @@ -48,7 +48,7 @@ define i32 @test3() { %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16 store i32 0, i32* %tmp - %tmp1 = load i32* %tmp + %tmp1 = load i32, i32* %tmp ret i32 %tmp1 } diff --git a/llvm/test/CodeGen/ARM/debug-frame-vararg.ll b/llvm/test/CodeGen/ARM/debug-frame-vararg.ll index 063e32145c6..934e125c4db 100644 --- a/llvm/test/CodeGen/ARM/debug-frame-vararg.ll +++ b/llvm/test/CodeGen/ARM/debug-frame-vararg.ll @@ -118,11 +118,11 @@ entry: for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %ap.cur = load i8** %vl, align 4 + %ap.cur = load i8*, i8** %vl, align 4 %ap.next = getelementptr i8, i8* %ap.cur, i32 4 store i8* %ap.next, i8** %vl, align 4 %0 = bitcast i8* %ap.cur to i32* - %1 = load i32* %0, align 4 + %1 = load i32, i32* %0, align 4 %call = call i32 @foo(i32 %1) #1 %inc = add nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, %count diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll index 8e8431bcd04..fcdf43bb61a 100644 --- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll +++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll @@ -47,21 +47,21 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load call void @llvm.dbg.declare(metadata %2* %6, metadata !136, metadata !163), !dbg !137 call void @llvm.dbg.declare(metadata %2* %6, metadata !138, metadata !164), !dbg !137 call void @llvm.dbg.declare(metadata %2* %6, metadata !139, metadata !165), !dbg !140 - %8 = load %0** %1, align 4, !dbg !141 - %9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141 + %8 = load %0*, %0** %1, align 4, !dbg !141 + %9 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141 %10 = bitcast %0* %8 to i8*, !dbg !141 %11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141 %12 = bitcast i8* %11 to %0*, !dbg !141 %13 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !141 - %14 = load i8** %13, !dbg !141 + %14 = load i8*, i8** %13, !dbg !141 %15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141 %16 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141 - %17 = load %struct.__block_byref_mydata** %16, !dbg !141 + %17 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %16, !dbg !141 %18 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141 store %0* %12, %0** %18, align 4, !dbg !141 %19 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !143 - %20 = load 
%3** %19, align 4, !dbg !143 - %21 = load i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143 + %20 = load %3*, %3** %19, align 4, !dbg !143 + %21 = load i32, i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143 %22 = bitcast %3* %20 to i8*, !dbg !143 %23 = getelementptr inbounds i8, i8* %22, i32 %21, !dbg !143 %24 = bitcast i8* %23 to %struct.CR*, !dbg !143 @@ -69,8 +69,8 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load %26 = bitcast %struct.CR* %data to i8*, !dbg !143 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143 %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144 - %28 = load %3** %27, align 4, !dbg !144 - %29 = load i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 + %28 = load %3*, %3** %27, align 4, !dbg !144 + %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 %30 = bitcast %3* %28 to i8*, !dbg !144 %31 = getelementptr inbounds i8, i8* %30, i32 %29, !dbg !144 %32 = bitcast i8* %31 to %struct.CR*, !dbg !144 @@ -78,15 +78,15 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144 %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145 - %36 = load %3** %35, align 4, !dbg !145 + %36 = load %3*, %3** %35, align 4, !dbg !145 %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145 - %38 = load i8** %37, !dbg !145 + %38 = load i8*, i8** %37, !dbg !145 %39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145 %40 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145 - %41 = load %struct.__block_byref_mydata** %40, !dbg !145 + %41 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %40, !dbg !145 %42 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145 - %43 = load %0** %42, align 4, !dbg !145 - %44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145 + %43 = load %0*, %0** %42, align 4, !dbg !145 + %44 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145 %45 = bitcast %3* %36 to i8*, !dbg !145 call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145 ret void, !dbg !146 diff --git a/llvm/test/CodeGen/ARM/divmod.ll b/llvm/test/CodeGen/ARM/divmod.ll index fa290c4eae1..9336d0c477d 100644 --- a/llvm/test/CodeGen/ARM/divmod.ll +++ b/llvm/test/CodeGen/ARM/divmod.ll @@ -47,7 +47,7 @@ define void @do_indent(i32 %cols) nounwind { entry: ; A8-LABEL: do_indent: ; SWIFT-LABEL: do_indent: - %0 = load i32* @flags, align 4 + %0 = load i32, i32* @flags, align 4 %1 = and i32 %0, 67108864 %2 = icmp eq i32 %1, 0 br i1 %2, label %bb1, label %bb @@ -57,7 +57,7 @@ bb: ; SWIFT: sdiv ; SWIFT: mls ; SWIFT-NOT: bl __divmodsi4 - %3 = load i32* @tabsize, align 4 + %3 = load i32, i32* @tabsize, align 4 %4 = srem i32 %cols, %3 %5 = sdiv i32 %cols, %3 %6 = tail call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false) diff --git a/llvm/test/CodeGen/ARM/dwarf-eh.ll b/llvm/test/CodeGen/ARM/dwarf-eh.ll index 4bbfe8b7408..228d8b9e40e 100644 --- a/llvm/test/CodeGen/ARM/dwarf-eh.ll +++ b/llvm/test/CodeGen/ARM/dwarf-eh.ll @@ -34,12 +34,12 @@ define void @f() uwtable { store i32 %7, i32* %2 br label %8 - %9 = load i32* %2 + %9 = load i32, i32* %2 %10 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)) nounwind %11 
= icmp eq i32 %9, %10 br i1 %11, label %12, label %17 - %13 = load i8** %1 + %13 = load i8*, i8** %1 %14 = call i8* @__cxa_begin_catch(i8* %13) #3 %15 = bitcast i8* %14 to %struct.exception* store %struct.exception* %15, %struct.exception** %e @@ -48,8 +48,8 @@ define void @f() uwtable { ret void - %18 = load i8** %1 - %19 = load i32* %2 + %18 = load i8*, i8** %1 + %19 = load i32, i32* %2 %20 = insertvalue { i8*, i32 } undef, i8* %18, 0 %21 = insertvalue { i8*, i32 } %20, i32 %19, 1 resume { i8*, i32 } %21 diff --git a/llvm/test/CodeGen/ARM/dyn-stackalloc.ll b/llvm/test/CodeGen/ARM/dyn-stackalloc.ll index 487b131fef3..1b64a01aee1 100644 --- a/llvm/test/CodeGen/ARM/dyn-stackalloc.ll +++ b/llvm/test/CodeGen/ARM/dyn-stackalloc.ll @@ -19,7 +19,7 @@ define void @t1(%struct.state* %v) { ; CHECK-NOT: sub r{{[0-9]+}}, sp, [[REG1]] ; CHECK: sub sp, sp, [[REG1]] - %tmp6 = load i32* null + %tmp6 = load i32, i32* null %tmp8 = alloca float, i32 %tmp6 store i32 1, i32* null br i1 false, label %bb123.preheader, label %return @@ -29,7 +29,7 @@ bb123.preheader: ; preds = %0 bb43: ; preds = %bb123.preheader call fastcc void @f1(float* %tmp8, float* null, i32 0) - %tmp70 = load i32* null + %tmp70 = load i32, i32* null %tmp85 = getelementptr float, float* %tmp8, i32 0 call fastcc void @f2(float* null, float* null, float* %tmp85, i32 %tmp70) ret void diff --git a/llvm/test/CodeGen/ARM/emit-big-cst.ll b/llvm/test/CodeGen/ARM/emit-big-cst.ll index 01d789c492f..7453e8caa94 100644 --- a/llvm/test/CodeGen/ARM/emit-big-cst.ll +++ b/llvm/test/CodeGen/ARM/emit-big-cst.ll @@ -11,7 +11,7 @@ define void @accessBig(i64* %storage) { %addr = bitcast i64* %storage to i82* - %bigLoadedCst = load volatile i82* @bigCst + %bigLoadedCst = load volatile i82, i82* @bigCst %tmp = add i82 %bigLoadedCst, 1 store i82 %tmp, i82* %addr ret void diff --git a/llvm/test/CodeGen/ARM/extload-knownzero.ll b/llvm/test/CodeGen/ARM/extload-knownzero.ll index f55b95104b8..da340f7a943 100644 --- a/llvm/test/CodeGen/ARM/extload-knownzero.ll +++ b/llvm/test/CodeGen/ARM/extload-knownzero.ll @@ -8,7 +8,7 @@ entry: br i1 %tmp1, label %bb1, label %bb2 bb1: ; CHECK: ldrh - %tmp2 = load i16* %ptr, align 2 + %tmp2 = load i16, i16* %ptr, align 2 br label %bb2 bb2: ; CHECK-NOT: uxth diff --git a/llvm/test/CodeGen/ARM/extloadi1.ll b/llvm/test/CodeGen/ARM/extloadi1.ll index 2504c6c61e3..a67859d60d1 100644 --- a/llvm/test/CodeGen/ARM/extloadi1.ll +++ b/llvm/test/CodeGen/ARM/extloadi1.ll @@ -4,7 +4,7 @@ define void @__mf_sigusr1_respond() { entry: - %tmp8.b = load i1* @handler_installed.6144.b ; <i1> [#uses=1] + %tmp8.b = load i1, i1* @handler_installed.6144.b ; <i1> [#uses=1] br i1 false, label %cond_true7, label %cond_next cond_next: ; preds = %entry diff --git a/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll index 010b77f8464..a52cd830195 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -17,7 +17,7 @@ entry: store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4 ; ARM: add r0, r0, #124 ; THUMB: adds r0, #124 - %0 = load i32** %addr, align 4 + %0 = load i32*, i32** %addr, align 4 ret i32* %0 } @@ -30,7 +30,7 @@ entry: ; ARM: movw [[R:r[0-9]+]], #1148 ; ARM: add r0, r{{[0-9]+}}, [[R]] ; THUMB: addw r0, r0, #1148 - %0 = load i32** %addr, align 4 + %0 = load i32*, i32** %addr, align 4 ret i32* %0 } @@ -42,7 +42,7 @@ entry: store i32* getelementptr inbounds ([3 x 
[3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4 ; ARM: add r0, r0, #140 ; THUMB: adds r0, #140 - %0 = load i32** %addr, align 4 + %0 = load i32*, i32** %addr, align 4 ret i32* %0 } @@ -61,6 +61,6 @@ entry: ; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 ; ARM: movw r{{[0-9]}}, #1284 ; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284 - %0 = load i32** %addr, align 4 + %0 = load i32*, i32** %addr, align 4 ret i32* %0 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-align.ll b/llvm/test/CodeGen/ARM/fast-isel-align.ll index 42685429994..3d98dcc1fb2 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-align.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-align.ll @@ -34,7 +34,7 @@ entry: ; THUMB: str r1, [r0] %add = fadd float %x, %y - %0 = load %struct.anon** @a, align 4 + %0 = load %struct.anon*, %struct.anon** @a, align 4 %x1 = getelementptr inbounds %struct.anon, %struct.anon* %0, i32 0, i32 0 store float %add, float* %x1, align 1 ret void @@ -66,9 +66,9 @@ entry: ; THUMB: @unaligned_f32_load %0 = alloca %class.TAlignTest*, align 4 store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4 - %1 = load %class.TAlignTest** %0 + %1 = load %class.TAlignTest*, %class.TAlignTest** %0 %2 = getelementptr inbounds %class.TAlignTest, %class.TAlignTest* %1, i32 0, i32 1 - %3 = load float* %2, align 1 + %3 = load float, float* %2, align 1 %4 = fcmp une float %3, 0.000000e+00 ; ARM: ldr r[[R:[0-9]+]], [r0, #2] ; ARM: vmov s0, r[[R]] @@ -103,7 +103,7 @@ entry: ; THUMB-STRICT-ALIGN: ldrb ; THUMB-STRICT-ALIGN: ldrb - %0 = load i16* %x, align 1 + %0 = load i16, i16* %x, align 1 ret i16 %0 } @@ -139,6 +139,6 @@ entry: ; THUMB-STRICT-ALIGN: ldrb ; THUMB-STRICT-ALIGN: ldrb - %0 = load i32* %x, align 1 + %0 = load i32, i32* %x, align 1 ret i32 %0 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-call.ll b/llvm/test/CodeGen/ARM/fast-isel-call.ll index 0a6c86579e0..bd170f30d97 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-call.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-call.ll @@ -157,7 +157,7 @@ define void @foo3() uwtable { ; THUMB: blx r1 %fptr = alloca i32 (i32)*, align 8 store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8 - %1 = load i32 (i32)** %fptr, align 8 + %1 = load i32 (i32)*, i32 (i32)** %fptr, align 8 %call = call i32 %1(i32 0) ret void } diff --git a/llvm/test/CodeGen/ARM/fast-isel-fold.ll b/llvm/test/CodeGen/ARM/fast-isel-fold.ll index 145cffca9d0..37e93c0a701 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-fold.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-fold.ll @@ -14,7 +14,7 @@ define void @t1() nounwind uwtable ssp { ; THUMB: ldrb ; THUMB-NOT: uxtb ; THUMB-NOT: and{{.*}}, #255 - %1 = load i8* @a, align 1 + %1 = load i8, i8* @a, align 1 call void @foo1(i8 zeroext %1) ret void } @@ -26,7 +26,7 @@ define void @t2() nounwind uwtable ssp { ; THUMB: t2 ; THUMB: ldrh ; THUMB-NOT: uxth - %1 = load i16* @b, align 2 + %1 = load i16, i16* @b, align 2 call void @foo2(i16 zeroext %1) ret void } @@ -43,7 +43,7 @@ define i32 @t3() nounwind uwtable ssp { ; THUMB: ldrb ; THUMB-NOT: uxtb ; THUMB-NOT: and{{.*}}, #255 - %1 = load i8* @a, align 1 + %1 = load i8, i8* @a, align 1 %2 = zext i8 %1 to i32 ret i32 %2 } @@ -55,7 +55,7 @@ define i32 @t4() nounwind uwtable ssp { ; THUMB: t4 ; THUMB: ldrh ; THUMB-NOT: uxth - %1 = load i16* @b, align 2 + %1 = load i16, i16* @b, align 2 %2 = zext i16 %1 to i32 ret i32 %2 } @@ -67,7 +67,7 @@ define i32 @t5() nounwind uwtable ssp { ; THUMB: t5 ; THUMB: ldrsh ; THUMB-NOT: sxth - %1 = load i16* @b, align 2 + %1 = load i16, i16* @b, align 2 %2 = sext i16 %1 to i32 ret i32 %2 } @@ 
-79,7 +79,7 @@ define i32 @t6() nounwind uwtable ssp { ; THUMB: t6 ; THUMB: ldrsb ; THUMB-NOT: sxtb - %1 = load i8* @a, align 2 + %1 = load i8, i8* @a, align 2 %2 = sext i8 %1 to i32 ret i32 %2 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll index fcc685d20dd..cce914b094f 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll @@ -4,7 +4,7 @@ define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: ; ARM: t1 %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 1 - %0 = load i32* %add.ptr, align 4 + %0 = load i32, i32* %add.ptr, align 4 ; ARM: ldr r{{[0-9]}}, [r0, #4] ret i32 %0 } @@ -13,7 +13,7 @@ define i32 @t2(i32* nocapture %ptr) nounwind readonly { entry: ; ARM: t2 %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 63 - %0 = load i32* %add.ptr, align 4 + %0 = load i32, i32* %add.ptr, align 4 ; ARM: ldr.w r{{[0-9]}}, [r0, #252] ret i32 %0 } @@ -22,7 +22,7 @@ define zeroext i16 @t3(i16* nocapture %ptr) nounwind readonly { entry: ; ARM: t3 %add.ptr = getelementptr inbounds i16, i16* %ptr, i16 1 - %0 = load i16* %add.ptr, align 4 + %0 = load i16, i16* %add.ptr, align 4 ; ARM: ldrh r{{[0-9]}}, [r0, #2] ret i16 %0 } @@ -31,7 +31,7 @@ define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { entry: ; ARM: t4 %add.ptr = getelementptr inbounds i16, i16* %ptr, i16 63 - %0 = load i16* %add.ptr, align 4 + %0 = load i16, i16* %add.ptr, align 4 ; ARM: ldrh.w r{{[0-9]}}, [r0, #126] ret i16 %0 } @@ -40,7 +40,7 @@ define zeroext i8 @t5(i8* nocapture %ptr) nounwind readonly { entry: ; ARM: t5 %add.ptr = getelementptr inbounds i8, i8* %ptr, i8 1 - %0 = load i8* %add.ptr, align 4 + %0 = load i8, i8* %add.ptr, align 4 ; ARM: ldrb r{{[0-9]}}, [r0, #1] ret i8 %0 } @@ -49,7 +49,7 @@ define zeroext i8 @t6(i8* nocapture %ptr) nounwind readonly { entry: ; ARM: t6 %add.ptr = getelementptr inbounds i8, i8* %ptr, i8 63 - %0 = load i8* %add.ptr, align 4 + %0 = load i8, i8* %add.ptr, align 4 ; ARM: ldrb.w r{{[0-9]}}, [r0, #63] ret i8 %0 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll index e433ee76c8d..f24100b36db 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -4,7 +4,7 @@ define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: ; THUMB: t1 %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1 - %0 = load i32* %add.ptr, align 4 + %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-4] ret i32 %0 } @@ -13,7 +13,7 @@ define i32 @t2(i32* nocapture %ptr) nounwind readonly { entry: ; THUMB: t2 %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63 - %0 = load i32* %add.ptr, align 4 + %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-252] ret i32 %0 } @@ -22,7 +22,7 @@ define i32 @t3(i32* nocapture %ptr) nounwind readonly { entry: ; THUMB: t3 %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64 - %0 = load i32* %add.ptr, align 4 + %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0] ret i32 %0 } @@ -31,7 +31,7 @@ define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { entry: ; THUMB: t4 %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2] ret i16 %0 } @@ -40,7 +40,7 @@ define zeroext i16 @t5(i16* nocapture %ptr) nounwind 
readonly { entry: ; THUMB: t5 %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-254] ret i16 %0 } @@ -49,7 +49,7 @@ define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly { entry: ; THUMB: t6 %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0] ret i16 %0 } @@ -58,7 +58,7 @@ define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly { entry: ; THUMB: t7 %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1 - %0 = load i8* %add.ptr, align 1 + %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1] ret i8 %0 } @@ -67,7 +67,7 @@ define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly { entry: ; THUMB: t8 %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255 - %0 = load i8* %add.ptr, align 1 + %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255] ret i8 %0 } @@ -76,7 +76,7 @@ define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly { entry: ; THUMB: t9 %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256 - %0 = load i8* %add.ptr, align 1 + %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0] ret i8 %0 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll index 572233ea608..ca512970c9c 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll @@ -6,7 +6,7 @@ define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t1 %add.ptr = getelementptr inbounds i16, i16* %a, i64 -8 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: ldrh r0, [r0, #-16] ret i16 %0 } @@ -15,7 +15,7 @@ define zeroext i16 @t2(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t2 %add.ptr = getelementptr inbounds i16, i16* %a, i64 -16 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: ldrh r0, [r0, #-32] ret i16 %0 } @@ -24,7 +24,7 @@ define zeroext i16 @t3(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t3 %add.ptr = getelementptr inbounds i16, i16* %a, i64 -127 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: ldrh r0, [r0, #-254] ret i16 %0 } @@ -33,7 +33,7 @@ define zeroext i16 @t4(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t4 %add.ptr = getelementptr inbounds i16, i16* %a, i64 -128 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: mvn r{{[1-9]}}, #255 ; ARM: add r0, r0, r{{[1-9]}} ; ARM: ldrh r0, [r0] @@ -44,7 +44,7 @@ define zeroext i16 @t5(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t5 %add.ptr = getelementptr inbounds i16, i16* %a, i64 8 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: ldrh r0, [r0, #16] ret i16 %0 } @@ -53,7 +53,7 @@ define zeroext i16 @t6(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t6 %add.ptr = getelementptr inbounds i16, i16* %a, i64 16 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: ldrh r0, [r0, #32] ret i16 %0 } @@ -62,7 +62,7 @@ define zeroext i16 @t7(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t7 %add.ptr = getelementptr inbounds i16, i16* %a, i64 127 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: ldrh r0, [r0, 
#254] ret i16 %0 } @@ -71,7 +71,7 @@ define zeroext i16 @t8(i16* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t8 %add.ptr = getelementptr inbounds i16, i16* %a, i64 128 - %0 = load i16* %add.ptr, align 2 + %0 = load i16, i16* %add.ptr, align 2 ; ARM: add r0, r0, #256 ; ARM: ldrh r0, [r0] ret i16 %0 @@ -124,7 +124,7 @@ define signext i8 @t13(i8* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t13 %add.ptr = getelementptr inbounds i8, i8* %a, i64 -8 - %0 = load i8* %add.ptr, align 2 + %0 = load i8, i8* %add.ptr, align 2 ; ARM: ldrsb r0, [r0, #-8] ret i8 %0 } @@ -133,7 +133,7 @@ define signext i8 @t14(i8* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t14 %add.ptr = getelementptr inbounds i8, i8* %a, i64 -255 - %0 = load i8* %add.ptr, align 2 + %0 = load i8, i8* %add.ptr, align 2 ; ARM: ldrsb r0, [r0, #-255] ret i8 %0 } @@ -142,7 +142,7 @@ define signext i8 @t15(i8* nocapture %a) nounwind uwtable readonly ssp { entry: ; ARM: t15 %add.ptr = getelementptr inbounds i8, i8* %a, i64 -256 - %0 = load i8* %add.ptr, align 2 + %0 = load i8, i8* %add.ptr, align 2 ; ARM: mvn r{{[1-9]}}, #255 ; ARM: add r0, r0, r{{[1-9]}} ; ARM: ldrsb r0, [r0] diff --git a/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll b/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll index 770b9b3ba34..acf10c8b719 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll @@ -17,7 +17,7 @@ define i8 @t1() nounwind uwtable ssp { ; ALL: @t1 ; ALL: ldrb ; ALL: add - %1 = load i8* @a, align 1 + %1 = load i8, i8* @a, align 1 %2 = add nsw i8 %1, 1 ret i8 %2 } @@ -26,7 +26,7 @@ define i16 @t2() nounwind uwtable ssp { ; ALL: @t2 ; ALL: ldrh ; ALL: add - %1 = load i16* @b, align 2 + %1 = load i16, i16* @b, align 2 %2 = add nsw i16 %1, 1 ret i16 %2 } @@ -35,7 +35,7 @@ define i32 @t3() nounwind uwtable ssp { ; ALL: @t3 ; ALL: ldr ; ALL: add - %1 = load i32* @c, align 4 + %1 = load i32, i32* @c, align 4 %2 = add nsw i32 %1, 1 ret i32 %2 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-pic.ll b/llvm/test/CodeGen/ARM/fast-isel-pic.ll index fdbdf034c0c..70e15daaca6 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-pic.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-pic.ll @@ -29,7 +29,7 @@ entry: ; ARMv7-ELF-NEXT: add r[[reg2]], pc ; ARMv7-ELF: ldr r[[reg3:[0-9]+]], ; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]] - %tmp = load i32* @g + %tmp = load i32, i32* @g ret i32 %tmp } @@ -60,6 +60,6 @@ entry: ; ARMv7-ELF-NEXT: add r[[reg5]], pc ; ARMv7-ELF: ldr r[[reg6:[0-9]+]], ; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]] - %tmp = load i32* @i + %tmp = load i32, i32* @i ret i32 %tmp } diff --git a/llvm/test/CodeGen/ARM/fast-isel-pred.ll b/llvm/test/CodeGen/ARM/fast-isel-pred.ll index bf1593beef3..ae8b67d7157 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-pred.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-pred.ll @@ -7,9 +7,9 @@ entry: %X = alloca <4 x i32>, align 16 %Y = alloca <4 x float>, align 16 store i32 0, i32* %retval - %tmp = load <4 x i32>* %X, align 16 + %tmp = load <4 x i32>, <4 x i32>* %X, align 16 call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y) - %0 = load i32* %retval + %0 = load i32, i32* %retval ret i32 %0 } @@ -24,15 +24,15 @@ entry: store i8* %p, i8** %p.addr, align 4 store i32 %offset, i32* %offset.addr, align 4 store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 - %tmp = load <4 x i32>* %v.addr, align 16 + %tmp = load <4 x i32>, <4 x i32>* %v.addr, align 16 store <4 x i32> %tmp, <4 x i32>* 
%__a.addr.i, align 16 - %tmp.i = load <4 x i32>* %__a.addr.i, align 16 + %tmp.i = load <4 x i32>, <4 x i32>* %__a.addr.i, align 16 %0 = bitcast <4 x i32> %tmp.i to <16 x i8> %1 = bitcast <16 x i8> %0 to <4 x i32> %vcvt.i = sitofp <4 x i32> %1 to <4 x float> - %tmp1 = load i8** %p.addr, align 4 - %tmp2 = load i32* %offset.addr, align 4 - %tmp3 = load <4 x float>** %constants.addr, align 4 + %tmp1 = load i8*, i8** %p.addr, align 4 + %tmp2 = load i32, i32* %offset.addr, align 4 + %tmp3 = load <4 x float>*, <4 x float>** %constants.addr, align 4 call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3) ret void } @@ -48,9 +48,9 @@ entry: store i8* %p, i8** %p.addr, align 4 store i32 %offset, i32* %offset.addr, align 4 store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 - %tmp = load i64* %data, align 4 - %tmp1 = load i8** %p.addr, align 4 - %tmp2 = load i32* %offset.addr, align 4 + %tmp = load i64, i64* %data, align 4 + %tmp1 = load i8*, i8** %p.addr, align 4 + %tmp2 = load i32, i32* %offset.addr, align 4 %add.ptr = getelementptr i8, i8* %tmp1, i32 %tmp2 %0 = bitcast i8* %add.ptr to i64* %arrayidx = getelementptr inbounds i64, i64* %0, i32 0 diff --git a/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll b/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll index 7e8ed9af591..a1c8657cb81 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -6,6 +6,6 @@ target triple = "thumbv7-apple-macosx10.6.7" define i32 @f(i32* %x) nounwind ssp { %y = getelementptr inbounds i32, i32* %x, i32 5000 - %tmp103 = load i32* %y, align 4 + %tmp103 = load i32, i32* %y, align 4 ret i32 %tmp103 } diff --git a/llvm/test/CodeGen/ARM/fast-isel-static.ll b/llvm/test/CodeGen/ARM/fast-isel-static.ll index 3a11d692a1d..c3980cb51f6 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-static.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-static.ll @@ -9,12 +9,12 @@ entry: %addend.addr = alloca float*, align 4 store float* %sum, float** %sum.addr, align 4 store float* %addend, float** %addend.addr, align 4 - %tmp = load float** %sum.addr, align 4 - %tmp1 = load float* %tmp - %tmp2 = load float** %addend.addr, align 4 - %tmp3 = load float* %tmp2 + %tmp = load float*, float** %sum.addr, align 4 + %tmp1 = load float, float* %tmp + %tmp2 = load float*, float** %addend.addr, align 4 + %tmp3 = load float, float* %tmp2 %add = fadd float %tmp1, %tmp3 - %tmp4 = load float** %sum.addr, align 4 + %tmp4 = load float*, float** %sum.addr, align 4 store float %add, float* %tmp4 ret void } diff --git a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll index e8c40017675..aa37e7d2271 100644 --- a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll +++ b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll @@ -10,11 +10,11 @@ entry: %m = alloca i32, align 4 %n = alloca i32, align 4 %tmp = alloca i32, align 4 - %0 = load i32* %i, align 4 - %1 = load i32* %j, align 4 - %2 = load i32* %k, align 4 - %3 = load i32* %m, align 4 - %4 = load i32* %n, align 4 + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %j, align 4 + %2 = load i32, i32* %k, align 4 + %3 = load i32, i32* %m, align 4 + %4 = load i32, i32* %n, align 4 ; ARM: VarArg ; ARM: mov [[FP:r[0-9]+]], sp ; ARM: sub sp, sp, #32 @@ -39,7 +39,7 @@ entry: ; THUMB: bl {{_?}}CallVariadic %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) store i32 %call, i32* %tmp, align 4 - %5 = load i32* %tmp, align 4 + %5 = load i32, i32* %tmp, align 4 ret i32 %5 } diff 
--git a/llvm/test/CodeGen/ARM/fast-isel.ll b/llvm/test/CodeGen/ARM/fast-isel.ll index c8d9e3b362b..49460220c47 100644 --- a/llvm/test/CodeGen/ARM/fast-isel.ll +++ b/llvm/test/CodeGen/ARM/fast-isel.ll @@ -9,8 +9,8 @@ entry: %b.addr = alloca i32, align 4 store i32 %a, i32* %a.addr store i32 %b, i32* %b.addr - %tmp = load i32* %a.addr - %tmp1 = load i32* %b.addr + %tmp = load i32, i32* %a.addr + %tmp1 = load i32, i32* %b.addr %add = add nsw i32 %tmp, %tmp1 ret i32 %add } @@ -110,9 +110,9 @@ bb2: ; ARM: sxth bb3: - %c1 = load i8* %ptr3 - %c2 = load i16* %ptr2 - %c3 = load i32* %ptr1 + %c1 = load i8, i8* %ptr3 + %c2 = load i16, i16* %ptr2 + %c3 = load i32, i32* %ptr1 %c4 = zext i8 %c1 to i32 %c5 = sext i16 %c2 to i32 %c6 = add i32 %c4, %c5 @@ -138,7 +138,7 @@ bb3: @test4g = external global i32 define void @test4() { - %a = load i32* @test4g + %a = load i32, i32* @test4g %b = add i32 %a, 1 store i32 %b, i32* @test4g ret void diff --git a/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll b/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll index dbe431c8dc5..232ab50c3ee 100644 --- a/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll +++ b/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll @@ -6,13 +6,13 @@ define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp { entry: %ptr.addr = alloca i8*, align 8 %add = add i8 64, 64 ; 0x40 + 0x40 - %0 = load i8** %ptr.addr, align 8 + %0 = load i8*, i8** %ptr.addr, align 8 ; CHECK-LABEL: _gep_promotion: ; CHECK: ldrb {{r[0-9]+}}, {{\[r[0-9]+\]}} %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add - %1 = load i8* %arrayidx, align 1 + %1 = load i8, i8* %arrayidx, align 1 ret i8 %1 } diff --git a/llvm/test/CodeGen/ARM/flag-crash.ll b/llvm/test/CodeGen/ARM/flag-crash.ll index 9952f566007..66eb8a51c18 100644 --- a/llvm/test/CodeGen/ARM/flag-crash.ll +++ b/llvm/test/CodeGen/ARM/flag-crash.ll @@ -6,12 +6,12 @@ define fastcc void @func(%struct.gs_matrix* nocapture %pm1) nounwind { entry: %0 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 6 - %1 = load float* %0, align 4 + %1 = load float, float* %0, align 4 %2 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 8 - %3 = load float* %2, align 4 + %3 = load float, float* %2, align 4 %4 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 2 %5 = bitcast float* %4 to i32* - %6 = load i32* %5, align 4 + %6 = load i32, i32* %5, align 4 %7 = or i32 0, %6 %.mask = and i32 %7, 2147483647 %8 = icmp eq i32 %.mask, 0 diff --git a/llvm/test/CodeGen/ARM/fnegs.ll b/llvm/test/CodeGen/ARM/fnegs.ll index 65fe9e36fa1..3a4767e9173 100644 --- a/llvm/test/CodeGen/ARM/fnegs.ll +++ b/llvm/test/CodeGen/ARM/fnegs.ll @@ -21,7 +21,7 @@ define float @test1(float* %a) { entry: - %0 = load float* %a, align 4 ; <float> [#uses=2] + %0 = load float, float* %a, align 4 ; <float> [#uses=2] %1 = fsub float -0.000000e+00, %0 ; <float> [#uses=2] %2 = fpext float %1 to double ; <double> [#uses=1] %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1] @@ -48,7 +48,7 @@ entry: define float @test2(float* %a) { entry: - %0 = load float* %a, align 4 ; <float> [#uses=2] + %0 = load float, float* %a, align 4 ; <float> [#uses=2] %1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2] %2 = fpext float %1 to double ; <double> [#uses=1] %3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/fold-stack-adjust.ll b/llvm/test/CodeGen/ARM/fold-stack-adjust.ll index c5ff82eaf83..aff79a1c113 100644 --- 
a/llvm/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/llvm/test/CodeGen/ARM/fold-stack-adjust.ll @@ -82,7 +82,7 @@ define void @check_vfp_fold() minsize { %var = alloca i8, i32 16 - %tmp = load %bigVec* @var + %tmp = load %bigVec, %bigVec* @var call void @bar(i8* %var) store %bigVec %tmp, %bigVec* @var @@ -119,7 +119,7 @@ define arm_aapcs_vfpcc double @check_vfp_no_return_clobber() minsize { %var = alloca i8, i32 64 - %tmp = load %bigVec* @var + %tmp = load %bigVec, %bigVec* @var call void @bar(i8* %var) store %bigVec %tmp, %bigVec* @var @@ -152,7 +152,7 @@ define void @test_fold_point(i1 %tst) minsize { ; We want a long-lived floating register so that a callee-saved dN is used and ; there's both a vpop and a pop. - %live_val = load double* @dbl + %live_val = load double, double* @dbl br i1 %tst, label %true, label %end true: call void @bar(i8* %var) diff --git a/llvm/test/CodeGen/ARM/fp.ll b/llvm/test/CodeGen/ARM/fp.ll index 7e1f000e88d..cc47e3badda 100644 --- a/llvm/test/CodeGen/ARM/fp.ll +++ b/llvm/test/CodeGen/ARM/fp.ll @@ -45,7 +45,7 @@ define double @h(double* %v) { ;CHECK: vldr ;CHECK-NEXT: vmov entry: - %tmp = load double* %v ; <double> [#uses=1] + %tmp = load double, double* %v ; <double> [#uses=1] ret double %tmp } diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll index 5a926acc543..25fbf9070cb 100644 --- a/llvm/test/CodeGen/ARM/fp16.ll +++ b/llvm/test/CodeGen/ARM/fp16.ll @@ -16,8 +16,8 @@ define void @foo() nounwind { ; CHECK-ARMV8-LABEL: foo: ; CHECK-SOFTFLOAT-LABEL: foo: entry: - %0 = load i16* @x, align 2 - %1 = load i16* @y, align 2 + %0 = load i16, i16* @x, align 2 + %1 = load i16, i16* @y, align 2 %2 = tail call float @llvm.convert.from.fp16.f32(i16 %0) ; CHECK: __gnu_h2f_ieee ; CHECK-FP16: vcvtb.f32.f16 diff --git a/llvm/test/CodeGen/ARM/fpcmp-opt.ll b/llvm/test/CodeGen/ARM/fpcmp-opt.ll index eab5988e3eb..45bb6d2f702 100644 --- a/llvm/test/CodeGen/ARM/fpcmp-opt.ll +++ b/llvm/test/CodeGen/ARM/fpcmp-opt.ll @@ -13,8 +13,8 @@ entry: ; CHECK: vcmpe.f32 [[S1]], [[S0]] ; CHECK: vmrs APSR_nzcv, fpscr ; CHECK: beq - %0 = load float* %a - %1 = load float* %b + %0 = load float, float* %a + %1 = load float, float* %b %2 = fcmp une float %0, %1 br i1 %2, label %bb1, label %bb2 @@ -41,7 +41,7 @@ entry: ; CHECK-NOT: vcmpe.f32 ; CHECK-NOT: vmrs ; CHECK: bne - %0 = load double* %a + %0 = load double, double* %a %1 = fcmp oeq double %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 @@ -64,7 +64,7 @@ entry: ; CHECK-NOT: vcmpe.f32 ; CHECK-NOT: vmrs ; CHECK: bne - %0 = load float* %a + %0 = load float, float* %a %1 = fcmp oeq float %0, 0.000000e+00 br i1 %1, label %bb1, label %bb2 diff --git a/llvm/test/CodeGen/ARM/fpmem.ll b/llvm/test/CodeGen/ARM/fpmem.ll index 99a5930b855..23fbea911e5 100644 --- a/llvm/test/CodeGen/ARM/fpmem.ll +++ b/llvm/test/CodeGen/ARM/fpmem.ll @@ -9,7 +9,7 @@ define float @f1(float %a) { define float @f2(float* %v, float %u) { ; CHECK-LABEL: f2: ; CHECK: vldr{{.*}}[ - %tmp = load float* %v ; <float> [#uses=1] + %tmp = load float, float* %v ; <float> [#uses=1] %tmp1 = fadd float %tmp, %u ; <float> [#uses=1] ret float %tmp1 } @@ -18,7 +18,7 @@ define float @f2offset(float* %v, float %u) { ; CHECK-LABEL: f2offset: ; CHECK: vldr{{.*}}, #4] %addr = getelementptr float, float* %v, i32 1 - %tmp = load float* %addr + %tmp = load float, float* %addr %tmp1 = fadd float %tmp, %u ret float %tmp1 } @@ -27,7 +27,7 @@ define float @f2noffset(float* %v, float %u) { ; CHECK-LABEL: f2noffset: ; CHECK: vldr{{.*}}, #-4] %addr = getelementptr float, float* %v, i32 -1 - 
%tmp = load float* %addr + %tmp = load float, float* %addr %tmp1 = fadd float %tmp, %u ret float %tmp1 } diff --git a/llvm/test/CodeGen/ARM/fptoint.ll b/llvm/test/CodeGen/ARM/fptoint.ll index f50d0b96fe9..6cbb30b23fb 100644 --- a/llvm/test/CodeGen/ARM/fptoint.ll +++ b/llvm/test/CodeGen/ARM/fptoint.ll @@ -4,13 +4,13 @@ @u = weak global i32 0 ; <i32*> [#uses=2] define i32 @foo1(float *%x) { - %tmp1 = load float* %x + %tmp1 = load float, float* %x %tmp2 = bitcast float %tmp1 to i32 ret i32 %tmp2 } define i64 @foo2(double *%x) { - %tmp1 = load double* %x + %tmp1 = load double, double* %x %tmp2 = bitcast double %tmp1 to i64 ret i64 %tmp2 } diff --git a/llvm/test/CodeGen/ARM/frame-register.ll b/llvm/test/CodeGen/ARM/frame-register.ll index b04e376693d..0cc5005ec48 100644 --- a/llvm/test/CodeGen/ARM/frame-register.ll +++ b/llvm/test/CodeGen/ARM/frame-register.ll @@ -17,12 +17,12 @@ entry: %i.addr = alloca i32, align 4 %j = alloca i32, align 4 store i32 %i, i32* %i.addr, align 4 - %0 = load i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 %add = add nsw i32 %0, 1 store i32 %add, i32* %j, align 4 - %1 = load i32* %j, align 4 + %1 = load i32, i32* %j, align 4 call void @callee(i32 %1) - %2 = load i32* %j, align 4 + %2 = load i32, i32* %j, align 4 %add1 = add nsw i32 %2, 1 ret i32 %add1 } diff --git a/llvm/test/CodeGen/ARM/fusedMAC.ll b/llvm/test/CodeGen/ARM/fusedMAC.ll index e29f291dc2c..6f6cdc11491 100644 --- a/llvm/test/CodeGen/ARM/fusedMAC.ll +++ b/llvm/test/CodeGen/ARM/fusedMAC.ll @@ -144,7 +144,7 @@ entry: define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp { ; CHECK: test_fnms_f32 ; CHECK: vfnms.f32 - %tmp1 = load float* %c, align 4 + %tmp1 = load float, float* %c, align 4 %tmp2 = fsub float -0.0, %tmp1 %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone ret float %tmp3 diff --git a/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll index 623b4220c21..9731b3d39b6 100644 --- a/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll +++ b/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll @@ -15,7 +15,7 @@ define ghccc void @test_direct_tail() { define ghccc void @test_indirect_tail() { ; CHECK-LABEL: test_indirect_tail: ; CHECK: bx {{r[0-9]+}} - %func = load void()** @ind_func + %func = load void()*, void()** @ind_func tail call ghccc void()* %func() ret void } diff --git a/llvm/test/CodeGen/ARM/global-merge-1.ll b/llvm/test/CodeGen/ARM/global-merge-1.ll index e5d4def938d..20b4ba5bceb 100644 --- a/llvm/test/CodeGen/ARM/global-merge-1.ll +++ b/llvm/test/CodeGen/ARM/global-merge-1.ll @@ -55,12 +55,12 @@ declare i32 @calc(...) 
#1 ; Function Attrs: nounwind ssp define internal void @calculate() #0 { - %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4 - %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4 + %1 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4 + %2 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4 %3 = mul <4 x i32> %2, %1 store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4 - %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 - %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + %4 = load i32, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %5 = load i32, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 %6 = mul nsw i32 %5, %4 store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1 ret void diff --git a/llvm/test/CodeGen/ARM/globals.ll b/llvm/test/CodeGen/ARM/globals.ll index 2c599bf011a..bab96dadce5 100644 --- a/llvm/test/CodeGen/ARM/globals.ll +++ b/llvm/test/CodeGen/ARM/globals.ll @@ -6,7 +6,7 @@ @G = external global i32 define i32 @test1() { - %tmp = load i32* @G + %tmp = load i32, i32* @G ret i32 %tmp } diff --git a/llvm/test/CodeGen/ARM/gv-stubs-crash.ll b/llvm/test/CodeGen/ARM/gv-stubs-crash.ll index c4c4180a620..6e82afeacf8 100644 --- a/llvm/test/CodeGen/ARM/gv-stubs-crash.ll +++ b/llvm/test/CodeGen/ARM/gv-stubs-crash.ll @@ -4,7 +4,7 @@ @Exn = external hidden unnamed_addr constant { i8*, i8* } define hidden void @func(i32* %this, i32* %e) optsize align 2 { - %e.ld = load i32* %e, align 4 + %e.ld = load i32, i32* %e, align 4 %inv = invoke zeroext i1 @func2(i32* %this, i32 %e.ld) optsize to label %ret unwind label %lpad diff --git a/llvm/test/CodeGen/ARM/half.ll b/llvm/test/CodeGen/ARM/half.ll index 10cebb38c56..777aff2f007 100644 --- a/llvm/test/CodeGen/ARM/half.ll +++ b/llvm/test/CodeGen/ARM/half.ll @@ -6,7 +6,7 @@ define void @test_load_store(half* %in, half* %out) { ; CHECK-LABEL: test_load_store: ; CHECK: ldrh [[TMP:r[0-9]+]], [r0] ; CHECK: strh [[TMP]], [r1] - %val = load half* %in + %val = load half, half* %in store half %val, half* %out ret void } @@ -14,7 +14,7 @@ define void @test_load_store(half* %in, half* %out) { define i16 @test_bitcast_from_half(half* %addr) { ; CHECK-LABEL: test_bitcast_from_half: ; CHECK: ldrh r0, [r0] - %val = load half* %addr + %val = load half, half* %addr %val_int = bitcast half %val to i16 ret i16 %val_int } @@ -33,7 +33,7 @@ define float @test_extend32(half* %addr) { ; CHECK-OLD: b.w ___gnu_h2f_ieee ; CHECK-F16: vcvtb.f32.f16 ; CHECK-V8: vcvtb.f32.f16 - %val16 = load half* %addr + %val16 = load half, half* %addr %val32 = fpext half %val16 to float ret float %val32 } @@ -46,7 +46,7 @@ define double @test_extend64(half* %addr) { ; CHECK-F16: vcvtb.f32.f16 ; CHECK-F16: vcvt.f64.f32 ; CHECK-V8: vcvtb.f64.f16 - %val16 = load half* %addr + %val16 = load half, half* %addr %val32 = fpext half %val16 to double ret double %val32 } diff --git a/llvm/test/CodeGen/ARM/hidden-vis-2.ll b/llvm/test/CodeGen/ARM/hidden-vis-2.ll index 18d38d40072..a104f354295 100644 --- a/llvm/test/CodeGen/ARM/hidden-vis-2.ll +++ b/llvm/test/CodeGen/ARM/hidden-vis-2.ll @@ -7,6 +7,6 @@ entry: ; CHECK-LABEL: t: ; CHECK: ldr ; CHECK-NEXT: ldr - %0 = load i32* @x, align 4 ; <i32> [#uses=1] + %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1] ret i32 %0 } diff --git 
a/llvm/test/CodeGen/ARM/hidden-vis-3.ll b/llvm/test/CodeGen/ARM/hidden-vis-3.ll index 3bc3312e9c4..0cf2f779704 100644 --- a/llvm/test/CodeGen/ARM/hidden-vis-3.ll +++ b/llvm/test/CodeGen/ARM/hidden-vis-3.ll @@ -10,8 +10,8 @@ entry: ; CHECK: LCPI0_1: ; CHECK-NEXT: .long _y - %0 = load i32* @x, align 4 ; <i32> [#uses=1] - %1 = load i32* @y, align 4 ; <i32> [#uses=1] + %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1] + %1 = load i32, i32* @y, align 4 ; <i32> [#uses=1] %2 = add i32 %1, %0 ; <i32> [#uses=1] ret i32 %2 } diff --git a/llvm/test/CodeGen/ARM/ifconv-kills.ll b/llvm/test/CodeGen/ARM/ifconv-kills.ll index c9f67896fd5..3a458e48193 100644 --- a/llvm/test/CodeGen/ARM/ifconv-kills.ll +++ b/llvm/test/CodeGen/ARM/ifconv-kills.ll @@ -10,7 +10,7 @@ entry: ; present something which can be easily if-converted if.then: ; %R0 should be killed here - %valt = load i32* %ptr, align 4 + %valt = load i32, i32* %ptr, align 4 br label %return if.else: @@ -18,7 +18,7 @@ if.else: ; has to be removed because if.then will follow after this and still ; read it. %addr = getelementptr inbounds i32, i32* %ptr, i32 4 - %vale = load i32* %addr, align 4 + %vale = load i32, i32* %addr, align 4 br label %return return: diff --git a/llvm/test/CodeGen/ARM/ifconv-regmask.ll b/llvm/test/CodeGen/ARM/ifconv-regmask.ll index d45f65f9567..11ad6f23544 100644 --- a/llvm/test/CodeGen/ARM/ifconv-regmask.ll +++ b/llvm/test/CodeGen/ARM/ifconv-regmask.ll @@ -7,7 +7,7 @@ ; Function Attrs: nounwind ssp define i32 @sfu() { entry: - %bf.load = load i32* getelementptr inbounds (%union.opcode* @opcode, i32 0, i32 0), align 4 + %bf.load = load i32, i32* getelementptr inbounds (%union.opcode* @opcode, i32 0, i32 0), align 4 %bf.lshr = lshr i32 %bf.load, 26 %bf.clear = and i32 %bf.lshr, 7 switch i32 %bf.clear, label %return [ diff --git a/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll b/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll index 9cc8738ea4b..41d78e53acc 100644 --- a/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll +++ b/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll @@ -4,7 +4,7 @@ define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly { entry: %0 = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1, i32 0 - %1 = load i8* %0, align 1 + %1 = load i8, i8* %0, align 1 %2 = zext i8 %1 to i32 %3 = and i32 %2, 112 %4 = icmp eq i32 %3, 0 @@ -12,7 +12,7 @@ entry: bb: %5 = getelementptr inbounds %struct.S, %struct.S* %y, i32 0, i32 1, i32 0 - %6 = load i8* %5, align 1 + %6 = load i8, i8* %5, align 1 %7 = zext i8 %6 to i32 %8 = and i32 %7, 112 %9 = icmp eq i32 %8, 0 diff --git a/llvm/test/CodeGen/ARM/ifcvt11.ll b/llvm/test/CodeGen/ARM/ifcvt11.ll index a02dff0923a..eae41e21c61 100644 --- a/llvm/test/CodeGen/ARM/ifcvt11.ll +++ b/llvm/test/CodeGen/ARM/ifcvt11.ll @@ -23,8 +23,8 @@ bb: ; preds = %bb4, %bb.nph %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ] %scevgep10 = getelementptr inbounds %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 0 %scevgep11 = getelementptr %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 1 - %3 = load double* %scevgep10, align 4 - %4 = load double* %scevgep11, align 4 + %3 = load double, double* %scevgep10, align 4 + %4 = load double, double* %scevgep11, align 4 %5 = fcmp uge double %3, %4 br i1 %5, label %bb3, label %bb1 @@ -35,7 +35,7 @@ bb1: ; preds = %bb ; CHECK: vcmpe.f64 ; CHECK: vmrs APSR_nzcv, fpscr %scevgep12 = getelementptr %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 2 - %6 = load double* %scevgep12, align 4 + %6 = load double, double* %scevgep12, align 4 %7 = fcmp 
uge double %3, %6 br i1 %7, label %bb3, label %bb2 diff --git a/llvm/test/CodeGen/ARM/ifcvt5.ll b/llvm/test/CodeGen/ARM/ifcvt5.ll index 31e3e00c468..3aa2139cc03 100644 --- a/llvm/test/CodeGen/ARM/ifcvt5.ll +++ b/llvm/test/CodeGen/ARM/ifcvt5.ll @@ -6,7 +6,7 @@ define void @foo(i32 %a) { entry: - %tmp = load i32** @x ; <i32*> [#uses=1] + %tmp = load i32*, i32** @x ; <i32*> [#uses=1] store i32 %a, i32* %tmp ret void } diff --git a/llvm/test/CodeGen/ARM/ifcvt7.ll b/llvm/test/CodeGen/ARM/ifcvt7.ll index 476ed4d47c6..e0d2b7cffb4 100644 --- a/llvm/test/CodeGen/ARM/ifcvt7.ll +++ b/llvm/test/CodeGen/ARM/ifcvt7.ll @@ -11,9 +11,9 @@ entry: br label %tailrecurse tailrecurse: ; preds = %bb, %entry - %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] - %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2] - %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp6 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp9 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2] + %tmp12 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1] %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1] %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll b/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll index 7208fffbcc8..766b3d7ca43 100644 --- a/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll +++ b/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll @@ -3,10 +3,10 @@ define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y) { - %h = load <8 x float>* %f + %h = load <8 x float>, <8 x float>* %f %i = fmul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000> %m = bitcast <8 x float> %i to <4 x i64> - %z = load <4 x i64>* %y + %z = load <4 x i64>, <4 x i64>* %y %n = mul <4 x i64> %z, %m %p = bitcast <4 x i64> %n to <8 x float> store <8 x float> %p, <8 x float>* %g diff --git a/llvm/test/CodeGen/ARM/indirectbr-2.ll b/llvm/test/CodeGen/ARM/indirectbr-2.ll index 3a5d2d824a7..318880a83d9 100644 --- a/llvm/test/CodeGen/ARM/indirectbr-2.ll +++ b/llvm/test/CodeGen/ARM/indirectbr-2.ll @@ -15,7 +15,7 @@ define i32 @func() nounwind ssp { %1 = alloca i32, align 4 - %2 = load i32* @foo, align 4 + %2 = load i32, i32* @foo, align 4 %3 = icmp eq i32 %2, 34879 br label %4 @@ -24,7 +24,7 @@ define i32 @func() nounwind ssp { %6 = mul i32 %5, 287 %7 = add i32 %6, 2 %8 = getelementptr [2 x i32], [2 x i32]* @DWJumpTable2808, i32 0, i32 %5 - %9 = load i32* %8 + %9 = load i32, i32* %8 %10 = add i32 %9, ptrtoint (i8* blockaddress(@func, %4) to i32) %11 = inttoptr i32 %10 to i8* %12 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8]* @0, i32 0, i32 0)) @@ -33,7 +33,7 @@ define i32 @func() nounwind ssp { ; <label>:13 ; preds = %4 %tmp14 = phi i32 [ %7, %4 ] store i32 23958, i32* @foo, align 4 - %tmp15 = load i32* %1, align 4 + %tmp15 = load i32, i32* %1, align 4 %tmp16 = icmp eq i32 %tmp15, 0 %tmp17 = zext i1 %tmp16 to i32 %tmp21 = add i32 %tmp17, %tmp14 diff --git a/llvm/test/CodeGen/ARM/indirectbr.ll b/llvm/test/CodeGen/ARM/indirectbr.ll index 2c63b6f987f..d15ef14b449 100644 --- a/llvm/test/CodeGen/ARM/indirectbr.ll +++ 
b/llvm/test/CodeGen/ARM/indirectbr.ll @@ -16,7 +16,7 @@ entry: ; THUMB: [[NEXTADDR_PCBASE:LPC0_[0-9]]]: ; THUMB: add r[[NEXTADDR_REG]], pc - %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2] + %0 = load i8*, i8** @nextaddr, align 4 ; <i8*> [#uses=2] %1 = icmp eq i8* %0, null ; <i1> [#uses=1] ; indirect branch gets duplicated here ; ARM: bx @@ -32,7 +32,7 @@ bb2: ; preds = %entry, %bb3 bb3: ; preds = %entry %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1] - %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1] + %gotovar.4.0.pre = load i8*, i8** %2, align 4 ; <i8*> [#uses=1] br label %bb2 L5: ; preds = %bb2 diff --git a/llvm/test/CodeGen/ARM/inline-diagnostics.ll b/llvm/test/CodeGen/ARM/inline-diagnostics.ll index dbb779e4203..3f5b73c5a21 100644 --- a/llvm/test/CodeGen/ARM/inline-diagnostics.ll +++ b/llvm/test/CodeGen/ARM/inline-diagnostics.ll @@ -9,7 +9,7 @@ define float @inline_func(float %f1, float %f2) #0 { %c3 = alloca %struct.float4, align 4 call void asm sideeffect "vmul.f32 ${2:q}, ${0:q}, ${1:q}", "=*r,=*r,*w"(%struct.float4* %c1, %struct.float4* %c2, %struct.float4* %c3) #1, !srcloc !1 %x = getelementptr inbounds %struct.float4, %struct.float4* %c3, i32 0, i32 0 - %1 = load float* %x, align 4 + %1 = load float, float* %x, align 4 ret float %1 } diff --git a/llvm/test/CodeGen/ARM/interrupt-attr.ll b/llvm/test/CodeGen/ARM/interrupt-attr.ll index c6da09d156b..95ada085b0d 100644 --- a/llvm/test/CodeGen/ARM/interrupt-attr.ll +++ b/llvm/test/CodeGen/ARM/interrupt-attr.ll @@ -65,7 +65,7 @@ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" { ; CHECK-A-THUMB-LABEL: fiq_fn: ; CHECK-M-LABEL: fiq_fn: - %val = load volatile [16 x i32]* @bigvar + %val = load volatile [16 x i32], [16 x i32]* @bigvar store volatile [16 x i32] %val, [16 x i32]* @bigvar ret void } @@ -81,7 +81,7 @@ define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" { ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} ; CHECK-A: subs pc, lr, #0 - %val = load volatile [16 x i32]* @bigvar + %val = load volatile [16 x i32], [16 x i32]* @bigvar store volatile [16 x i32] %val, [16 x i32]* @bigvar ret void } @@ -126,8 +126,8 @@ define arm_aapcscc void @floating_fn() alignstack(8) "interrupt"="IRQ" { ; CHECK-A-NOT: vstr ; CHECK-A-NOT: vstm ; CHECK-A: vadd.f64 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %lhs = load volatile double* @var - %rhs = load volatile double* @var + %lhs = load volatile double, double* @var + %rhs = load volatile double, double* @var %sum = fadd double %lhs, %rhs store double %sum, double* @var ret void diff --git a/llvm/test/CodeGen/ARM/intrinsics-crypto.ll b/llvm/test/CodeGen/ARM/intrinsics-crypto.ll index 96413d341e4..6e5efd85a34 100644 --- a/llvm/test/CodeGen/ARM/intrinsics-crypto.ll +++ b/llvm/test/CodeGen/ARM/intrinsics-crypto.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) { - %tmp = load <16 x i8>* %a - %tmp2 = load <16 x i8>* %b + %tmp = load <16 x i8>, <16 x i8>* %a + %tmp2 = load <16 x i8>, <16 x i8>* %b %tmp3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %tmp, <16 x i8> %tmp2) ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}} %tmp4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %tmp3, <16 x i8> %tmp2) @@ -15,9 +15,9 @@ define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) { } define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) { - %tmp = load <4 x 
i32>* %a - %tmp2 = load <4 x i32>* %b - %tmp3 = load <4 x i32>* %c + %tmp = load <4 x i32>, <4 x i32>* %a + %tmp2 = load <4 x i32>, <4 x i32>* %b + %tmp3 = load <4 x i32>, <4 x i32>* %c %scalar = extractelement <4 x i32> %tmp, i32 0 %resscalar = call i32 @llvm.arm.neon.sha1h(i32 %scalar) %res1 = insertelement <4 x i32> undef, i32 %resscalar, i32 0 diff --git a/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll b/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll index 0b607f7edf3..aab3556c547 100644 --- a/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll +++ b/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll @@ -46,7 +46,7 @@ new.notnull.i.i: br label %cleanup cleanup: - %0 = load i32* %a, align 4 + %0 = load i32, i32* %a, align 4 %inc294 = add nsw i32 %0, 4 store i32 %inc294, i32* %a, align 4 br i1 false, label %_ZN3lol5ArrayIivvvvvvvED1Ev.exit, label %delete.notnull.i.i.i1409 diff --git a/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll b/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll index 0116fe8de7c..f1745bd4727 100644 --- a/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll +++ b/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll @@ -15,7 +15,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 { entry: %0 = bitcast float* %pf to <8 x float>* - %1 = load <8 x float>* %0, align 4 + %1 = load <8 x float>, <8 x float>* %0, align 4 %2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> %3 = fptosi <8 x float> %2 to <8 x i16> %4 = bitcast i16* %pb to <8 x i16>* diff --git a/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll b/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll index df4bd57522d..222664a904c 100644 --- a/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll +++ b/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll @@ -28,8 +28,8 @@ entry: store i32 0, i32* %retval store volatile i32 100, i32* %b, align 4 store volatile i32 32, i32* %c, align 4 - %0 = load volatile i32* %b, align 4 - %1 = load volatile i32* %c, align 4 + %0 = load volatile i32, i32* %b, align 4 + %1 = load volatile i32, i32* %c, align 4 %div = sdiv i32 %0, %1 store volatile i32 %div, i32* %a, align 4 ret i32 0 diff --git a/llvm/test/CodeGen/ARM/large-stack.ll b/llvm/test/CodeGen/ARM/large-stack.ll index 1a9a1fadeeb..e2d4de341ec 100644 --- a/llvm/test/CodeGen/ARM/large-stack.ll +++ b/llvm/test/CodeGen/ARM/large-stack.ll @@ -15,6 +15,6 @@ define i32 @test3() { %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16 store i32 0, i32* %tmp - %tmp1 = load i32* %tmp + %tmp1 = load i32, i32* %tmp ret i32 %tmp1 } diff --git a/llvm/test/CodeGen/ARM/ldm.ll b/llvm/test/CodeGen/ARM/ldm.ll index 3977da6da9c..b35631f3fae 100644 --- a/llvm/test/CodeGen/ARM/ldm.ll +++ b/llvm/test/CodeGen/ARM/ldm.ll @@ -8,8 +8,8 @@ define i32 @t1() { ; CHECK: pop ; V4T-LABEL: t1: ; V4T: pop - %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1] - %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] + %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1] + %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1] ret i32 %tmp4 } @@ -19,9 +19,9 @@ define i32 @t2() { ; CHECK: pop ; V4T-LABEL: t2: ; V4T: pop - %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> 
[#uses=1] - %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] - %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1] + %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] + %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] + %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1] %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1] ret i32 %tmp6 } @@ -34,9 +34,9 @@ define i32 @t3() { ; V4T: ldmib ; V4T: pop ; V4T-NEXT: bx lr - %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] - %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] - %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] + %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1] + %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1] + %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1] %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1] ret i32 %tmp6 } diff --git a/llvm/test/CodeGen/ARM/ldr.ll b/llvm/test/CodeGen/ARM/ldr.ll index c740e1adc42..bd4de5de669 100644 --- a/llvm/test/CodeGen/ARM/ldr.ll +++ b/llvm/test/CodeGen/ARM/ldr.ll @@ -4,7 +4,7 @@ define i32 @f1(i32* %v) { ; CHECK-LABEL: f1: ; CHECK: ldr r0 entry: - %tmp = load i32* %v + %tmp = load i32, i32* %v ret i32 %tmp } @@ -13,7 +13,7 @@ define i32 @f2(i32* %v) { ; CHECK: ldr r0 entry: %tmp2 = getelementptr i32, i32* %v, i32 1023 - %tmp = load i32* %tmp2 + %tmp = load i32, i32* %tmp2 ret i32 %tmp } @@ -23,7 +23,7 @@ define i32 @f3(i32* %v) { ; CHECK: ldr r0 entry: %tmp2 = getelementptr i32, i32* %v, i32 1024 - %tmp = load i32* %tmp2 + %tmp = load i32, i32* %tmp2 ret i32 %tmp } @@ -34,7 +34,7 @@ define i32 @f4(i32 %base) { entry: %tmp1 = sub i32 %base, 128 %tmp2 = inttoptr i32 %tmp1 to i32* - %tmp3 = load i32* %tmp2 + %tmp3 = load i32, i32* %tmp2 ret i32 %tmp3 } @@ -44,7 +44,7 @@ define i32 @f5(i32 %base, i32 %offset) { entry: %tmp1 = add i32 %base, %offset %tmp2 = inttoptr i32 %tmp1 to i32* - %tmp3 = load i32* %tmp2 + %tmp3 = load i32, i32* %tmp2 ret i32 %tmp3 } @@ -55,7 +55,7 @@ entry: %tmp1 = shl i32 %offset, 2 %tmp2 = add i32 %base, %tmp1 %tmp3 = inttoptr i32 %tmp2 to i32* - %tmp4 = load i32* %tmp3 + %tmp4 = load i32, i32* %tmp3 ret i32 %tmp4 } @@ -66,6 +66,6 @@ entry: %tmp1 = lshr i32 %offset, 2 %tmp2 = add i32 %base, %tmp1 %tmp3 = inttoptr i32 %tmp2 to i32* - %tmp4 = load i32* %tmp3 + %tmp4 = load i32, i32* %tmp3 ret i32 %tmp4 } diff --git a/llvm/test/CodeGen/ARM/ldr_ext.ll b/llvm/test/CodeGen/ARM/ldr_ext.ll index 31aaba5c3c2..15efb50c9a9 100644 --- a/llvm/test/CodeGen/ARM/ldr_ext.ll +++ b/llvm/test/CodeGen/ARM/ldr_ext.ll @@ -2,28 +2,28 @@ define i32 @test1(i8* %t1) nounwind { ; CHECK: ldrb - %tmp.u = load i8* %t1 + %tmp.u = load i8, i8* %t1 %tmp1.s = zext i8 %tmp.u to i32 ret i32 %tmp1.s } define i32 @test2(i16* %t1) nounwind { ; CHECK: ldrh - %tmp.u = load i16* %t1 + %tmp.u = load i16, i16* %t1 %tmp1.s = zext i16 %tmp.u to i32 ret i32 %tmp1.s } define i32 @test3(i8* %t0) nounwind { ; CHECK: ldrsb - %tmp.s = load i8* %t0 + %tmp.s = load i8, i8* %t0 %tmp1.s = sext i8 %tmp.s to i32 ret i32 %tmp1.s } define i32 @test4(i16* %t0) nounwind { ; CHECK: ldrsh - %tmp.s = load i16* %t0 + %tmp.s = load i16, i16* %t0 %tmp1.s = sext i16 %tmp.s to i32 ret i32 %tmp1.s } @@ -31,7 +31,7 @@ define i32 @test4(i16* %t0) 
nounwind { define i32 @test5() nounwind { ; CHECK: mov r0, #0 ; CHECK: ldrsh - %tmp.s = load i16* null + %tmp.s = load i16, i16* null %tmp1.s = sext i16 %tmp.s to i32 ret i32 %tmp1.s } diff --git a/llvm/test/CodeGen/ARM/ldr_frame.ll b/llvm/test/CodeGen/ARM/ldr_frame.ll index 113badd4ed1..01b18bccc33 100644 --- a/llvm/test/CodeGen/ARM/ldr_frame.ll +++ b/llvm/test/CodeGen/ARM/ldr_frame.ll @@ -3,14 +3,14 @@ define i32 @f1() { %buf = alloca [32 x i32], align 4 %tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 0 - %tmp1 = load i32* %tmp + %tmp1 = load i32, i32* %tmp ret i32 %tmp1 } define i32 @f2() { %buf = alloca [32 x i8], align 4 %tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 0 - %tmp1 = load i8* %tmp + %tmp1 = load i8, i8* %tmp %tmp2 = zext i8 %tmp1 to i32 ret i32 %tmp2 } @@ -18,14 +18,14 @@ define i32 @f2() { define i32 @f3() { %buf = alloca [32 x i32], align 4 %tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 32 - %tmp1 = load i32* %tmp + %tmp1 = load i32, i32* %tmp ret i32 %tmp1 } define i32 @f4() { %buf = alloca [32 x i8], align 4 %tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 2 - %tmp1 = load i8* %tmp + %tmp1 = load i8, i8* %tmp %tmp2 = zext i8 %tmp1 to i32 ret i32 %tmp2 } diff --git a/llvm/test/CodeGen/ARM/ldr_post.ll b/llvm/test/CodeGen/ARM/ldr_post.ll index 2558b16f3d3..139c6f45e52 100644 --- a/llvm/test/CodeGen/ARM/ldr_post.ll +++ b/llvm/test/CodeGen/ARM/ldr_post.ll @@ -7,7 +7,7 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b ; <i32> [#uses=2] %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1] - %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1] %tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1] %tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1] ret i32 %tmp5 @@ -19,7 +19,7 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) { define i32 @test2(i32 %a, i32 %b) { %tmp1 = mul i32 %a, %b ; <i32> [#uses=2] %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1] - %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1] %tmp4 = sub i32 %tmp1, 16 ; <i32> [#uses=1] %tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1] ret i32 %tmp5 diff --git a/llvm/test/CodeGen/ARM/ldr_pre.ll b/llvm/test/CodeGen/ARM/ldr_pre.ll index 4246c092b73..c6c76e22283 100644 --- a/llvm/test/CodeGen/ARM/ldr_pre.ll +++ b/llvm/test/CodeGen/ARM/ldr_pre.ll @@ -6,7 +6,7 @@ ; CHECK-NOT: ldr define i32* @test1(i32* %X, i32* %dest) { %Y = getelementptr i32, i32* %X, i32 4 ; <i32*> [#uses=2] - %A = load i32* %Y ; <i32> [#uses=1] + %A = load i32, i32* %Y ; <i32> [#uses=1] store i32 %A, i32* %dest ret i32* %Y } @@ -17,7 +17,7 @@ define i32* @test1(i32* %X, i32* %dest) { define i32 @test2(i32 %a, i32 %b, i32 %c) { %tmp1 = sub i32 %a, %b ; <i32> [#uses=2] %tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1] - %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1] %tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1] %tmp5 = add i32 %tmp4, %tmp3 ; <i32> [#uses=1] ret i32 %tmp5 diff --git a/llvm/test/CodeGen/ARM/ldrd-memoper.ll b/llvm/test/CodeGen/ARM/ldrd-memoper.ll index f1a1121f6aa..744fbd5efb8 100644 --- a/llvm/test/CodeGen/ARM/ldrd-memoper.ll +++ b/llvm/test/CodeGen/ARM/ldrd-memoper.ll @@ -8,8 +8,8 @@ ; CHECK: Formed {{.*}} t2LDRD{{.*}} mem:LD4[%0] LD4[%0+4] define i64 @t(i64 %a) nounwind readonly { entry: - %0 = load i64** @b, align 4 - %1 = load i64* %0, align 4 + %0 = load i64*, i64** @b, align 4 + %1 = load i64, i64* %0, align 4 %2 = mul i64 %1, %a ret i64 %2 } diff --git 
a/llvm/test/CodeGen/ARM/ldrd.ll b/llvm/test/CodeGen/ARM/ldrd.ll index 7d8ca6124c3..6fed0aea78c 100644 --- a/llvm/test/CodeGen/ARM/ldrd.ll +++ b/llvm/test/CodeGen/ARM/ldrd.ll @@ -19,8 +19,8 @@ entry: ; M3-LABEL: t: ; M3-NOT: ldrd - %0 = load i64** @b, align 4 - %1 = load i64* %0, align 4 + %0 = load i64*, i64** @b, align 4 + %1 = load i64, i64* %0, align 4 %2 = mul i64 %1, %a ret i64 %2 } @@ -57,8 +57,8 @@ bb: ; preds = %bb, %entry %scevgep4 = getelementptr i32, i32* %b, i32 %i.03 ; <i32*> [#uses=1] %tmp = add i32 %i.03, 1 ; <i32> [#uses=3] %scevgep5 = getelementptr i32, i32* %a, i32 %tmp ; <i32*> [#uses=1] - %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] - %3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1] + %2 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1] + %3 = load i32, i32* %scevgep5, align 4 ; <i32> [#uses=1] %4 = add nsw i32 %3, %2 ; <i32> [#uses=1] store i32 %4, i32* %scevgep4, align 4 %exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1] @@ -84,8 +84,8 @@ entry: ; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}} %orig_blocks = alloca [256 x i16], align 2 %0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start(i64 512, i8* %0) nounwind - %tmp1 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 1), align 4 - %tmp2 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 2), align 4 + %tmp1 = load i32, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 1), align 4 + %tmp2 = load i32, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 2), align 4 %add = add nsw i32 %tmp2, %tmp1 store i32 %add, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 0), align 4 call void @llvm.lifetime.end(i64 512, i8* %0) nounwind diff --git a/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll b/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll index 82c0fdbdf97..d00f44e8e88 100644 --- a/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll +++ b/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll @@ -17,7 +17,7 @@ bb: %uglygep = getelementptr i8, i8* %src6, i32 %tmp %src_addr.04 = bitcast i8* %uglygep to float* %dst_addr.03 = getelementptr float, float* %dst, i32 %j.05 - %1 = load float* %src_addr.04, align 4 + %1 = load float, float* %src_addr.04, align 4 store float %1, float* %dst_addr.03, align 4 %2 = add i32 %j.05, 1 %exitcond = icmp eq i32 %2, %width diff --git a/llvm/test/CodeGen/ARM/ldstrex-m.ll b/llvm/test/CodeGen/ARM/ldstrex-m.ll index b50699f4cde..3d83a9d78e3 100644 --- a/llvm/test/CodeGen/ARM/ldstrex-m.ll +++ b/llvm/test/CodeGen/ARM/ldstrex-m.ll @@ -4,7 +4,7 @@ ; CHECK-NOT: ldrexd define i64 @f0(i64* %p) nounwind readonly { entry: - %0 = load atomic i64* %p seq_cst, align 8 + %0 = load atomic i64, i64* %p seq_cst, align 8 ret i64 %0 } @@ -29,7 +29,7 @@ entry: ; CHECK: ldr define i32 @f3(i32* %p) nounwind readonly { entry: - %0 = load atomic i32* %p seq_cst, align 4 + %0 = load atomic i32, i32* %p seq_cst, align 4 ret i32 %0 } @@ -37,7 +37,7 @@ entry: ; CHECK: ldrb define i8 @f4(i8* %p) nounwind readonly { entry: - %0 = load atomic i8* %p seq_cst, align 4 + %0 = load atomic i8, i8* %p seq_cst, align 4 ret i8 %0 } diff --git a/llvm/test/CodeGen/ARM/ldstrex.ll b/llvm/test/CodeGen/ARM/ldstrex.ll index edcf1162c73..59349f72a8f 100644 --- a/llvm/test/CodeGen/ARM/ldstrex.ll +++ b/llvm/test/CodeGen/ARM/ldstrex.ll @@ -106,14 +106,14 @@ declare void @llvm.arm.clrex() nounwind define void @excl_addrmode() { ; CHECK-T2ADDRMODE-LABEL: excl_addrmode: - %base1020 = load i32** @base + %base1020 = load i32*, i32** @base %offset1020 = getelementptr i32, i32* 
%base1020, i32 255 call i32 @llvm.arm.ldrex.p0i32(i32* %offset1020) call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1020) ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [{{r[0-9]+}}, #1020] ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [{{r[0-9]+}}, #1020] - %base1024 = load i32** @base + %base1024 = load i32*, i32** @base %offset1024 = getelementptr i32, i32* %base1024, i32 256 call i32 @llvm.arm.ldrex.p0i32(i32* %offset1024) call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1024) @@ -121,7 +121,7 @@ define void @excl_addrmode() { ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] - %base1 = load i32** @base + %base1 = load i32*, i32** @base %addr8 = bitcast i32* %base1 to i8* %offset1_8 = getelementptr i8, i8* %addr8, i32 1 %offset1 = bitcast i8* %offset1_8 to i32* diff --git a/llvm/test/CodeGen/ARM/load-global.ll b/llvm/test/CodeGen/ARM/load-global.ll index 00ca2e8b1b7..34748bc848b 100644 --- a/llvm/test/CodeGen/ARM/load-global.ll +++ b/llvm/test/CodeGen/ARM/load-global.ll @@ -45,6 +45,6 @@ define i32 @test1() { ; LINUX: ldr r0, [r1, r0] ; LINUX: ldr r0, [r0] ; LINUX: .long G(GOT) - %tmp = load i32* @G + %tmp = load i32, i32* @G ret i32 %tmp } diff --git a/llvm/test/CodeGen/ARM/load.ll b/llvm/test/CodeGen/ARM/load.ll index ca16adc0082..3b2d637cb26 100644 --- a/llvm/test/CodeGen/ARM/load.ll +++ b/llvm/test/CodeGen/ARM/load.ll @@ -2,28 +2,28 @@ define i32 @f1(i8* %p) { entry: - %tmp = load i8* %p ; <i8> [#uses=1] + %tmp = load i8, i8* %p ; <i8> [#uses=1] %tmp1 = sext i8 %tmp to i32 ; <i32> [#uses=1] ret i32 %tmp1 } define i32 @f2(i8* %p) { entry: - %tmp = load i8* %p ; <i8> [#uses=1] + %tmp = load i8, i8* %p ; <i8> [#uses=1] %tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1] ret i32 %tmp2 } define i32 @f3(i16* %p) { entry: - %tmp = load i16* %p ; <i16> [#uses=1] + %tmp = load i16, i16* %p ; <i16> [#uses=1] %tmp3 = sext i16 %tmp to i32 ; <i32> [#uses=1] ret i32 %tmp3 } define i32 @f4(i16* %p) { entry: - %tmp = load i16* %p ; <i16> [#uses=1] + %tmp = load i16, i16* %p ; <i16> [#uses=1] %tmp4 = zext i16 %tmp to i32 ; <i32> [#uses=1] ret i32 %tmp4 } diff --git a/llvm/test/CodeGen/ARM/load_i1_select.ll b/llvm/test/CodeGen/ARM/load_i1_select.ll index 7a208ea4175..48cd4eae58f 100644 --- a/llvm/test/CodeGen/ARM/load_i1_select.ll +++ b/llvm/test/CodeGen/ARM/load_i1_select.ll @@ -11,7 +11,7 @@ target triple = "thumbv7-apple-ios0.0.0" ; CHECK: tst.w r[[R0]], #1 define void @foo(i8* %call, double* %p) nounwind { entry: - %tmp2 = load i8* %call + %tmp2 = load i8, i8* %call %tmp3 = trunc i8 %tmp2 to i1 %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00 store double %cond, double* %p diff --git a/llvm/test/CodeGen/ARM/long.ll b/llvm/test/CodeGen/ARM/long.ll index d0bff4a906e..1807813d930 100644 --- a/llvm/test/CodeGen/ARM/long.ll +++ b/llvm/test/CodeGen/ARM/long.ll @@ -85,6 +85,6 @@ define i64 @f10() { ; CHECK-LABEL: f10: entry: %a = alloca i64, align 8 ; <i64*> [#uses=1] - %retval = load i64* %a ; <i64> [#uses=1] + %retval = load i64, i64* %a ; <i64> [#uses=1] ret i64 %retval } diff --git a/llvm/test/CodeGen/ARM/lsr-code-insertion.ll b/llvm/test/CodeGen/ARM/lsr-code-insertion.ll index a5b33338688..aa2b2d26d12 100644 --- a/llvm/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/llvm/test/CodeGen/ARM/lsr-code-insertion.ll @@ -23,16 +23,16 @@ bb: ; preds = %cond_next59, %entry %indvar = phi i32 [ 0, %entry ], [ %k.069.0, %cond_next59 ] ; <i32> [#uses=6] %k.069.0 = add i32 %indvar, 1 ; <i32> [#uses=3] %tmp3 = getelementptr i32, i32* %mpp, 
i32 %indvar ; <i32*> [#uses=1] - %tmp4 = load i32* %tmp3 ; <i32> [#uses=1] + %tmp4 = load i32, i32* %tmp3 ; <i32> [#uses=1] %tmp8 = getelementptr i32, i32* %tpmm, i32 %indvar ; <i32*> [#uses=1] - %tmp9 = load i32* %tmp8 ; <i32> [#uses=1] + %tmp9 = load i32, i32* %tmp8 ; <i32> [#uses=1] %tmp10 = add i32 %tmp9, %tmp4 ; <i32> [#uses=2] %tmp13 = getelementptr i32, i32* %mc, i32 %k.069.0 ; <i32*> [#uses=5] store i32 %tmp10, i32* %tmp13 %tmp17 = getelementptr i32, i32* %ip, i32 %indvar ; <i32*> [#uses=1] - %tmp18 = load i32* %tmp17 ; <i32> [#uses=1] + %tmp18 = load i32, i32* %tmp17 ; <i32> [#uses=1] %tmp22 = getelementptr i32, i32* %tpim, i32 %indvar ; <i32*> [#uses=1] - %tmp23 = load i32* %tmp22 ; <i32> [#uses=1] + %tmp23 = load i32, i32* %tmp22 ; <i32> [#uses=1] %tmp24 = add i32 %tmp23, %tmp18 ; <i32> [#uses=2] %tmp30 = icmp sgt i32 %tmp24, %tmp10 ; <i1> [#uses=1] br i1 %tmp30, label %cond_true, label %cond_next @@ -42,9 +42,9 @@ cond_true: ; preds = %bb br label %cond_next cond_next: ; preds = %cond_true, %bb - %tmp39 = load i32* %tmp13 ; <i32> [#uses=1] + %tmp39 = load i32, i32* %tmp13 ; <i32> [#uses=1] %tmp42 = getelementptr i32, i32* %ms, i32 %k.069.0 ; <i32*> [#uses=1] - %tmp43 = load i32* %tmp42 ; <i32> [#uses=1] + %tmp43 = load i32, i32* %tmp42 ; <i32> [#uses=1] %tmp44 = add i32 %tmp43, %tmp39 ; <i32> [#uses=2] store i32 %tmp44, i32* %tmp13 %tmp52 = icmp slt i32 %tmp44, -987654321 ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll index ddadadc5e2d..ef98727344e 100644 --- a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll +++ b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll @@ -19,7 +19,7 @@ for.body: ; preds = %entry, %for.body %i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ] %b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.addr.05 - %0 = load i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx, align 4 %cmp1 = icmp sgt i32 %0, %b.04 %.b.0 = select i1 %cmp1, i32 %0, i32 %b.04 %i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06 diff --git a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll index 410233e2688..17292cfe289 100644 --- a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -25,8 +25,8 @@ outer.loop: ; preds = %for.inc69, %entry %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ] %offset = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %0, i32 2 %len = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %0, i32 3 - %tmp5 = load i64* %offset, align 4 - %tmp15 = load i64* %len, align 4 + %tmp5 = load i64, i64* %offset, align 4 + %tmp15 = load i64, i64* %len, align 4 %add = add nsw i64 %tmp15, %tmp5 br label %inner.loop @@ -39,8 +39,8 @@ inner.loop: ; preds = %for.inc, %outer.loo if.end: ; preds = %inner.loop %len39 = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %1, i32 3 %offset28 = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %1, i32 2 - %tmp29 = load i64* %offset28, align 4 - %tmp40 = load i64* %len39, align 4 + %tmp29 = load i64, i64* %offset28, align 4 + %tmp40 = load i64, i64* %len39, align 4 %add41 = add nsw i64 %tmp40, %tmp29 %cmp44 = icmp sge i64 %tmp29, %tmp5 %cmp47 = icmp slt i64 %tmp29, %add diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll index 895e74abf62..9d7ea622e58 100644 --- 
a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -30,7 +30,7 @@ entry: ; CHECK: poplt ; CHECK-NOT: cmp ; CHECK: movle - %0 = load i32* @foo, align 4 + %0 = load i32, i32* @foo, align 4 %cmp28 = icmp sgt i32 %0, 0 br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader @@ -53,7 +53,7 @@ entry: ; CHECK-NOT: sub ; CHECK: cmp ; CHECK: blt -%0 = load i32* %offset, align 4 +%0 = load i32, i32* %offset, align 4 %cmp = icmp slt i32 %0, %size %s = sub nsw i32 %0, %size %size2 = sub nsw i32 %size, 0 diff --git a/llvm/test/CodeGen/ARM/machine-licm.ll b/llvm/test/CodeGen/ARM/machine-licm.ll index 921426eb904..a1eec78e453 100644 --- a/llvm/test/CodeGen/ARM/machine-licm.ll +++ b/llvm/test/CodeGen/ARM/machine-licm.ll @@ -39,14 +39,14 @@ bb.nph: ; preds = %entry ; THUMB: LCPI0_0: ; THUMB-NOT: LCPI0_1: ; THUMB: .section - %.pre = load i32* @GV, align 4 ; <i32> [#uses=1] + %.pre = load i32, i32* @GV, align 4 ; <i32> [#uses=1] br label %bb bb: ; preds = %bb, %bb.nph %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=1] %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ] ; <i32> [#uses=2] %scevgep = getelementptr i32, i32* %vals, i32 %i.03 ; <i32*> [#uses=1] - %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1] + %2 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1] %3 = add nsw i32 %1, %2 ; <i32> [#uses=2] store i32 %3, i32* @GV, align 4 %4 = add i32 %i.03, 1 ; <i32> [#uses=2] diff --git a/llvm/test/CodeGen/ARM/minsize-litpools.ll b/llvm/test/CodeGen/ARM/minsize-litpools.ll index d5cd2a9b72e..d5452ed0f90 100644 --- a/llvm/test/CodeGen/ARM/minsize-litpools.ll +++ b/llvm/test/CodeGen/ARM/minsize-litpools.ll @@ -12,7 +12,7 @@ define i32 @small_global() minsize { ; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}} ; CHECK: ldr r0, [r[[GLOBDEST]]] - %val = load i32* @var + %val = load i32, i32* @var ret i32 %val } @@ -21,6 +21,6 @@ define i32 @big_global() { ; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var ; CHECK: movt [[GLOBDEST]], :upper16:var - %val = load i32* @var + %val = load i32, i32* @var ret i32 %val } diff --git a/llvm/test/CodeGen/ARM/misched-copy-arm.ll b/llvm/test/CodeGen/ARM/misched-copy-arm.ll index 4f5249cc4ba..330252a90d7 100644 --- a/llvm/test/CodeGen/ARM/misched-copy-arm.ll +++ b/llvm/test/CodeGen/ARM/misched-copy-arm.ll @@ -19,7 +19,7 @@ for.body: ; preds = %entry, %for.body %s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ] %indvars.iv.next = add i32 %indvars.iv, %s %arrayidx = getelementptr inbounds i32, i32* %d, i32 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx, align 4 %mul = mul nsw i32 %0, %s.05 %exitcond = icmp eq i32 %indvars.iv.next, %a br i1 %exitcond, label %for.end, label %for.body @@ -65,7 +65,7 @@ if.end28: ; preds = %if.then24, %while.c %dst.1 = phi %struct.rtx_def* [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ] %arrayidx30 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0 %rtx31 = bitcast %union.rtunion_def* %arrayidx30 to %struct.rtx_def** - %0 = load %struct.rtx_def** %rtx31, align 4 + %0 = load %struct.rtx_def*, %struct.rtx_def** %rtx31, align 4 br label %while.cond if.then46: ; preds = %while.cond diff --git a/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll b/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll index 05e9b0facd6..2c0a6121f79 100644 --- a/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll +++ b/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll @@ -33,10 +33,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 
store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* %in1, align 4 + %tmp1 = load i32, i32* %in1, align 4 %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 ret void @@ -48,10 +48,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* %in1, align 4 + %tmp1 = load i32, i32* %in1, align 4 %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 ret void @@ -63,7 +63,7 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 ret void @@ -120,10 +120,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* @min1, align 4 + %tmp1 = load i32, i32* @min1, align 4 %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind @@ -137,10 +137,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* @min1, align 4 + %tmp1 = load i32, i32* @min1, align 4 %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind @@ -166,7 +166,7 @@ entry: define arm_aapcscc void @multi_m() nounwind { entry: - %tmp = load i32* @min1, align 4 + %tmp = load i32, i32* @min1, align 4 call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind ret void } @@ -191,10 +191,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* %in1, align 4 + %tmp1 = load i32, i32* %in1, align 4 %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 ret void @@ -206,10 +206,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* %in1, align 4 + %tmp1 = load i32, i32* %in1, align 4 %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 ret void @@ -221,7 +221,7 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind store 
i32 %0, i32* %out0, align 4 ret void @@ -278,10 +278,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* @min1, align 4 + %tmp1 = load i32, i32* @min1, align 4 %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind @@ -295,10 +295,10 @@ entry: %in1 = alloca i32, align 4 store i32 0, i32* %out0, align 4 store i32 1, i32* %in1, align 4 - %tmp = load i32* %in1, align 4 + %tmp = load i32, i32* %in1, align 4 %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind store i32 %0, i32* %out0, align 4 - %tmp1 = load i32* @min1, align 4 + %tmp1 = load i32, i32* @min1, align 4 %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind store i32 %1, i32* %out0, align 4 %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind diff --git a/llvm/test/CodeGen/ARM/negative-offset.ll b/llvm/test/CodeGen/ARM/negative-offset.ll index a5fdb1e8bb5..bafc9645471 100644 --- a/llvm/test/CodeGen/ARM/negative-offset.ll +++ b/llvm/test/CodeGen/ARM/negative-offset.ll @@ -8,9 +8,9 @@ entry: ;CHECK: ldr r{{.*}}, [r0, #-16] ;CHECK: ldr r{{.*}}, [r0, #-8] %arrayidx = getelementptr inbounds i32, i32* %p, i32 -4 - %0 = load i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 -2 - %1 = load i32* %arrayidx1, align 4 + %1 = load i32, i32* %arrayidx1, align 4 %add = add nsw i32 %1, %0 ret i32 %add } diff --git a/llvm/test/CodeGen/ARM/neon_cmp.ll b/llvm/test/CodeGen/ARM/neon_cmp.ll index e1662c43c64..fcf4486fa46 100644 --- a/llvm/test/CodeGen/ARM/neon_cmp.ll +++ b/llvm/test/CodeGen/ARM/neon_cmp.ll @@ -4,8 +4,8 @@ ; radar://13191881 ; CHECK: vfcmp define void @vfcmp(<2 x double>* %a, <2 x double>* %b) { - %wide.load = load <2 x double>* %a, align 4 - %wide.load2 = load <2 x double>* %b, align 4 + %wide.load = load <2 x double>, <2 x double>* %a, align 4 + %wide.load2 = load <2 x double>, <2 x double>* %b, align 4 ; CHECK-NOT: vdup.32 ; CHECK-NOT: vmovn.i64 %v1 = fcmp olt <2 x double> %wide.load, %wide.load2 diff --git a/llvm/test/CodeGen/ARM/neon_div.ll b/llvm/test/CodeGen/ARM/neon_div.ll index 4f1607ed5ba..e185c2a8afb 100644 --- a/llvm/test/CodeGen/ARM/neon_div.ll +++ b/llvm/test/CodeGen/ARM/neon_div.ll @@ -7,8 +7,8 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 ;CHECK: vmovn.i32 ;CHECK: vmovn.i16 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = sdiv <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -21,8 +21,8 @@ define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecps.f32 ;CHECK: vmovn.i32 ;CHECK: vqmovun.s16 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = udiv <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -31,8 +31,8 @@ define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vrecpe.f32 ;CHECK: vrecps.f32 ;CHECK: vmovn.i32 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = sdiv <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -42,8 +42,8 @@ define <4 x i16> @udivi16(<4 x 
i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vrecps.f32 ;CHECK: vrecps.f32 ;CHECK: vmovn.i32 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = udiv <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/neon_fpconv.ll b/llvm/test/CodeGen/ARM/neon_fpconv.ll index 8e37ce77818..61ac4098a6d 100644 --- a/llvm/test/CodeGen/ARM/neon_fpconv.ll +++ b/llvm/test/CodeGen/ARM/neon_fpconv.ll @@ -19,7 +19,7 @@ define <2 x double> @vextend(<2 x float> %a) { ; CHECK: vsitofp_double define void @vsitofp_double(<2 x i32>* %loadaddr, <2 x double>* %storeaddr) { - %v0 = load <2 x i32>* %loadaddr + %v0 = load <2 x i32>, <2 x i32>* %loadaddr ; CHECK: vldr ; CHECK-NEXT: vcvt.f64.s32 ; CHECK-NEXT: vcvt.f64.s32 @@ -31,7 +31,7 @@ define void @vsitofp_double(<2 x i32>* %loadaddr, ; CHECK: vuitofp_double define void @vuitofp_double(<2 x i32>* %loadaddr, <2 x double>* %storeaddr) { - %v0 = load <2 x i32>* %loadaddr + %v0 = load <2 x i32>, <2 x i32>* %loadaddr ; CHECK: vldr ; CHECK-NEXT: vcvt.f64.u32 ; CHECK-NEXT: vcvt.f64.u32 diff --git a/llvm/test/CodeGen/ARM/neon_ld1.ll b/llvm/test/CodeGen/ARM/neon_ld1.ll index 9fd3fc5f341..f4d60190553 100644 --- a/llvm/test/CodeGen/ARM/neon_ld1.ll +++ b/llvm/test/CodeGen/ARM/neon_ld1.ll @@ -7,8 +7,8 @@ ; CHECK: vstr d define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind { entry: - %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] - %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1] + %0 = load <4 x i16>, <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] + %1 = load <4 x i16>, <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1] %2 = add <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1] %3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1] store <2 x i32> %3, <2 x i32>* %r, align 8 @@ -22,8 +22,8 @@ entry: ; CHECK: vmov r0, r1, d define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly { entry: - %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] - %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1] + %0 = load <4 x i16>, <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1] + %1 = load <4 x i16>, <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1] %2 = sub <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1] %3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1] ret <2 x i32> %3 diff --git a/llvm/test/CodeGen/ARM/neon_ld2.ll b/llvm/test/CodeGen/ARM/neon_ld2.ll index 571a16a061d..5bd6ae6d2a9 100644 --- a/llvm/test/CodeGen/ARM/neon_ld2.ll +++ b/llvm/test/CodeGen/ARM/neon_ld2.ll @@ -13,8 +13,8 @@ ; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}} define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: - %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] - %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1] + %0 = load <2 x i64>, <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] + %1 = load <2 x i64>, <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1] %2 = add <2 x i64> %0, %1 ; <<2 x i64>> [#uses=1] %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1] store <4 x i32> %3, <4 x i32>* %r, align 16 @@ -35,8 +35,8 @@ entry: ; SWIFT: vmov r2, r3, d define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly { entry: - %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] - %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1] + %0 = load <2 x i64>, <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1] + %1 = load <2 x i64>, <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1] %2 = sub <2 x i64> 
%0, %1 ; <<2 x i64>> [#uses=1] %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1] ret <4 x i32> %3 @@ -50,8 +50,8 @@ entry: ; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}} define void @t3(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: - %0 = load <2 x i64>* %a, align 8 - %1 = load <2 x i64>* %b, align 8 + %0 = load <2 x i64>, <2 x i64>* %a, align 8 + %1 = load <2 x i64>, <2 x i64>* %b, align 8 %2 = add <2 x i64> %0, %1 %3 = bitcast <2 x i64> %2 to <4 x i32> store <4 x i32> %3, <4 x i32>* %r, align 8 diff --git a/llvm/test/CodeGen/ARM/neon_spill.ll b/llvm/test/CodeGen/ARM/neon_spill.ll index d286d16486c..04f08f43840 100644 --- a/llvm/test/CodeGen/ARM/neon_spill.ll +++ b/llvm/test/CodeGen/ARM/neon_spill.ll @@ -24,7 +24,7 @@ declare arm_aapcs_vfpcc %2** @func4() define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 { call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind %2 = call arm_aapcs_vfpcc %0** @func2() nounwind - %3 = load %0** %2, align 4 + %3 = load %0*, %0** %2, align 4 store float 0.000000e+00, float* undef, align 4 %4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef) diff --git a/llvm/test/CodeGen/ARM/no-fpu.ll b/llvm/test/CodeGen/ARM/no-fpu.ll index fff4bccb80e..c5d1f1951d7 100644 --- a/llvm/test/CodeGen/ARM/no-fpu.ll +++ b/llvm/test/CodeGen/ARM/no-fpu.ll @@ -6,7 +6,7 @@ ; Check no NEON instructions are selected when feature is disabled. 
define void @neonop(i64* nocapture readonly %a, i64* nocapture %b) #0 { %1 = bitcast i64* %a to <2 x i64>* - %wide.load = load <2 x i64>* %1, align 8 + %wide.load = load <2 x i64>, <2 x i64>* %1, align 8 ; NONEON-NOVFP-NOT: vld1.64 ; NONEON-NOT: vld1.64 %add = add <2 x i64> %wide.load, %wide.load diff --git a/llvm/test/CodeGen/ARM/no-tail-call.ll b/llvm/test/CodeGen/ARM/no-tail-call.ll index b65edc299be..5a5d43c2871 100644 --- a/llvm/test/CodeGen/ARM/no-tail-call.ll +++ b/llvm/test/CodeGen/ARM/no-tail-call.ll @@ -38,7 +38,7 @@ entry: %5 = call float @llvm.ceil.f32(float 5.000000e+00) %.native3 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0 %.native3.value = getelementptr inbounds %Sf, %Sf* %.native3, i32 0, i32 0 - %6 = load float* %.native3.value, align 4 + %6 = load float, float* %.native3.value, align 4 %7 = call float @llvm.ceil.f32(float %6) %8 = insertvalue { float, float, float } { float 0.000000e+00, float undef, float undef }, float %5, 1 %9 = insertvalue { float, float, float } %8, float %7, 2 @@ -76,7 +76,7 @@ entry: %5 = call float @llvm.ceil.f32(float 5.000000e+00) %.native3 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0 %.native3.value = getelementptr inbounds %Sf, %Sf* %.native3, i32 0, i32 0 - %6 = load float* %.native3.value, align 4 + %6 = load float, float* %.native3.value, align 4 %7 = call float @llvm.ceil.f32(float %6) %8 = insertvalue { float, float } { float 0.000000e+00, float undef }, float %7, 1 ret { float, float } %8 diff --git a/llvm/test/CodeGen/ARM/none-macho.ll b/llvm/test/CodeGen/ARM/none-macho.ll index 2a7878fee30..23555b3d22c 100644 --- a/llvm/test/CodeGen/ARM/none-macho.ll +++ b/llvm/test/CodeGen/ARM/none-macho.ll @@ -11,7 +11,7 @@ define i32 @test_litpool() minsize { ; CHECK-LABEL: test_litpool: - %val = load i32* @var + %val = load i32, i32* @var ret i32 %val ; Lit-pool entries need to produce a "$non_lazy_ptr" version of the symbol. 
@@ -21,7 +21,7 @@ define i32 @test_litpool() minsize { define i32 @test_movw_movt() { ; CHECK-LABEL: test_movw_movt: - %val = load i32* @var + %val = load i32, i32* @var ret i32 %val ; movw/movt should also address their symbols MachO-style @@ -56,7 +56,7 @@ define i32 @test_frame_ptr() { %big_arr = type [8 x i32] define void @test_two_areas(%big_arr* %addr) { ; CHECK-LABEL: test_two_areas: - %val = load %big_arr* %addr + %val = load %big_arr, %big_arr* %addr call void @test_trap() store %big_arr %val, %big_arr* %addr diff --git a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll index c81090095a9..fa0e892f9e2 100644 --- a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll +++ b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll @@ -5,7 +5,7 @@ ;CHECK-NOT: vst1.32 ;CHECK: bx define void @foo(<16 x i8>* %J) { - %A = load <16 x i8>* %J + %A = load <16 x i8>, <16 x i8>* %J %T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %T2 = shufflevector <8 x i8> %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> store <16 x i8> %T2, <16 x i8>* %J diff --git a/llvm/test/CodeGen/ARM/optselect-regclass.ll b/llvm/test/CodeGen/ARM/optselect-regclass.ll index 0acb2f2f0ec..5935a4f109b 100644 --- a/llvm/test/CodeGen/ARM/optselect-regclass.ll +++ b/llvm/test/CodeGen/ARM/optselect-regclass.ll @@ -9,7 +9,7 @@ ; Function Attrs: nounwind ssp define void @xfr() { entry: - %bf.load4 = load i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4 + %bf.load4 = load i32, i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4 %bf.clear10 = and i32 %bf.load4, 65535 %and11 = and i32 %bf.load4, 32768 %tobool12 = icmp ne i32 %and11, 0 diff --git a/llvm/test/CodeGen/ARM/phi.ll b/llvm/test/CodeGen/ARM/phi.ll index 3ac0f5246ef..ff85052175c 100644 --- a/llvm/test/CodeGen/ARM/phi.ll +++ b/llvm/test/CodeGen/ARM/phi.ll @@ -19,7 +19,7 @@ rblock: end: ; CHECK: ldr r0, [r1, #4] %gep = phi i32* [%lbranch, %lblock], [%rbranch, %rblock] - %r = load i32* %gep + %r = load i32, i32* %gep ; CHECK-NEXT: bx lr ret i32 %r } diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll index 7ace640c6b6..74f90640ca6 100644 --- a/llvm/test/CodeGen/ARM/popcnt.ll +++ b/llvm/test/CodeGen/ARM/popcnt.ll @@ -4,7 +4,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcnt8: ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } @@ -12,7 +12,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vcntQ8: ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } @@ -24,7 +24,7 @@ define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind { ; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}} ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } @@ -36,7 +36,7 @@ define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind { ; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, 
{{q[0-9]+}} ; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}} ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } @@ -51,7 +51,7 @@ define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind { ; CHECK: vrev32.16 {{d[0-9]+}}, {{d[0-9]+}} ; CHECK: vuzp.16 {{d[0-9]+}}, {{d[0-9]+}} ; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -66,7 +66,7 @@ define <4 x i32> @vcntQ32(<4 x i32>* %A) nounwind { ; CHECK: vrev32.16 {{q[0-9]+}}, {{q[0-9]+}} ; CHECK: vuzp.16 {{q[0-9]+}}, {{q[0-9]+}} ; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } @@ -81,7 +81,7 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vclz8: ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0) ret <8 x i8> %tmp2 } @@ -89,7 +89,7 @@ define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vclz16: ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0) ret <4 x i16> %tmp2 } @@ -97,7 +97,7 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vclz32: ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0) ret <2 x i32> %tmp2 } @@ -105,7 +105,7 @@ define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vclzQ8: ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0) ret <16 x i8> %tmp2 } @@ -113,7 +113,7 @@ define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vclzQ16: ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0) ret <8 x i16> %tmp2 } @@ -121,7 +121,7 @@ define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vclzQ32: ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0) ret <4 x i32> %tmp2 } @@ -137,7 +137,7 @@ declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vclss8: ;CHECK: vcls.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } @@ -145,7 +145,7 @@ define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vclss16: ;CHECK: vcls.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x 
i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } @@ -153,7 +153,7 @@ define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vclss32: ;CHECK: vcls.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -161,7 +161,7 @@ define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vclsQs8: ;CHECK: vcls.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } @@ -169,7 +169,7 @@ define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vclsQs16: ;CHECK: vcls.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } @@ -177,7 +177,7 @@ define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vclsQs32: ;CHECK: vcls.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/pr13249.ll b/llvm/test/CodeGen/ARM/pr13249.ll index 54430e6b0e7..cede6007ba1 100644 --- a/llvm/test/CodeGen/ARM/pr13249.ll +++ b/llvm/test/CodeGen/ARM/pr13249.ll @@ -6,7 +6,7 @@ bb: bb3: ; preds = %bb3, %bb %tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ] - %tmp4 = load i8* %tmp, align 1 + %tmp4 = load i8, i8* %tmp, align 1 %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1 br i1 undef, label %bb3, label %bb7 @@ -18,7 +18,7 @@ bb7: ; preds = %bb13, %bb3 br i1 %tmp10, label %bb13, label %bb15 bb13: ; preds = %bb7 - %tmp14 = load i8* %tmp12, align 1 + %tmp14 = load i8, i8* %tmp12, align 1 br label %bb7 bb15: ; preds = %bb7 diff --git a/llvm/test/CodeGen/ARM/pr18364-movw.ll b/llvm/test/CodeGen/ARM/pr18364-movw.ll index fdcf15485f1..b783522c42b 100644 --- a/llvm/test/CodeGen/ARM/pr18364-movw.ll +++ b/llvm/test/CodeGen/ARM/pr18364-movw.ll @@ -14,8 +14,8 @@ entry: %z = alloca i64, align 8 store i64 1, i64* %y, align 8 store i64 11579764786944, i64* %z, align 8 - %0 = load i64* %y, align 8 - %1 = load i64* %z, align 8 + %0 = load i64, i64* %y, align 8 + %1 = load i64, i64* %z, align 8 %sub = sub i64 %0, %1 ret i64 %sub } diff --git a/llvm/test/CodeGen/ARM/pr3502.ll b/llvm/test/CodeGen/ARM/pr3502.ll index 606d9698b97..4ec982ebea2 100644 --- a/llvm/test/CodeGen/ARM/pr3502.ll +++ b/llvm/test/CodeGen/ARM/pr3502.ll @@ -11,7 +11,7 @@ define void @SomeCall(i32 %num) nounwind { entry: tail call void asm sideeffect "mcr p15, 0, $0, c7, c10, 4 \0A\09", "r,~{memory}"(i32 0) nounwind tail call void asm sideeffect "mcr p15,0,$0,c7,c14,0", "r,~{memory}"(i32 0) nounwind - %0 = load %struct.SHARED_AREA** null, align 4 ; <%struct.SHARED_AREA*> [#uses=1] + %0 = load %struct.SHARED_AREA*, %struct.SHARED_AREA** null, align 4 ; <%struct.SHARED_AREA*> [#uses=1] %1 = ptrtoint %struct.SHARED_AREA* %0 to i32 ; <i32> [#uses=1] %2 = lshr i32 %1, 20 ; <i32> [#uses=1] %3 = tail call i32 @SetCurrEntry(i32 %2, i32 0) nounwind ; <i32> [#uses=0] diff --git a/llvm/test/CodeGen/ARM/private.ll b/llvm/test/CodeGen/ARM/private.ll index e48c292db46..fab8f37f989 100644 --- a/llvm/test/CodeGen/ARM/private.ll +++ b/llvm/test/CodeGen/ARM/private.ll @@ -15,7 +15,7 @@ define 
private void @foo() { define i32 @bar() { call void @foo() - %1 = load i32* @baz, align 4 + %1 = load i32, i32* @baz, align 4 ret i32 %1 } diff --git a/llvm/test/CodeGen/ARM/reg_sequence.ll b/llvm/test/CodeGen/ARM/reg_sequence.ll index a9fdd7e167b..507ee48cafc 100644 --- a/llvm/test/CodeGen/ARM/reg_sequence.ll +++ b/llvm/test/CodeGen/ARM/reg_sequence.ll @@ -20,9 +20,9 @@ entry: ; CHECK-NOT: vmov d ; CHECK-NEXT: vst1.16 %0 = getelementptr inbounds %struct.int32x4_t, %struct.int32x4_t* %vT0ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] - %1 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] + %1 = load <4 x i32>, <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] %2 = getelementptr inbounds %struct.int32x4_t, %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1] - %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1] + %3 = load <4 x i32>, <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2] @@ -56,9 +56,9 @@ entry: ; CHECK: vst1.16 ; CHECK: vst1.16 %0 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %1 = load <8 x i16>* %0, align 16 ; <<8 x i16>> [#uses=1] + %1 = load <8 x i16>, <8 x i16>* %0, align 16 ; <<8 x i16>> [#uses=1] %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %3 = load <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1] + %3 = load <8 x i16>, <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1] %4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1] %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1] %6 = getelementptr inbounds i16, i16* %i_ptr, i32 8 ; <i16*> [#uses=1] @@ -146,7 +146,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK-NOT: vmov ; CHECK: vadd.i16 %tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1] - %tmp1 = load <8 x i16>* %B ; <<8 x i16>> [#uses=2] + %tmp1 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=2] %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1] @@ -159,7 +159,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: vldr ; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]] ; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]} - %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] + %tmp1 = load <8 x i8>, <8 x i8>* %B ; <<8 x i8>> [#uses=2] %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/saxpy10-a9.ll b/llvm/test/CodeGen/ARM/saxpy10-a9.ll index 26c6954f762..531b02bbaac 100644 --- a/llvm/test/CodeGen/ARM/saxpy10-a9.ll +++ b/llvm/test/CodeGen/ARM/saxpy10-a9.ll @@ -63,72 +63,72 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64- ; This accumulates a sum rather than storing each result. 
define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) { entry: - %0 = load float* %data1, align 4 + %0 = load float, float* %data1, align 4 %mul = fmul float %0, %a - %1 = load float* %data2, align 4 + %1 = load float, float* %data2, align 4 %add = fadd float %mul, %1 %add2 = fadd float %add, 0.000000e+00 %arrayidx.1 = getelementptr inbounds float, float* %data1, i32 1 - %2 = load float* %arrayidx.1, align 4 + %2 = load float, float* %arrayidx.1, align 4 %mul.1 = fmul float %2, %a %arrayidx1.1 = getelementptr inbounds float, float* %data2, i32 1 - %3 = load float* %arrayidx1.1, align 4 + %3 = load float, float* %arrayidx1.1, align 4 %add.1 = fadd float %mul.1, %3 %add2.1 = fadd float %add2, %add.1 %arrayidx.2 = getelementptr inbounds float, float* %data1, i32 2 - %4 = load float* %arrayidx.2, align 4 + %4 = load float, float* %arrayidx.2, align 4 %mul.2 = fmul float %4, %a %arrayidx1.2 = getelementptr inbounds float, float* %data2, i32 2 - %5 = load float* %arrayidx1.2, align 4 + %5 = load float, float* %arrayidx1.2, align 4 %add.2 = fadd float %mul.2, %5 %add2.2 = fadd float %add2.1, %add.2 %arrayidx.3 = getelementptr inbounds float, float* %data1, i32 3 - %6 = load float* %arrayidx.3, align 4 + %6 = load float, float* %arrayidx.3, align 4 %mul.3 = fmul float %6, %a %arrayidx1.3 = getelementptr inbounds float, float* %data2, i32 3 - %7 = load float* %arrayidx1.3, align 4 + %7 = load float, float* %arrayidx1.3, align 4 %add.3 = fadd float %mul.3, %7 %add2.3 = fadd float %add2.2, %add.3 %arrayidx.4 = getelementptr inbounds float, float* %data1, i32 4 - %8 = load float* %arrayidx.4, align 4 + %8 = load float, float* %arrayidx.4, align 4 %mul.4 = fmul float %8, %a %arrayidx1.4 = getelementptr inbounds float, float* %data2, i32 4 - %9 = load float* %arrayidx1.4, align 4 + %9 = load float, float* %arrayidx1.4, align 4 %add.4 = fadd float %mul.4, %9 %add2.4 = fadd float %add2.3, %add.4 %arrayidx.5 = getelementptr inbounds float, float* %data1, i32 5 - %10 = load float* %arrayidx.5, align 4 + %10 = load float, float* %arrayidx.5, align 4 %mul.5 = fmul float %10, %a %arrayidx1.5 = getelementptr inbounds float, float* %data2, i32 5 - %11 = load float* %arrayidx1.5, align 4 + %11 = load float, float* %arrayidx1.5, align 4 %add.5 = fadd float %mul.5, %11 %add2.5 = fadd float %add2.4, %add.5 %arrayidx.6 = getelementptr inbounds float, float* %data1, i32 6 - %12 = load float* %arrayidx.6, align 4 + %12 = load float, float* %arrayidx.6, align 4 %mul.6 = fmul float %12, %a %arrayidx1.6 = getelementptr inbounds float, float* %data2, i32 6 - %13 = load float* %arrayidx1.6, align 4 + %13 = load float, float* %arrayidx1.6, align 4 %add.6 = fadd float %mul.6, %13 %add2.6 = fadd float %add2.5, %add.6 %arrayidx.7 = getelementptr inbounds float, float* %data1, i32 7 - %14 = load float* %arrayidx.7, align 4 + %14 = load float, float* %arrayidx.7, align 4 %mul.7 = fmul float %14, %a %arrayidx1.7 = getelementptr inbounds float, float* %data2, i32 7 - %15 = load float* %arrayidx1.7, align 4 + %15 = load float, float* %arrayidx1.7, align 4 %add.7 = fadd float %mul.7, %15 %add2.7 = fadd float %add2.6, %add.7 %arrayidx.8 = getelementptr inbounds float, float* %data1, i32 8 - %16 = load float* %arrayidx.8, align 4 + %16 = load float, float* %arrayidx.8, align 4 %mul.8 = fmul float %16, %a %arrayidx1.8 = getelementptr inbounds float, float* %data2, i32 8 - %17 = load float* %arrayidx1.8, align 4 + %17 = load float, float* %arrayidx1.8, align 4 %add.8 = fadd float %mul.8, %17 
%add2.8 = fadd float %add2.7, %add.8 %arrayidx.9 = getelementptr inbounds float, float* %data1, i32 9 - %18 = load float* %arrayidx.9, align 4 + %18 = load float, float* %arrayidx.9, align 4 %mul.9 = fmul float %18, %a %arrayidx1.9 = getelementptr inbounds float, float* %data2, i32 9 - %19 = load float* %arrayidx1.9, align 4 + %19 = load float, float* %arrayidx1.9, align 4 %add.9 = fadd float %mul.9, %19 %add2.9 = fadd float %add2.8, %add.9 ret float %add2.9 diff --git a/llvm/test/CodeGen/ARM/segmented-stacks.ll b/llvm/test/CodeGen/ARM/segmented-stacks.ll index 9873bf33294..cbb124de11c 100644 --- a/llvm/test/CodeGen/ARM/segmented-stacks.ll +++ b/llvm/test/CodeGen/ARM/segmented-stacks.ll @@ -55,7 +55,7 @@ define void @test_basic() #0 { } define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { - %addend = load i32 * %closure + %addend = load i32 , i32 * %closure %result = add i32 %other, %addend %mem = alloca i32, i32 10 call void @dummy_use (i32* %mem, i32 10) diff --git a/llvm/test/CodeGen/ARM/select_xform.ll b/llvm/test/CodeGen/ARM/select_xform.ll index 326eb512d85..460ca8f1840 100644 --- a/llvm/test/CodeGen/ARM/select_xform.ll +++ b/llvm/test/CodeGen/ARM/select_xform.ll @@ -183,7 +183,7 @@ define i32 @t12(i32 %a, i32 %b) nounwind { ; Handle frame index operands. define void @pr13628() nounwind uwtable align 2 { %x3 = alloca i8, i32 256, align 8 - %x4 = load i8* undef, align 1 + %x4 = load i8, i8* undef, align 1 %x5 = icmp ne i8 %x4, 0 %x6 = select i1 %x5, i8* %x3, i8* null call void @bar(i8* %x6) nounwind diff --git a/llvm/test/CodeGen/ARM/shifter_operand.ll b/llvm/test/CodeGen/ARM/shifter_operand.ll index fd09d822d65..3999168de6b 100644 --- a/llvm/test/CodeGen/ARM/shifter_operand.ll +++ b/llvm/test/CodeGen/ARM/shifter_operand.ll @@ -43,8 +43,8 @@ entry: %tmp3 = inttoptr i32 %tmp2 to i32* %tmp4 = add i32 %base2, %tmp1 %tmp5 = inttoptr i32 %tmp4 to i32* - %tmp6 = load i32* %tmp3 - %tmp7 = load i32* %tmp5 + %tmp6 = load i32, i32* %tmp3 + %tmp7 = load i32, i32* %tmp5 %tmp8 = add i32 %tmp7, %tmp6 ret i32 %tmp8 } @@ -68,7 +68,7 @@ entry: %1 = bitcast i8* %0 to i32* %2 = sext i16 %addr to i32 %3 = getelementptr inbounds i32, i32* %1, i32 %2 - %4 = load i32* %3, align 4 + %4 = load i32, i32* %3, align 4 %5 = add nsw i32 %4, 1 store i32 %5, i32* %3, align 4 ret void diff --git a/llvm/test/CodeGen/ARM/smul.ll b/llvm/test/CodeGen/ARM/smul.ll index b7ddd10a568..13873f511e1 100644 --- a/llvm/test/CodeGen/ARM/smul.ll +++ b/llvm/test/CodeGen/ARM/smul.ll @@ -7,7 +7,7 @@ define i32 @f1(i32 %y) { ; CHECK: f1 ; CHECK: smulbt - %tmp = load i16* @x ; <i16> [#uses=1] + %tmp = load i16, i16* @x ; <i16> [#uses=1] %tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1] %tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1] %tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/space-directive.ll b/llvm/test/CodeGen/ARM/space-directive.ll index 55be1991fe8..24f0d0aadf6 100644 --- a/llvm/test/CodeGen/ARM/space-directive.ll +++ b/llvm/test/CodeGen/ARM/space-directive.ll @@ -11,7 +11,7 @@ define i32 @test_space() minsize { ; CHECK: [[PAST_CP]]: ; CHECK: .zero 10000 %addr = inttoptr i32 12345678 to i32* - %val = load i32* %addr + %val = load i32, i32* %addr call i32 @llvm.arm.space(i32 10000, i32 undef) ret i32 %val } diff --git a/llvm/test/CodeGen/ARM/spill-q.ll b/llvm/test/CodeGen/ARM/spill-q.ll index 425fc12755c..1a102e3d971 100644 --- a/llvm/test/CodeGen/ARM/spill-q.ll +++ b/llvm/test/CodeGen/ARM/spill-q.ll @@ -43,7 +43,7 @@ entry: store float 0.000000e+00, float* undef, align 4 %ld12 = call <4 x float> 
@llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind store float 0.000000e+00, float* undef, align 4 - %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] + %val173 = load <4 x float>, <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 bb4: ; preds = %bb193, %entry diff --git a/llvm/test/CodeGen/ARM/ssp-data-layout.ll b/llvm/test/CodeGen/ARM/ssp-data-layout.ll index 1dc0a93f65e..516cc2bdc27 100644 --- a/llvm/test/CodeGen/ARM/ssp-data-layout.ll +++ b/llvm/test/CodeGen/ARM/ssp-data-layout.ll @@ -149,18 +149,18 @@ entry: %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0 %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0 %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0 - %0 = load i32* %x, align 4 - %1 = load i32* %y, align 4 - %2 = load i32* %z, align 4 + %0 = load i32, i32* %x, align 4 + %1 = load i32, i32* %y, align 4 + %2 = load i32, i32* %z, align 4 %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0 %3 = bitcast [8 x i8]* %coerce.dive to i64* - %4 = load i64* %3, align 1 + %4 = load i64, i64* %3, align 1 %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0 %5 = bitcast [2 x i8]* %coerce.dive25 to i16* - %6 = load i16* %5, align 1 + %6 = load i16, i16* %5, align 1 %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0 %7 = bitcast [2 x i16]* %coerce.dive26 to i32* - %8 = load i32* %7, align 1 + %8 = load i32, i32* %7, align 1 call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) ret void } @@ -296,18 +296,18 @@ entry: %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0 %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0 %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0 - %0 = load i32* %x, align 4 - %1 = load i32* %y, align 4 - %2 = load i32* %z, align 4 + %0 = load i32, i32* %x, align 4 + %1 = load i32, i32* %y, align 4 + %2 = load i32, i32* %z, align 4 %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0 %3 = bitcast [8 x i8]* %coerce.dive to i64* - %4 = load i64* %3, align 1 + %4 = load i64, i64* %3, align 1 %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0 %5 = bitcast [2 x i8]* %coerce.dive25 to i16* - %6 = load i16* %5, align 1 + %6 = load i16, i16* %5, align 1 %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0 %7 = bitcast [2 x i16]* %coerce.dive26 to i32* - %8 = load i32* %7, align 1 + %8 = load i32, i32* %7, align 1 call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) ret void } @@ -431,18 +431,18 @@ entry: %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0 %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0 %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0 - %0 = load i32* %x, align 4 - %1 = load i32* %y, align 4 - %2 = load i32* %z, align 4 + %0 = load i32, i32* %x, align 4 + %1 = load i32, 
i32* %y, align 4 + %2 = load i32, i32* %z, align 4 %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0 %3 = bitcast [8 x i8]* %coerce.dive to i64* - %4 = load i64* %3, align 1 + %4 = load i64, i64* %3, align 1 %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0 %5 = bitcast [2 x i8]* %coerce.dive25 to i16* - %6 = load i16* %5, align 1 + %6 = load i16, i16* %5, align 1 %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0 %7 = bitcast [2 x i16]* %coerce.dive26 to i32* - %8 = load i32* %7, align 1 + %8 = load i32, i32* %7, align 1 call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) ret void } @@ -475,13 +475,13 @@ entry: %0 = bitcast %struct.struct_large_char2* %b to %struct.struct_large_char* %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %0, i32 0, i32 0 %1 = bitcast [8 x i8]* %coerce.dive to i64* - %2 = load i64* %1, align 1 + %2 = load i64, i64* %1, align 1 %coerce.dive4 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %a, i32 0, i32 0 %3 = bitcast [2 x i8]* %coerce.dive4 to i16* - %4 = load i16* %3, align 1 + %4 = load i16, i16* %3, align 1 %coerce.dive5 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d2, i32 0, i32 0 %5 = bitcast [2 x i16]* %coerce.dive5 to i32* - %6 = load i32* %5, align 1 + %6 = load i32, i32* %5, align 1 call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/ARM/stack-alignment.ll b/llvm/test/CodeGen/ARM/stack-alignment.ll index 2decd2a05aa..ac14a5959d1 100644 --- a/llvm/test/CodeGen/ARM/stack-alignment.ll +++ b/llvm/test/CodeGen/ARM/stack-alignment.ll @@ -40,65 +40,65 @@ entry: ; CHECK-THUMB2: bfc r4, #0, #12 ; CHECK-THUMB2-NEXT: mov sp, r4 %a = alloca i8, align 4096 - %0 = load double* %d, align 4 + %0 = load double, double* %d, align 4 %arrayidx1 = getelementptr inbounds double, double* %d, i32 1 - %1 = load double* %arrayidx1, align 4 + %1 = load double, double* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds double, double* %d, i32 2 - %2 = load double* %arrayidx2, align 4 + %2 = load double, double* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds double, double* %d, i32 3 - %3 = load double* %arrayidx3, align 4 + %3 = load double, double* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds double, double* %d, i32 4 - %4 = load double* %arrayidx4, align 4 + %4 = load double, double* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds double, double* %d, i32 5 - %5 = load double* %arrayidx5, align 4 + %5 = load double, double* %arrayidx5, align 4 %arrayidx6 = getelementptr inbounds double, double* %d, i32 6 - %6 = load double* %arrayidx6, align 4 + %6 = load double, double* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds double, double* %d, i32 7 - %7 = load double* %arrayidx7, align 4 + %7 = load double, double* %arrayidx7, align 4 %arrayidx8 = getelementptr inbounds double, double* %d, i32 8 - %8 = load double* %arrayidx8, align 4 + %8 = load double, double* %arrayidx8, align 4 %arrayidx9 = getelementptr inbounds double, double* %d, i32 9 - %9 = load double* %arrayidx9, align 4 + %9 = load 
double, double* %arrayidx9, align 4 %arrayidx10 = getelementptr inbounds double, double* %d, i32 10 - %10 = load double* %arrayidx10, align 4 + %10 = load double, double* %arrayidx10, align 4 %arrayidx11 = getelementptr inbounds double, double* %d, i32 11 - %11 = load double* %arrayidx11, align 4 + %11 = load double, double* %arrayidx11, align 4 %arrayidx12 = getelementptr inbounds double, double* %d, i32 12 - %12 = load double* %arrayidx12, align 4 + %12 = load double, double* %arrayidx12, align 4 %arrayidx13 = getelementptr inbounds double, double* %d, i32 13 - %13 = load double* %arrayidx13, align 4 + %13 = load double, double* %arrayidx13, align 4 %arrayidx14 = getelementptr inbounds double, double* %d, i32 14 - %14 = load double* %arrayidx14, align 4 + %14 = load double, double* %arrayidx14, align 4 %arrayidx15 = getelementptr inbounds double, double* %d, i32 15 - %15 = load double* %arrayidx15, align 4 + %15 = load double, double* %arrayidx15, align 4 %arrayidx16 = getelementptr inbounds double, double* %d, i32 16 - %16 = load double* %arrayidx16, align 4 + %16 = load double, double* %arrayidx16, align 4 %arrayidx17 = getelementptr inbounds double, double* %d, i32 17 - %17 = load double* %arrayidx17, align 4 + %17 = load double, double* %arrayidx17, align 4 %arrayidx18 = getelementptr inbounds double, double* %d, i32 18 - %18 = load double* %arrayidx18, align 4 + %18 = load double, double* %arrayidx18, align 4 %arrayidx19 = getelementptr inbounds double, double* %d, i32 19 - %19 = load double* %arrayidx19, align 4 + %19 = load double, double* %arrayidx19, align 4 %arrayidx20 = getelementptr inbounds double, double* %d, i32 20 - %20 = load double* %arrayidx20, align 4 + %20 = load double, double* %arrayidx20, align 4 %arrayidx21 = getelementptr inbounds double, double* %d, i32 21 - %21 = load double* %arrayidx21, align 4 + %21 = load double, double* %arrayidx21, align 4 %arrayidx22 = getelementptr inbounds double, double* %d, i32 22 - %22 = load double* %arrayidx22, align 4 + %22 = load double, double* %arrayidx22, align 4 %arrayidx23 = getelementptr inbounds double, double* %d, i32 23 - %23 = load double* %arrayidx23, align 4 + %23 = load double, double* %arrayidx23, align 4 %arrayidx24 = getelementptr inbounds double, double* %d, i32 24 - %24 = load double* %arrayidx24, align 4 + %24 = load double, double* %arrayidx24, align 4 %arrayidx25 = getelementptr inbounds double, double* %d, i32 25 - %25 = load double* %arrayidx25, align 4 + %25 = load double, double* %arrayidx25, align 4 %arrayidx26 = getelementptr inbounds double, double* %d, i32 26 - %26 = load double* %arrayidx26, align 4 + %26 = load double, double* %arrayidx26, align 4 %arrayidx27 = getelementptr inbounds double, double* %d, i32 27 - %27 = load double* %arrayidx27, align 4 + %27 = load double, double* %arrayidx27, align 4 %arrayidx28 = getelementptr inbounds double, double* %d, i32 28 - %28 = load double* %arrayidx28, align 4 + %28 = load double, double* %arrayidx28, align 4 %arrayidx29 = getelementptr inbounds double, double* %d, i32 29 - %29 = load double* %arrayidx29, align 4 + %29 = load double, double* %arrayidx29, align 4 %div = fdiv double %29, %28 %div30 = fdiv double %div, %27 %div31 = fdiv double %div30, %26 diff --git a/llvm/test/CodeGen/ARM/str_post.ll b/llvm/test/CodeGen/ARM/str_post.ll index a4f864065d5..0933e15dab4 100644 --- a/llvm/test/CodeGen/ARM/str_post.ll +++ b/llvm/test/CodeGen/ARM/str_post.ll @@ -3,7 +3,7 @@ define i16 @test1(i32* %X, i16* %A) { ; CHECK-LABEL: test1: ; CHECK: strh {{.*}}[{{.*}}], 
#-4 - %Y = load i32* %X ; <i32> [#uses=1] + %Y = load i32, i32* %X ; <i32> [#uses=1] %tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1] store i16 %tmp1, i16* %A %tmp2 = ptrtoint i16* %A to i16 ; <i16> [#uses=1] @@ -14,7 +14,7 @@ define i16 @test1(i32* %X, i16* %A) { define i32 @test2(i32* %X, i32* %A) { ; CHECK-LABEL: test2: ; CHECK: str {{.*}}[{{.*}}], - %Y = load i32* %X ; <i32> [#uses=1] + %Y = load i32, i32* %X ; <i32> [#uses=1] store i32 %Y, i32* %A %tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1] %tmp2 = sub i32 %tmp1, 4 ; <i32> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/str_pre-2.ll b/llvm/test/CodeGen/ARM/str_pre-2.ll index 5ce2bcecb47..4b8b4c6bca7 100644 --- a/llvm/test/CodeGen/ARM/str_pre-2.ll +++ b/llvm/test/CodeGen/ARM/str_pre-2.ll @@ -7,8 +7,8 @@ entry: ; CHECK: push {r4, r5, lr} ; CHECK: pop {r4, r5, pc} call void asm sideeffect "", "~{r4},~{r5}"() nounwind - %0 = load i64** @b, align 4 - %1 = load i64* %0, align 4 + %0 = load i64*, i64** @b, align 4 + %1 = load i64, i64* %0, align 4 %2 = mul i64 %1, %a ret i64 %2 } diff --git a/llvm/test/CodeGen/ARM/str_pre.ll b/llvm/test/CodeGen/ARM/str_pre.ll index dc8cee91312..848261f83e3 100644 --- a/llvm/test/CodeGen/ARM/str_pre.ll +++ b/llvm/test/CodeGen/ARM/str_pre.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s define void @test1(i32* %X, i32* %A, i32** %dest) { - %B = load i32* %A ; <i32> [#uses=1] + %B = load i32, i32* %A ; <i32> [#uses=1] %Y = getelementptr i32, i32* %X, i32 4 ; <i32*> [#uses=2] store i32 %B, i32* %Y store i32* %Y, i32** %dest @@ -9,7 +9,7 @@ define void @test1(i32* %X, i32* %A, i32** %dest) { } define i16* @test2(i16* %X, i32* %A) { - %B = load i32* %A ; <i32> [#uses=1] + %B = load i32, i32* %A ; <i32> [#uses=1] %Y = getelementptr i16, i16* %X, i32 4 ; <i16*> [#uses=2] %tmp = trunc i32 %B to i16 ; <i16> [#uses=1] store i16 %tmp, i16* %Y diff --git a/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll b/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll index 80b157ee692..bca797d6dce 100644 --- a/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll +++ b/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll @@ -72,10 +72,10 @@ declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1 ; Function Attrs: nounwind define void @set_stored_macroblock_parameters() #1 { entry: - %0 = load %structB** @img, align 4 - %1 = load i32* undef, align 4 + %0 = load %structB*, %structB** @img, align 4 + %1 = load i32, i32* undef, align 4 %mb_data = getelementptr inbounds %structB, %structB* %0, i32 0, i32 61 - %2 = load %structK** %mb_data, align 4 + %2 = load %structK*, %structK** %mb_data, align 4 br label %for.body for.body: ; preds = %for.body, %entry @@ -134,7 +134,7 @@ if.then233: ; preds = %if.end230 if.end236: ; preds = %if.end230 %cmp242 = icmp ne i16 undef, 8 - %4 = load i32* @luma_transform_size_8x8_flag, align 4 + %4 = load i32, i32* @luma_transform_size_8x8_flag, align 4 %tobool245 = icmp ne i32 %4, 0 %or.cond812 = or i1 %cmp242, %tobool245 br i1 %or.cond812, label %if.end249, label %land.lhs.true246 @@ -150,11 +150,11 @@ if.then248: ; preds = %land.lhs.true246 br label %if.end249 if.end249: ; preds = %if.then248, %land.lhs.true246, %if.end236 - %5 = load i32* @luma_transform_size_8x8_flag, align 4 - %6 = load %structA** @rdopt, align 4 + %5 = load i32, i32* @luma_transform_size_8x8_flag, align 4 + %6 = load %structA*, %structA** @rdopt, align 4 %luma_transform_size_8x8_flag264 = getelementptr inbounds %structA, %structA* %6, i32 0, i32 21 store i32 %5, i32* %luma_transform_size_8x8_flag264, align 4 - %7 
= load i32* undef, align 4 + %7 = load i32, i32* undef, align 4 %add281 = add nsw i32 %7, 0 br label %for.body285 @@ -162,36 +162,36 @@ for.body285: ; preds = %for.inc503, %if.end %8 = phi %structB* [ undef, %if.end249 ], [ %.pre1155, %for.inc503 ] %i.21103 = phi i32 [ 0, %if.end249 ], [ %inc504, %for.inc503 ] %block_x286 = getelementptr inbounds %structB, %structB* %8, i32 0, i32 37 - %9 = load i32* %block_x286, align 4 + %9 = load i32, i32* %block_x286, align 4 %add287 = add nsw i32 %9, %i.21103 %shr289 = ashr i32 %i.21103, 1 %add290 = add nsw i32 %shr289, 0 %arrayidx292 = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15, i32 %add290 - %10 = load %structM** @enc_picture, align 4 + %10 = load %structM*, %structM** @enc_picture, align 4 %ref_idx = getelementptr inbounds %structM, %structM* %10, i32 0, i32 35 - %11 = load i8**** %ref_idx, align 4 - %12 = load i8*** %11, align 4 + %11 = load i8***, i8**** %ref_idx, align 4 + %12 = load i8**, i8*** %11, align 4 %arrayidx313 = getelementptr inbounds i8*, i8** %12, i32 %add281 - %13 = load i8** %arrayidx313, align 4 + %13 = load i8*, i8** %arrayidx313, align 4 %arrayidx314 = getelementptr inbounds i8, i8* %13, i32 %add287 store i8 -1, i8* %arrayidx314, align 1 - %14 = load %structB** @img, align 4 + %14 = load %structB*, %structB** @img, align 4 %MbaffFrameFlag327 = getelementptr inbounds %structB, %structB* %14, i32 0, i32 100 - %15 = load i32* %MbaffFrameFlag327, align 4 + %15 = load i32, i32* %MbaffFrameFlag327, align 4 %tobool328 = icmp eq i32 %15, 0 br i1 %tobool328, label %if.end454, label %if.then329 if.then329: ; preds = %for.body285 - %16 = load %structA** @rdopt, align 4 + %16 = load %structA*, %structA** @rdopt, align 4 br label %if.end454 if.end454: ; preds = %if.then329, %for.body285 - %17 = load i32* %arrayidx292, align 4 + %17 = load i32, i32* %arrayidx292, align 4 %cmp457 = icmp eq i32 %17, 0 br i1 %cmp457, label %if.then475, label %lor.lhs.false459 lor.lhs.false459: ; preds = %if.end454 - %18 = load i32* %mb_type, align 4 + %18 = load i32, i32* %mb_type, align 4 switch i32 %18, label %for.inc503 [ i32 9, label %if.then475 i32 10, label %if.then475 @@ -205,7 +205,7 @@ if.then475: ; preds = %lor.lhs.false459, % for.inc503: ; preds = %if.then475, %lor.lhs.false459 %inc504 = add nsw i32 %i.21103, 1 - %.pre1155 = load %structB** @img, align 4 + %.pre1155 = load %structB*, %structB** @img, align 4 br label %for.body285 } diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll index f7328dc580e..32038883469 100644 --- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -169,7 +169,7 @@ entry: ; CHECK: sub ; CHECK: cmp ; CHECK: bge - %load = load i32* @t, align 4 + %load = load i32, i32* @t, align 4 %sub = sub i32 %load, 17 %cmp = icmp slt i32 %sub, 0 br i1 %cmp, label %if.then, label %if.else @@ -191,7 +191,7 @@ entry: ; CHECK: sub ; CHECK: cmp ; CHECK: bhs - %load = load i32* @t, align 4 + %load = load i32, i32* @t, align 4 %sub = sub i32 %load, 17 %cmp = icmp ult i32 %sub, 0 br i1 %cmp, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/ARM/swift-atomics.ll b/llvm/test/CodeGen/ARM/swift-atomics.ll index 8b100f1f41f..ca7e7fb299b 100644 --- a/llvm/test/CodeGen/ARM/swift-atomics.ll +++ b/llvm/test/CodeGen/ARM/swift-atomics.ll @@ -33,7 +33,7 @@ define i32 @test_seq_cst(i32* %p, i32 %v) { ; CHECK-STRICT-ATOMIC: dmb {{ish$}} store atomic i32 %v, i32* %p seq_cst, align 4 - %val = load atomic i32* %p seq_cst, align 4 + %val = load 
atomic i32, i32* %p seq_cst, align 4 ret i32 %val } @@ -46,6 +46,6 @@ define i32 @test_acq(i32* %addr) { ; CHECK-STRICT-ATOMIC-LABEL: test_acq: ; CHECK-STRICT-ATOMIC: dmb {{ish$}} - %val = load atomic i32* %addr acquire, align 4 + %val = load atomic i32, i32* %addr acquire, align 4 ret i32 %val } diff --git a/llvm/test/CodeGen/ARM/swift-vldm.ll b/llvm/test/CodeGen/ARM/swift-vldm.ll index ca237927ff1..9e507279fa0 100644 --- a/llvm/test/CodeGen/ARM/swift-vldm.ll +++ b/llvm/test/CodeGen/ARM/swift-vldm.ll @@ -15,11 +15,11 @@ entry: %addr1 = getelementptr double, double * %x, i32 1 %addr2 = getelementptr double, double * %x, i32 2 %addr3 = getelementptr double, double * %x, i32 3 - %d0 = load double * %y - %d1 = load double * %x - %d2 = load double * %addr1 - %d3 = load double * %addr2 - %d4 = load double * %addr3 + %d0 = load double , double * %y + %d1 = load double , double * %x + %d2 = load double , double * %addr1 + %d3 = load double , double * %addr2 + %d4 = load double , double * %addr3 ; We are trying to force x[0-3] in registers d1 to d4 so that we can test we ; don't form a "vldmia rX, {d1, d2, d3, d4}". ; We are relying on the calling convention and that register allocation diff --git a/llvm/test/CodeGen/ARM/tail-dup.ll b/llvm/test/CodeGen/ARM/tail-dup.ll index 49ab114443a..407bdf7524b 100644 --- a/llvm/test/CodeGen/ARM/tail-dup.ll +++ b/llvm/test/CodeGen/ARM/tail-dup.ll @@ -11,19 +11,19 @@ define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp { entry: - %0 = load i32* %opcodes, align 4 + %0 = load i32, i32* %opcodes, align 4 %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %0 br label %indirectgoto INCREMENT: ; preds = %indirectgoto %inc = add nsw i32 %result.0, 1 - %1 = load i32* %opcodes.addr.0, align 4 + %1 = load i32, i32* %opcodes.addr.0, align 4 %arrayidx2 = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %1 br label %indirectgoto DECREMENT: ; preds = %indirectgoto %dec = add nsw i32 %result.0, -1 - %2 = load i32* %opcodes.addr.0, align 4 + %2 = load i32, i32* %opcodes.addr.0, align 4 %arrayidx4 = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %2 br label %indirectgoto @@ -32,7 +32,7 @@ indirectgoto: ; preds = %DECREMENT, %INCREME %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ] %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ] %opcodes.addr.0 = getelementptr inbounds i32, i32* %opcodes.pn, i32 1 - %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4 + %indirect.goto.dest = load i8*, i8** %indirect.goto.dest.in, align 4 indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT] RETURN: ; preds = %indirectgoto diff --git a/llvm/test/CodeGen/ARM/test-sharedidx.ll b/llvm/test/CodeGen/ARM/test-sharedidx.ll index 4bdf3b0bcdb..377996c4c3c 100644 --- a/llvm/test/CodeGen/ARM/test-sharedidx.ll +++ b/llvm/test/CodeGen/ARM/test-sharedidx.ll @@ -24,10 +24,10 @@ for.body: ; preds = %entry, %for.body.3 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! 
%i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ] %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09 - %0 = load i8* %arrayidx, align 1 + %0 = load i8, i8* %arrayidx, align 1 %conv6 = zext i8 %0 to i32 %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09 - %1 = load i8* %arrayidx1, align 1 + %1 = load i8, i8* %arrayidx1, align 1 %conv27 = zext i8 %1 to i32 %add = add nsw i32 %conv27, %conv6 %conv3 = trunc i32 %add to i8 @@ -45,10 +45,10 @@ for.body.1: ; preds = %for.body ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5 - %2 = load i8* %arrayidx.1, align 1 + %2 = load i8, i8* %arrayidx.1, align 1 %conv6.1 = zext i8 %2 to i32 %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5 - %3 = load i8* %arrayidx1.1, align 1 + %3 = load i8, i8* %arrayidx1.1, align 1 %conv27.1 = zext i8 %3 to i32 %add.1 = add nsw i32 %conv27.1, %conv6.1 %conv3.1 = trunc i32 %add.1 to i8 @@ -63,10 +63,10 @@ for.body.2: ; preds = %for.body.1 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1 - %4 = load i8* %arrayidx.2, align 1 + %4 = load i8, i8* %arrayidx.2, align 1 %conv6.2 = zext i8 %4 to i32 %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1 - %5 = load i8* %arrayidx1.2, align 1 + %5 = load i8, i8* %arrayidx1.2, align 1 %conv27.2 = zext i8 %5 to i32 %add.2 = add nsw i32 %conv27.2, %conv6.2 %conv3.2 = trunc i32 %add.2 to i8 @@ -81,10 +81,10 @@ for.body.3: ; preds = %for.body.2 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! 
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2 - %6 = load i8* %arrayidx.3, align 1 + %6 = load i8, i8* %arrayidx.3, align 1 %conv6.3 = zext i8 %6 to i32 %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2 - %7 = load i8* %arrayidx1.3, align 1 + %7 = load i8, i8* %arrayidx1.3, align 1 %conv27.3 = zext i8 %7 to i32 %add.3 = add nsw i32 %conv27.3, %conv6.3 %conv3.3 = trunc i32 %add.3 to i8 diff --git a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll index 892fe1a0271..0637be03d56 100644 --- a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll +++ b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll @@ -12,7 +12,7 @@ entry: ; CHECK-LABEL: foo: %size = alloca i32, align 4 - %0 = load i8** @__bar, align 4 + %0 = load i8*, i8** @__bar, align 4 %1 = icmp eq i8* %0, null br i1 %1, label %bb1, label %bb3 ; CHECK: bne diff --git a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll index c83126098f7..9c62faeaa68 100644 --- a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll +++ b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll @@ -13,10 +13,10 @@ entry: %b = alloca <6 x i32>, align 16 %a = alloca <4 x i32>, align 16 - %stuff = load <6 x i32>* %p, align 16 + %stuff = load <6 x i32>, <6 x i32>* %p, align 16 store <6 x i32> %stuff, <6 x i32>* %b, align 16 store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16 - %0 = load <4 x i32>* %a, align 16 + %0 = load <4 x i32>, <4 x i32>* %a, align 16 ret <4 x i32> %0 ; Epilogue @@ -46,7 +46,7 @@ entry: %a = alloca <4 x i32>, align 16 store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16 store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16 - %0 = load <4 x i32>* %a, align 16 + %0 = load <4 x i32>, <4 x i32>* %a, align 16 call void @llvm.va_start(i8* null) ret <4 x i32> %0 @@ -78,7 +78,7 @@ entry: ; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr} ; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr} - %0 = load <6 x i32>* %p, align 16 + %0 = load <6 x i32>, <6 x i32>* %p, align 16 %1 = extractelement <6 x i32> %0, i32 0 %2 = extractelement <6 x i32> %0, i32 1 %3 = extractelement <6 x i32> %0, i32 2 @@ -121,24 +121,24 @@ entry: store i32 2, i32* %b, align 4 store i32 3, i32* %c, align 4 store i32 4, i32* %d, align 4 - %0 = load i32* %a, align 4 + %0 = load i32, i32* %a, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %a, align 4 - %1 = load i32* %b, align 4 + %1 = load i32, i32* %b, align 4 %inc1 = add nsw i32 %1, 1 store i32 %inc1, i32* %b, align 4 - %2 = load i32* %c, align 4 + %2 = load i32, i32* %c, align 4 %inc2 = add nsw i32 %2, 1 store i32 %inc2, i32* %c, align 4 - %3 = load i32* %d, align 4 + %3 = load i32, i32* %d, align 4 %inc3 = add nsw i32 %3, 1 store i32 %inc3, i32* %d, align 4 - %4 = load i32* %a, align 4 - %5 = load i32* %b, align 4 + %4 = load i32, i32* %a, align 4 + %5 = load i32, i32* %b, align 4 %add = add nsw i32 %4, %5 - %6 = load i32* %c, align 4 + %6 = load i32, i32* %c, align 4 %add4 = add nsw i32 %add, %6 - %7 = load i32* %d, align 4 + %7 = load i32, i32* %d, align 4 %add5 = add nsw i32 %add4, %7 %add6 = add nsw i32 %add5, %i call void @llvm.va_start(i8* null) diff --git a/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll b/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll index 16a55a882d9..9f1950c743c 100644 --- a/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll +++ b/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL foo: define void @foo(i32 %x) { entry: - %0 = load void (i32)** @f, 
align 4 + %0 = load void (i32)*, void (i32)** @f, align 4 tail call void %0(i32 %x) ret void diff --git a/llvm/test/CodeGen/ARM/tls1.ll b/llvm/test/CodeGen/ARM/tls1.ll index b03f76b6ef0..d492522955e 100644 --- a/llvm/test/CodeGen/ARM/tls1.ll +++ b/llvm/test/CodeGen/ARM/tls1.ll @@ -12,7 +12,7 @@ define i32 @f() { entry: - %tmp1 = load i32* @i ; <i32> [#uses=1] + %tmp1 = load i32, i32* @i ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/llvm/test/CodeGen/ARM/tls2.ll b/llvm/test/CodeGen/ARM/tls2.ll index 24b4794b061..d522da8f571 100644 --- a/llvm/test/CodeGen/ARM/tls2.ll +++ b/llvm/test/CodeGen/ARM/tls2.ll @@ -12,7 +12,7 @@ define i32 @f() { ; CHECK-PIC-LABEL: f: ; CHECK-PIC: __tls_get_addr entry: - %tmp1 = load i32* @i ; <i32> [#uses=1] + %tmp1 = load i32, i32* @i ; <i32> [#uses=1] ret i32 %tmp1 } diff --git a/llvm/test/CodeGen/ARM/tls3.ll b/llvm/test/CodeGen/ARM/tls3.ll index e0e944f70c5..1617a520150 100644 --- a/llvm/test/CodeGen/ARM/tls3.ll +++ b/llvm/test/CodeGen/ARM/tls3.ll @@ -6,6 +6,6 @@ define i32 @main() { entry: - %tmp2 = load i32* getelementptr (%struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1] + %tmp2 = load i32, i32* getelementptr (%struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1] ret i32 %tmp2 } diff --git a/llvm/test/CodeGen/ARM/trunc_ldr.ll b/llvm/test/CodeGen/ARM/trunc_ldr.ll index 94cef8b348f..ca7ad9a2fc6 100644 --- a/llvm/test/CodeGen/ARM/trunc_ldr.ll +++ b/llvm/test/CodeGen/ARM/trunc_ldr.ll @@ -6,7 +6,7 @@ define i8 @f1(%struct.A* %d) { %tmp2 = getelementptr %struct.A, %struct.A* %d, i32 0, i32 4 %tmp23 = bitcast i16* %tmp2 to i32* - %tmp4 = load i32* %tmp23 + %tmp4 = load i32, i32* %tmp23 %tmp512 = lshr i32 %tmp4, 24 %tmp56 = trunc i32 %tmp512 to i8 ret i8 %tmp56 @@ -15,7 +15,7 @@ define i8 @f1(%struct.A* %d) { define i32 @f2(%struct.A* %d) { %tmp2 = getelementptr %struct.A, %struct.A* %d, i32 0, i32 4 %tmp23 = bitcast i16* %tmp2 to i32* - %tmp4 = load i32* %tmp23 + %tmp4 = load i32, i32* %tmp23 %tmp512 = lshr i32 %tmp4, 24 %tmp56 = trunc i32 %tmp512 to i8 %tmp57 = sext i8 %tmp56 to i32 diff --git a/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll b/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll index 360e3e13f59..11fa022451f 100644 --- a/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll +++ b/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll @@ -3,7 +3,7 @@ define void @bar(i8* %P, i16* %Q) { entry: %P1 = bitcast i8* %P to i16* ; <i16*> [#uses=1] - %tmp = load i16* %Q, align 1 ; <i16> [#uses=1] + %tmp = load i16, i16* %Q, align 1 ; <i16> [#uses=1] store i16 %tmp, i16* %P1, align 1 ret void } @@ -11,7 +11,7 @@ entry: define void @foo(i8* %P, i32* %Q) { entry: %P1 = bitcast i8* %P to i32* ; <i32*> [#uses=1] - %tmp = load i32* %Q, align 1 ; <i32> [#uses=1] + %tmp = load i32, i32* %Q, align 1 ; <i32> [#uses=1] store i32 %tmp, i32* %P1, align 1 ret void } diff --git a/llvm/test/CodeGen/ARM/twoaddrinstr.ll b/llvm/test/CodeGen/ARM/twoaddrinstr.ll index 01df3b42d10..97a49334b74 100644 --- a/llvm/test/CodeGen/ARM/twoaddrinstr.ll +++ b/llvm/test/CodeGen/ARM/twoaddrinstr.ll @@ -12,7 +12,7 @@ define void @PR13378() nounwind { ; CHECK-NEXT: vst1.32 entry: - %0 = load <4 x float>* undef, align 4 + %0 = load <4 x float>, <4 x float>* undef, align 4 store <4 x float> zeroinitializer, <4 x float>* undef, align 4 store <4 x float> %0, <4 x float>* undef, align 4 %1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3 diff --git a/llvm/test/CodeGen/ARM/uint64tof64.ll b/llvm/test/CodeGen/ARM/uint64tof64.ll index 32eb225a2ad..d7799551a8d 100644 --- 
a/llvm/test/CodeGen/ARM/uint64tof64.ll +++ b/llvm/test/CodeGen/ARM/uint64tof64.ll @@ -7,7 +7,7 @@ define fastcc void @t() { entry: - %0 = load i64* null, align 4 ; <i64> [#uses=1] + %0 = load i64, i64* null, align 4 ; <i64> [#uses=1] %1 = uitofp i64 %0 to double ; <double> [#uses=1] %2 = fdiv double 0.000000e+00, %1 ; <double> [#uses=1] %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2) ; <i32> [#uses=0] diff --git a/llvm/test/CodeGen/ARM/umulo-32.ll b/llvm/test/CodeGen/ARM/umulo-32.ll index 19875ce9407..1c8357314c2 100644 --- a/llvm/test/CodeGen/ARM/umulo-32.ll +++ b/llvm/test/CodeGen/ARM/umulo-32.ll @@ -28,7 +28,7 @@ store i32 0, i32* %1 store i32 %argc, i32* %2, align 4 store i8** %argv, i8*** %3, align 4 store i32 10, i32* %m_degree, align 4 -%4 = load i32* %m_degree, align 4 +%4 = load i32, i32* %m_degree, align 4 %5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8) %6 = extractvalue %umul.ty %5, 1 %7 = extractvalue %umul.ty %5, 0 diff --git a/llvm/test/CodeGen/ARM/unaligned_load_store.ll b/llvm/test/CodeGen/ARM/unaligned_load_store.ll index 72163ae30c3..0be3917ffa2 100644 --- a/llvm/test/CodeGen/ARM/unaligned_load_store.ll +++ b/llvm/test/CodeGen/ARM/unaligned_load_store.ll @@ -28,7 +28,7 @@ entry: %__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1] %__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1] - %tmp.i = load i32* %__src1.i, align 1 ; <i32> [#uses=1] + %tmp.i = load i32, i32* %__src1.i, align 1 ; <i32> [#uses=1] store i32 %tmp.i, i32* %__dest2.i, align 1 ret void } @@ -44,7 +44,7 @@ entry: ; UNALIGNED-LABEL: hword: ; UNALIGNED: vld1.16 ; UNALIGNED: vst1.16 - %tmp = load double* %a, align 2 + %tmp = load double, double* %a, align 2 store double %tmp, double* %b, align 2 ret void } @@ -60,7 +60,7 @@ entry: ; UNALIGNED-LABEL: byte: ; UNALIGNED: vld1.8 ; UNALIGNED: vst1.8 - %tmp = load double* %a, align 1 + %tmp = load double, double* %a, align 1 store double %tmp, double* %b, align 1 ret void } @@ -76,7 +76,7 @@ entry: ; UNALIGNED: ldr ; UNALIGNED-NOT: strb ; UNALIGNED: str - %tmp = load i32* %a, align 1 + %tmp = load i32, i32* %a, align 1 store i32 %tmp, i32* %b, align 1 ret void } diff --git a/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll b/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll index 6f98945dcf4..abb523c2275 100644 --- a/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll +++ b/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll @@ -11,7 +11,7 @@ entry: %vi = bitcast i8* %pi to <8 x i8>* %vo = bitcast i8* %po to <8 x i8>* ;CHECK: vld1.8 - %v1 = load <8 x i8>* %vi, align 1 + %v1 = load <8 x i8>, <8 x i8>* %vi, align 1 ;CHECK: vst1.8 store <8 x i8> %v1, <8 x i8>* %vo, align 1 ret void @@ -29,7 +29,7 @@ entry: %vi = bitcast i8* %pi to <4 x i16>* %vo = bitcast i8* %po to <4 x i16>* ;CHECK: vld1.8 - %v1 = load <4 x i16>* %vi, align 1 + %v1 = load <4 x i16>, <4 x i16>* %vi, align 1 ;CHECK: vst1.8 store <4 x i16> %v1, <4 x i16>* %vo, align 1 ret void @@ -47,7 +47,7 @@ entry: %vi = bitcast i8* %pi to <2 x i32>* %vo = bitcast i8* %po to <2 x i32>* ;CHECK: vld1.8 - %v1 = load <2 x i32>* %vi, align 1 + %v1 = load <2 x i32>, <2 x i32>* %vi, align 1 ;CHECK: vst1.8 store <2 x i32> %v1, <2 x i32>* %vo, align 1 ret void @@ -65,7 +65,7 @@ entry: %vi = bitcast i8* %pi to <2 x float>* %vo = bitcast i8* %po to <2 x float>* ;CHECK: vld1.8 - %v1 = load <2 x float>* %vi, align 1 + %v1 = load <2 x float>, <2 x float>* %vi, align 1 ;CHECK: vst1.8 store <2 x float> %v1, <2 
x float>* %vo, align 1 ret void @@ -83,7 +83,7 @@ entry: %vi = bitcast i8* %pi to <16 x i8>* %vo = bitcast i8* %po to <16 x i8>* ;CHECK: vld1.8 - %v1 = load <16 x i8>* %vi, align 1 + %v1 = load <16 x i8>, <16 x i8>* %vi, align 1 ;CHECK: vst1.8 store <16 x i8> %v1, <16 x i8>* %vo, align 1 ret void @@ -101,7 +101,7 @@ entry: %vi = bitcast i8* %pi to <8 x i16>* %vo = bitcast i8* %po to <8 x i16>* ;CHECK: vld1.8 - %v1 = load <8 x i16>* %vi, align 1 + %v1 = load <8 x i16>, <8 x i16>* %vi, align 1 ;CHECK: vst1.8 store <8 x i16> %v1, <8 x i16>* %vo, align 1 ret void @@ -119,7 +119,7 @@ entry: %vi = bitcast i8* %pi to <4 x i32>* %vo = bitcast i8* %po to <4 x i32>* ;CHECK: vld1.8 - %v1 = load <4 x i32>* %vi, align 1 + %v1 = load <4 x i32>, <4 x i32>* %vi, align 1 ;CHECK: vst1.8 store <4 x i32> %v1, <4 x i32>* %vo, align 1 ret void @@ -137,7 +137,7 @@ entry: %vi = bitcast i8* %pi to <2 x i64>* %vo = bitcast i8* %po to <2 x i64>* ;CHECK: vld1.8 - %v1 = load <2 x i64>* %vi, align 1 + %v1 = load <2 x i64>, <2 x i64>* %vi, align 1 ;CHECK: vst1.8 store <2 x i64> %v1, <2 x i64>* %vo, align 1 ret void @@ -155,7 +155,7 @@ entry: %vi = bitcast i8* %pi to <4 x float>* %vo = bitcast i8* %po to <4 x float>* ;CHECK: vld1.8 - %v1 = load <4 x float>* %vi, align 1 + %v1 = load <4 x float>, <4 x float>* %vi, align 1 ;CHECK: vst1.8 store <4 x float> %v1, <4 x float>* %vo, align 1 ret void @@ -173,7 +173,7 @@ entry: %vi = bitcast i8* %pi to <8 x i8>* %vo = bitcast i8* %po to <8 x i8>* ;CHECK: vld1.16 - %v1 = load <8 x i8>* %vi, align 2 + %v1 = load <8 x i8>, <8 x i8>* %vi, align 2 ;CHECK: vst1.16 store <8 x i8> %v1, <8 x i8>* %vo, align 2 ret void @@ -191,7 +191,7 @@ entry: %vi = bitcast i8* %pi to <4 x i16>* %vo = bitcast i8* %po to <4 x i16>* ;CHECK: vld1.16 - %v1 = load <4 x i16>* %vi, align 2 + %v1 = load <4 x i16>, <4 x i16>* %vi, align 2 ;CHECK: vst1.16 store <4 x i16> %v1, <4 x i16>* %vo, align 2 ret void @@ -209,7 +209,7 @@ entry: %vi = bitcast i8* %pi to <2 x i32>* %vo = bitcast i8* %po to <2 x i32>* ;CHECK: vld1.16 - %v1 = load <2 x i32>* %vi, align 2 + %v1 = load <2 x i32>, <2 x i32>* %vi, align 2 ;CHECK: vst1.16 store <2 x i32> %v1, <2 x i32>* %vo, align 2 ret void @@ -227,7 +227,7 @@ entry: %vi = bitcast i8* %pi to <2 x float>* %vo = bitcast i8* %po to <2 x float>* ;CHECK: vld1.16 - %v1 = load <2 x float>* %vi, align 2 + %v1 = load <2 x float>, <2 x float>* %vi, align 2 ;CHECK: vst1.16 store <2 x float> %v1, <2 x float>* %vo, align 2 ret void @@ -245,7 +245,7 @@ entry: %vi = bitcast i8* %pi to <16 x i8>* %vo = bitcast i8* %po to <16 x i8>* ;CHECK: vld1.16 - %v1 = load <16 x i8>* %vi, align 2 + %v1 = load <16 x i8>, <16 x i8>* %vi, align 2 ;CHECK: vst1.16 store <16 x i8> %v1, <16 x i8>* %vo, align 2 ret void @@ -263,7 +263,7 @@ entry: %vi = bitcast i8* %pi to <8 x i16>* %vo = bitcast i8* %po to <8 x i16>* ;CHECK: vld1.16 - %v1 = load <8 x i16>* %vi, align 2 + %v1 = load <8 x i16>, <8 x i16>* %vi, align 2 ;CHECK: vst1.16 store <8 x i16> %v1, <8 x i16>* %vo, align 2 ret void @@ -281,7 +281,7 @@ entry: %vi = bitcast i8* %pi to <4 x i32>* %vo = bitcast i8* %po to <4 x i32>* ;CHECK: vld1.16 - %v1 = load <4 x i32>* %vi, align 2 + %v1 = load <4 x i32>, <4 x i32>* %vi, align 2 ;CHECK: vst1.16 store <4 x i32> %v1, <4 x i32>* %vo, align 2 ret void @@ -299,7 +299,7 @@ entry: %vi = bitcast i8* %pi to <2 x i64>* %vo = bitcast i8* %po to <2 x i64>* ;CHECK: vld1.16 - %v1 = load <2 x i64>* %vi, align 2 + %v1 = load <2 x i64>, <2 x i64>* %vi, align 2 ;CHECK: vst1.16 store <2 x i64> %v1, <2 x i64>* %vo, align 2 ret void @@ 
-317,7 +317,7 @@ entry: %vi = bitcast i8* %pi to <4 x float>* %vo = bitcast i8* %po to <4 x float>* ;CHECK: vld1.16 - %v1 = load <4 x float>* %vi, align 2 + %v1 = load <4 x float>, <4 x float>* %vi, align 2 ;CHECK: vst1.16 store <4 x float> %v1, <4 x float>* %vo, align 2 ret void @@ -335,7 +335,7 @@ entry: %vi = bitcast i8* %pi to <8 x i8>* %vo = bitcast i8* %po to <8 x i8>* ;CHECK: vldr - %v1 = load <8 x i8>* %vi, align 4 + %v1 = load <8 x i8>, <8 x i8>* %vi, align 4 ;CHECK: vstr store <8 x i8> %v1, <8 x i8>* %vo, align 4 ret void @@ -353,7 +353,7 @@ entry: %vi = bitcast i8* %pi to <4 x i16>* %vo = bitcast i8* %po to <4 x i16>* ;CHECK: vldr - %v1 = load <4 x i16>* %vi, align 4 + %v1 = load <4 x i16>, <4 x i16>* %vi, align 4 ;CHECK: vstr store <4 x i16> %v1, <4 x i16>* %vo, align 4 ret void @@ -371,7 +371,7 @@ entry: %vi = bitcast i8* %pi to <2 x i32>* %vo = bitcast i8* %po to <2 x i32>* ;CHECK: vldr - %v1 = load <2 x i32>* %vi, align 4 + %v1 = load <2 x i32>, <2 x i32>* %vi, align 4 ;CHECK: vstr store <2 x i32> %v1, <2 x i32>* %vo, align 4 ret void @@ -389,7 +389,7 @@ entry: %vi = bitcast i8* %pi to <2 x float>* %vo = bitcast i8* %po to <2 x float>* ;CHECK: vldr - %v1 = load <2 x float>* %vi, align 4 + %v1 = load <2 x float>, <2 x float>* %vi, align 4 ;CHECK: vstr store <2 x float> %v1, <2 x float>* %vo, align 4 ret void @@ -407,7 +407,7 @@ entry: %vi = bitcast i8* %pi to <16 x i8>* %vo = bitcast i8* %po to <16 x i8>* ;CHECK: vld1.32 - %v1 = load <16 x i8>* %vi, align 4 + %v1 = load <16 x i8>, <16 x i8>* %vi, align 4 ;CHECK: vst1.32 store <16 x i8> %v1, <16 x i8>* %vo, align 4 ret void @@ -425,7 +425,7 @@ entry: %vi = bitcast i8* %pi to <8 x i16>* %vo = bitcast i8* %po to <8 x i16>* ;CHECK: vld1.32 - %v1 = load <8 x i16>* %vi, align 4 + %v1 = load <8 x i16>, <8 x i16>* %vi, align 4 ;CHECK: vst1.32 store <8 x i16> %v1, <8 x i16>* %vo, align 4 ret void @@ -443,7 +443,7 @@ entry: %vi = bitcast i8* %pi to <4 x i32>* %vo = bitcast i8* %po to <4 x i32>* ;CHECK: vld1.32 - %v1 = load <4 x i32>* %vi, align 4 + %v1 = load <4 x i32>, <4 x i32>* %vi, align 4 ;CHECK: vst1.32 store <4 x i32> %v1, <4 x i32>* %vo, align 4 ret void @@ -461,7 +461,7 @@ entry: %vi = bitcast i8* %pi to <2 x i64>* %vo = bitcast i8* %po to <2 x i64>* ;CHECK: vld1.32 - %v1 = load <2 x i64>* %vi, align 4 + %v1 = load <2 x i64>, <2 x i64>* %vi, align 4 ;CHECK: vst1.32 store <2 x i64> %v1, <2 x i64>* %vo, align 4 ret void @@ -479,7 +479,7 @@ entry: %vi = bitcast i8* %pi to <4 x float>* %vo = bitcast i8* %po to <4 x float>* ;CHECK: vld1.32 - %v1 = load <4 x float>* %vi, align 4 + %v1 = load <4 x float>, <4 x float>* %vi, align 4 ;CHECK: vst1.32 store <4 x float> %v1, <4 x float>* %vo, align 4 ret void diff --git a/llvm/test/CodeGen/ARM/undef-sext.ll b/llvm/test/CodeGen/ARM/undef-sext.ll index 7774fcd5981..bb06bcbaf44 100644 --- a/llvm/test/CodeGen/ARM/undef-sext.ll +++ b/llvm/test/CodeGen/ARM/undef-sext.ll @@ -9,6 +9,6 @@ entry: ; CHECK: bx lr %0 = sext i16 undef to i32 %1 = getelementptr inbounds i32, i32* %a, i32 %0 - %2 = load i32* %1, align 4 + %2 = load i32, i32* %1, align 4 ret i32 %2 } diff --git a/llvm/test/CodeGen/ARM/vaba.ll b/llvm/test/CodeGen/ARM/vaba.ll index 6478b1843c6..4323f318446 100644 --- a/llvm/test/CodeGen/ARM/vaba.ll +++ b/llvm/test/CodeGen/ARM/vaba.ll @@ -3,9 +3,9 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vabas8: ;CHECK: vaba.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = 
load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) %tmp5 = add <8 x i8> %tmp1, %tmp4 ret <8 x i8> %tmp5 @@ -14,9 +14,9 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vabas16: ;CHECK: vaba.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) %tmp5 = add <4 x i16> %tmp1, %tmp4 ret <4 x i16> %tmp5 @@ -25,9 +25,9 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vabas32: ;CHECK: vaba.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) %tmp5 = add <2 x i32> %tmp1, %tmp4 ret <2 x i32> %tmp5 @@ -36,9 +36,9 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vabau8: ;CHECK: vaba.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) %tmp5 = add <8 x i8> %tmp1, %tmp4 ret <8 x i8> %tmp5 @@ -47,9 +47,9 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vabau16: ;CHECK: vaba.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) %tmp5 = add <4 x i16> %tmp1, %tmp4 ret <4 x i16> %tmp5 @@ -58,9 +58,9 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vabau32: ;CHECK: vaba.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) %tmp5 = add <2 x i32> %tmp1, %tmp4 ret <2 x i32> %tmp5 @@ -69,9 +69,9 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { ;CHECK-LABEL: vabaQs8: ;CHECK: vaba.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = load <16 x i8>* %C + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = load <16 x i8>, <16 x i8>* %C %tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) %tmp5 = add <16 x i8> %tmp1, %tmp4 ret <16 
x i8> %tmp5 @@ -80,9 +80,9 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: vabaQs16: ;CHECK: vaba.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = load <8 x i16>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = load <8 x i16>, <8 x i16>* %C %tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) %tmp5 = add <8 x i16> %tmp1, %tmp4 ret <8 x i16> %tmp5 @@ -91,9 +91,9 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: vabaQs32: ;CHECK: vaba.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = load <4 x i32>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = load <4 x i32>, <4 x i32>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) %tmp5 = add <4 x i32> %tmp1, %tmp4 ret <4 x i32> %tmp5 @@ -102,9 +102,9 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { ;CHECK-LABEL: vabaQu8: ;CHECK: vaba.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = load <16 x i8>* %C + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = load <16 x i8>, <16 x i8>* %C %tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3) %tmp5 = add <16 x i8> %tmp1, %tmp4 ret <16 x i8> %tmp5 @@ -113,9 +113,9 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: vabaQu16: ;CHECK: vaba.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = load <8 x i16>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = load <8 x i16>, <8 x i16>* %C %tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3) %tmp5 = add <8 x i16> %tmp1, %tmp4 ret <8 x i16> %tmp5 @@ -124,9 +124,9 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: vabaQu32: ;CHECK: vaba.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = load <4 x i32>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = load <4 x i32>, <4 x i32>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3) %tmp5 = add <4 x i32> %tmp1, %tmp4 ret <4 x i32> %tmp5 @@ -151,9 +151,9 @@ declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind read define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vabals8: ;CHECK: vabal.s8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> %tmp6 = add <8 x i16> %tmp1, %tmp5 @@ -163,9 +163,9 @@ define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i32> 
@vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vabals16: ;CHECK: vabal.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> %tmp6 = add <4 x i32> %tmp1, %tmp5 @@ -175,9 +175,9 @@ define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vabals32: ;CHECK: vabal.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> %tmp6 = add <2 x i64> %tmp1, %tmp5 @@ -187,9 +187,9 @@ define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vabalu8: ;CHECK: vabal.u8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3) %tmp5 = zext <8 x i8> %tmp4 to <8 x i16> %tmp6 = add <8 x i16> %tmp1, %tmp5 @@ -199,9 +199,9 @@ define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vabalu16: ;CHECK: vabal.u16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3) %tmp5 = zext <4 x i16> %tmp4 to <4 x i32> %tmp6 = add <4 x i32> %tmp1, %tmp5 @@ -211,9 +211,9 @@ define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vabalu32: ;CHECK: vabal.u32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3) %tmp5 = zext <2 x i32> %tmp4 to <2 x i64> %tmp6 = add <2 x i64> %tmp1, %tmp5 diff --git a/llvm/test/CodeGen/ARM/vabd.ll b/llvm/test/CodeGen/ARM/vabd.ll index 9ba8be28c77..548b8a34046 100644 --- a/llvm/test/CodeGen/ARM/vabd.ll +++ b/llvm/test/CodeGen/ARM/vabd.ll @@ -3,8 +3,8 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vabds8: ;CHECK: vabd.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vabds16: ;CHECK: 
vabd.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vabds32: ;CHECK: vabd.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vabdu8: ;CHECK: vabd.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -39,8 +39,8 @@ define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vabdu16: ;CHECK: vabd.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -48,8 +48,8 @@ define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vabdu32: ;CHECK: vabd.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -57,8 +57,8 @@ define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vabdf32: ;CHECK: vabd.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -66,8 +66,8 @@ define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vabdQs8: ;CHECK: vabd.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -75,8 +75,8 @@ define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vabdQs16: ;CHECK: vabd.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -84,8 +84,8 @@ define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vabdQs32: ;CHECK: vabd.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = 
load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -93,8 +93,8 @@ define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vabdQu8: ;CHECK: vabd.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -102,8 +102,8 @@ define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vabdQu16: ;CHECK: vabd.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -111,8 +111,8 @@ define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vabdQu32: ;CHECK: vabd.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -120,8 +120,8 @@ define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vabdQf32: ;CHECK: vabd.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -149,8 +149,8 @@ declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwin define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vabdls8: ;CHECK: vabdl.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -159,8 +159,8 @@ define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vabdls16: ;CHECK: vabdl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -169,8 +169,8 @@ define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vabdls32: ;CHECK: vabdl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -179,8 +179,8 @@ define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vabdlu8(<8 x i8>* 
%A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vabdlu8: ;CHECK: vabdl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) %tmp4 = zext <8 x i8> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -189,8 +189,8 @@ define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vabdlu16: ;CHECK: vabdl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) %tmp4 = zext <4 x i16> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -199,8 +199,8 @@ define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vabdlu32: ;CHECK: vabdl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) %tmp4 = zext <2 x i32> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vabs.ll b/llvm/test/CodeGen/ARM/vabs.ll index 3a1aec86edf..38c6d6c28ae 100644 --- a/llvm/test/CodeGen/ARM/vabs.ll +++ b/llvm/test/CodeGen/ARM/vabs.ll @@ -3,7 +3,7 @@ define <8 x i8> @vabss8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vabss8: ;CHECK: vabs.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } @@ -11,7 +11,7 @@ define <8 x i8> @vabss8(<8 x i8>* %A) nounwind { define <4 x i16> @vabss16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vabss16: ;CHECK: vabs.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } @@ -19,7 +19,7 @@ define <4 x i16> @vabss16(<4 x i16>* %A) nounwind { define <2 x i32> @vabss32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vabss32: ;CHECK: vabs.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -27,7 +27,7 @@ define <2 x i32> @vabss32(<2 x i32>* %A) nounwind { define <2 x float> @vabsf32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vabsf32: ;CHECK: vabs.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp2 } @@ -35,7 +35,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind { define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vabsQs8: ;CHECK: vabs.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } @@ -43,7 +43,7 @@ define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vabsQs16: ;CHECK: vabs.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } @@ -51,7 +51,7 @@ define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vabsQs32: ;CHECK: vabs.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load 
<4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } @@ -59,7 +59,7 @@ define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind { define <4 x float> @vabsQf32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vabsQf32: ;CHECK: vabs.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp2 } @@ -77,7 +77,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vqabss8: ;CHECK: vqabs.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } @@ -85,7 +85,7 @@ define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind { define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vqabss16: ;CHECK: vqabs.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } @@ -93,7 +93,7 @@ define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind { define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vqabss32: ;CHECK: vqabs.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -101,7 +101,7 @@ define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind { define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vqabsQs8: ;CHECK: vqabs.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } @@ -109,7 +109,7 @@ define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqabsQs16: ;CHECK: vqabs.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } @@ -117,7 +117,7 @@ define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqabsQs32: ;CHECK: vqabs.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vadd.ll b/llvm/test/CodeGen/ARM/vadd.ll index 86b0d029701..dd35dd1ccfb 100644 --- a/llvm/test/CodeGen/ARM/vadd.ll +++ b/llvm/test/CodeGen/ARM/vadd.ll @@ -3,8 +3,8 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vaddi8: ;CHECK: vadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = add <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vaddi16: ;CHECK: vadd.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = add <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vaddi32: ;CHECK: vadd.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* 
%A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = add <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vaddi64: ;CHECK: vadd.i64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = add <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vaddf32: ;CHECK: vadd.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fadd <2 x float> %tmp1, %tmp2 ret <2 x float> %tmp3 } @@ -48,8 +48,8 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vaddQi8: ;CHECK: vadd.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = add <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -57,8 +57,8 @@ define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vaddQi16: ;CHECK: vadd.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = add <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -66,8 +66,8 @@ define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vaddQi32: ;CHECK: vadd.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = add <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -75,8 +75,8 @@ define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vaddQi64: ;CHECK: vadd.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = add <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -84,8 +84,8 @@ define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vaddQf32: ;CHECK: vadd.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = fadd <4 x float> %tmp1, %tmp2 ret <4 x float> %tmp3 } @@ -93,8 +93,8 @@ define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind { define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vraddhni16: ;CHECK: vraddhn.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -102,8 +102,8 @@ define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vraddhni32: ;CHECK: vraddhn.i32 - %tmp1 = load <4 x i32>* %A 
- %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -111,8 +111,8 @@ define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vraddhni64: ;CHECK: vraddhn.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -151,8 +151,8 @@ define <2 x i32> @vaddhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind { define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vaddls8: ;CHECK: vaddl.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -162,8 +162,8 @@ define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vaddls16: ;CHECK: vaddl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -173,8 +173,8 @@ define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vaddls32: ;CHECK: vaddl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4 @@ -184,8 +184,8 @@ define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vaddlu8: ;CHECK: vaddl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -195,8 +195,8 @@ define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vaddlu16: ;CHECK: vaddl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -206,8 +206,8 @@ define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vaddlu32: ;CHECK: vaddl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> %tmp5 = add <2 x i64> %tmp3, %tmp4 @@ -217,8 +217,8 @@ define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vaddws8: 
;CHECK: vaddw.s8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> %tmp4 = add <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -227,8 +227,8 @@ define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vaddws16: ;CHECK: vaddw.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> %tmp4 = add <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -237,8 +237,8 @@ define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vaddws32: ;CHECK: vaddw.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> %tmp4 = add <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 @@ -247,8 +247,8 @@ define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vaddwu8: ;CHECK: vaddw.u8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> %tmp4 = add <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -257,8 +257,8 @@ define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vaddwu16: ;CHECK: vaddw.u16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> %tmp4 = add <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -267,8 +267,8 @@ define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vaddwu32: ;CHECK: vaddw.u32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> %tmp4 = add <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vargs_align.ll b/llvm/test/CodeGen/ARM/vargs_align.ll index 3abb57ee51f..d19abd5ef80 100644 --- a/llvm/test/CodeGen/ARM/vargs_align.ll +++ b/llvm/test/CodeGen/ARM/vargs_align.ll @@ -8,13 +8,13 @@ entry: %tmp = alloca i32, align 4 ; <i32*> [#uses=2] store i32 %a, i32* %a_addr store i32 0, i32* %tmp - %tmp1 = load i32* %tmp ; <i32> [#uses=1] + %tmp1 = load i32, i32* %tmp ; <i32> [#uses=1] store i32 %tmp1, i32* %retval call void @llvm.va_start(i8* null) br label %return return: ; preds = %entry - %retval2 = load i32* %retval ; <i32> [#uses=1] + %retval2 = load i32, i32* %retval ; <i32> [#uses=1] ret i32 %retval2 ; EABI: add sp, sp, #12 ; EABI: add sp, sp, #16 diff --git a/llvm/test/CodeGen/ARM/vbits.ll b/llvm/test/CodeGen/ARM/vbits.ll index dfeaacf2085..db9bc6ccdd0 100644 --- a/llvm/test/CodeGen/ARM/vbits.ll +++ b/llvm/test/CodeGen/ARM/vbits.ll @@ -3,8 +3,8 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: v_andi8: ;CHECK: vand - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = and <8 x 
i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: v_andi16: ;CHECK: vand - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = and <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: v_andi32: ;CHECK: vand - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = and <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: v_andi64: ;CHECK: vand - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = and <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: v_andQi8: ;CHECK: vand - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = and <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -48,8 +48,8 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: v_andQi16: ;CHECK: vand - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = and <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -57,8 +57,8 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: v_andQi32: ;CHECK: vand - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = and <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -66,8 +66,8 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: v_andQi64: ;CHECK: vand - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = and <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -75,8 +75,8 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: v_bici8: ;CHECK: vbic - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp4 = and <8 x i8> %tmp1, %tmp3 ret <8 x i8> %tmp4 @@ -85,8 +85,8 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: v_bici16: ;CHECK: vbic - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> 
%tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > %tmp4 = and <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 @@ -95,8 +95,8 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: v_bici32: ;CHECK: vbic - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > %tmp4 = and <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 @@ -105,8 +105,8 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: v_bici64: ;CHECK: vbic - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > %tmp4 = and <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 @@ -115,8 +115,8 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: v_bicQi8: ;CHECK: vbic - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp4 = and <16 x i8> %tmp1, %tmp3 ret <16 x i8> %tmp4 @@ -125,8 +125,8 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: v_bicQi16: ;CHECK: vbic - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp4 = and <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -135,8 +135,8 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: v_bicQi32: ;CHECK: vbic - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > %tmp4 = and <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -145,8 +145,8 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: v_bicQi64: ;CHECK: vbic - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > %tmp4 = and <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 @@ -155,8 +155,8 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: v_eori8: ;CHECK: veor - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -164,8 +164,8 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: v_eori16: ;CHECK: veor - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x 
i16>* %B %tmp3 = xor <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -173,8 +173,8 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: v_eori32: ;CHECK: veor - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -182,8 +182,8 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: v_eori64: ;CHECK: veor - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } @@ -191,8 +191,8 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: v_eorQi8: ;CHECK: veor - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -200,8 +200,8 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: v_eorQi16: ;CHECK: veor - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -209,8 +209,8 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: v_eorQi32: ;CHECK: veor - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -218,8 +218,8 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: v_eorQi64: ;CHECK: veor - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -227,7 +227,7 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: v_mvni8: ;CHECK: vmvn - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <8 x i8> %tmp2 } @@ -235,7 +235,7 @@ define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: v_mvni16: ;CHECK: vmvn - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 > ret <4 x i16> %tmp2 } @@ -243,7 +243,7 @@ define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: v_mvni32: ;CHECK: vmvn - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > ret <2 x i32> %tmp2 } @@ -251,7 +251,7 @@ define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: v_mvni64: ;CHECK: vmvn - %tmp1 = load <1 x 
i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = xor <1 x i64> %tmp1, < i64 -1 > ret <1 x i64> %tmp2 } @@ -259,7 +259,7 @@ define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: v_mvnQi8: ;CHECK: vmvn - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <16 x i8> %tmp2 } @@ -267,7 +267,7 @@ define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: v_mvnQi16: ;CHECK: vmvn - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > ret <8 x i16> %tmp2 } @@ -275,7 +275,7 @@ define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: v_mvnQi32: ;CHECK: vmvn - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > ret <4 x i32> %tmp2 } @@ -283,7 +283,7 @@ define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: v_mvnQi64: ;CHECK: vmvn - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > ret <2 x i64> %tmp2 } @@ -291,8 +291,8 @@ define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: v_orri8: ;CHECK: vorr - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = or <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -300,8 +300,8 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: v_orri16: ;CHECK: vorr - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = or <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -309,8 +309,8 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: v_orri32: ;CHECK: vorr - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = or <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -318,8 +318,8 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: v_orri64: ;CHECK: vorr - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = or <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } @@ -327,8 +327,8 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: v_orrQi8: ;CHECK: vorr - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = or <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -336,8 +336,8 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { 
;CHECK-LABEL: v_orrQi16: ;CHECK: vorr - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = or <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -345,8 +345,8 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: v_orrQi32: ;CHECK: vorr - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = or <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -354,8 +354,8 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: v_orrQi64: ;CHECK: vorr - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = or <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -363,8 +363,8 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: v_orni8: ;CHECK: vorn - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp4 = or <8 x i8> %tmp1, %tmp3 ret <8 x i8> %tmp4 @@ -373,8 +373,8 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: v_orni16: ;CHECK: vorn - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > %tmp4 = or <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 @@ -383,8 +383,8 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: v_orni32: ;CHECK: vorn - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > %tmp4 = or <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 @@ -393,8 +393,8 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: v_orni64: ;CHECK: vorn - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > %tmp4 = or <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 @@ -403,8 +403,8 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: v_ornQi8: ;CHECK: vorn - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp4 = or <16 x i8> %tmp1, %tmp3 ret <16 x i8> %tmp4 @@ -413,8 +413,8 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: v_ornQi16: ;CHECK: vorn - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* 
%B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp4 = or <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -423,8 +423,8 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: v_ornQi32: ;CHECK: vorn - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > %tmp4 = or <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -433,8 +433,8 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: v_ornQi64: ;CHECK: vorn - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > %tmp4 = or <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 @@ -443,8 +443,8 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vtsti8: ;CHECK: vtst.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = and <8 x i8> %tmp1, %tmp2 %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer %tmp5 = sext <8 x i1> %tmp4 to <8 x i8> @@ -454,8 +454,8 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vtsti16: ;CHECK: vtst.16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = and <4 x i16> %tmp1, %tmp2 %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer %tmp5 = sext <4 x i1> %tmp4 to <4 x i16> @@ -465,8 +465,8 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vtsti32: ;CHECK: vtst.32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = and <2 x i32> %tmp1, %tmp2 %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer %tmp5 = sext <2 x i1> %tmp4 to <2 x i32> @@ -476,8 +476,8 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vtstQi8: ;CHECK: vtst.8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = and <16 x i8> %tmp1, %tmp2 %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer %tmp5 = sext <16 x i1> %tmp4 to <16 x i8> @@ -487,8 +487,8 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vtstQi16: ;CHECK: vtst.16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = and <8 x i16> %tmp1, %tmp2 %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer %tmp5 = sext <8 x i1> %tmp4 to <8 x i16> @@ -498,8 +498,8 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { 
;CHECK-LABEL: vtstQi32: ;CHECK: vtst.32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = and <4 x i32> %tmp1, %tmp2 %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer %tmp5 = sext <4 x i1> %tmp4 to <4 x i32> @@ -511,7 +511,7 @@ define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { ; CHECK-NOT: vmov ; CHECK-NOT: vmvn ; CHECK: vorr - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1> ret <8 x i8> %tmp3 } @@ -521,7 +521,7 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { ; CHECK-NOT: vmov ; CHECK-NOT: vmvn ; CHECK: vorr - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1> ret <16 x i8> %tmp3 } @@ -531,7 +531,7 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { ; CHECK-NOT: vmov ; CHECK-NOT: vmvn ; CHECK: vbic - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > ret <8 x i8> %tmp3 } @@ -541,7 +541,7 @@ define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind { ; CHECK-NOT: vmov ; CHECK-NOT: vmvn ; CHECK: vbic - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > ret <16 x i8> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vbsl-constant.ll b/llvm/test/CodeGen/ARM/vbsl-constant.ll index 5e033fe2a64..6bcbbc8fa87 100644 --- a/llvm/test/CodeGen/ARM/vbsl-constant.ll +++ b/llvm/test/CodeGen/ARM/vbsl-constant.ll @@ -5,9 +5,9 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK: vldr ;CHECK: vldr ;CHECK: vbsl - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = and <8 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> %tmp6 = and <8 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4> %tmp7 = or <8 x i8> %tmp4, %tmp6 @@ -19,9 +19,9 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind ;CHECK: vldr ;CHECK: vldr ;CHECK: vbsl - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = and <4 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3> %tmp6 = and <4 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4> %tmp7 = or <4 x i16> %tmp4, %tmp6 @@ -33,9 +33,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind ;CHECK: vldr ;CHECK: vldr ;CHECK: vbsl - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = and <2 x i32> %tmp1, <i32 3, i32 3> %tmp6 = and <2 x i32> %tmp3, <i32 -4, i32 -4> %tmp7 = or <2 x i32> %tmp4, %tmp6 @@ -48,9 +48,9 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind ;CHECK: vldr ;CHECK: vldr ;CHECK: vbsl - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B - %tmp3 = load <1 x i64>* %C + %tmp1 = load <1 x i64>, <1 x i64>* %A 
+ %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = load <1 x i64>, <1 x i64>* %C %tmp4 = and <1 x i64> %tmp1, <i64 3> %tmp6 = and <1 x i64> %tmp3, <i64 -4> %tmp7 = or <1 x i64> %tmp4, %tmp6 @@ -62,9 +62,9 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind ;CHECK: vld1.32 ;CHECK: vld1.32 ;CHECK: vbsl - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = load <16 x i8>* %C + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = load <16 x i8>, <16 x i8>* %C %tmp4 = and <16 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> %tmp6 = and <16 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4> %tmp7 = or <16 x i8> %tmp4, %tmp6 @@ -76,9 +76,9 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin ;CHECK: vld1.32 ;CHECK: vld1.32 ;CHECK: vbsl - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = load <8 x i16>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = load <8 x i16>, <8 x i16>* %C %tmp4 = and <8 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> %tmp6 = and <8 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4> %tmp7 = or <8 x i16> %tmp4, %tmp6 @@ -90,9 +90,9 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin ;CHECK: vld1.32 ;CHECK: vld1.32 ;CHECK: vbsl - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = load <4 x i32>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = load <4 x i32>, <4 x i32>* %C %tmp4 = and <4 x i32> %tmp1, <i32 3, i32 3, i32 3, i32 3> %tmp6 = and <4 x i32> %tmp3, <i32 -4, i32 -4, i32 -4, i32 -4> %tmp7 = or <4 x i32> %tmp4, %tmp6 @@ -105,9 +105,9 @@ define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwin ;CHECK: vld1.32 ;CHECK: vld1.64 ;CHECK: vbsl - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = load <2 x i64>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = load <2 x i64>, <2 x i64>* %C %tmp4 = and <2 x i64> %tmp1, <i64 3, i64 3> %tmp6 = and <2 x i64> %tmp3, <i64 -4, i64 -4> %tmp7 = or <2 x i64> %tmp4, %tmp6 diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll index ddc37cc8244..6812dd90a10 100644 --- a/llvm/test/CodeGen/ARM/vbsl.ll +++ b/llvm/test/CodeGen/ARM/vbsl.ll @@ -5,9 +5,9 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: v_bsli8: ;CHECK: vbsl - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = and <8 x i8> %tmp1, %tmp2 %tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp6 = and <8 x i8> %tmp5, %tmp3 @@ -18,9 +18,9 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: v_bsli16: ;CHECK: vbsl - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = and <4 x i16> %tmp1, %tmp2 %tmp5 = xor <4 x i16> %tmp1, < i16 -1, 
i16 -1, i16 -1, i16 -1 > %tmp6 = and <4 x i16> %tmp5, %tmp3 @@ -31,9 +31,9 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: v_bsli32: ;CHECK: vbsl - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = and <2 x i32> %tmp1, %tmp2 %tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > %tmp6 = and <2 x i32> %tmp5, %tmp3 @@ -44,9 +44,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { ;CHECK-LABEL: v_bsli64: ;CHECK: vbsl - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B - %tmp3 = load <1 x i64>* %C + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = load <1 x i64>, <1 x i64>* %C %tmp4 = and <1 x i64> %tmp1, %tmp2 %tmp5 = xor <1 x i64> %tmp1, < i64 -1 > %tmp6 = and <1 x i64> %tmp5, %tmp3 @@ -57,9 +57,9 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { ;CHECK-LABEL: v_bslQi8: ;CHECK: vbsl - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = load <16 x i8>* %C + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = load <16 x i8>, <16 x i8>* %C %tmp4 = and <16 x i8> %tmp1, %tmp2 %tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp6 = and <16 x i8> %tmp5, %tmp3 @@ -70,9 +70,9 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: v_bslQi16: ;CHECK: vbsl - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = load <8 x i16>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = load <8 x i16>, <8 x i16>* %C %tmp4 = and <8 x i16> %tmp1, %tmp2 %tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp6 = and <8 x i16> %tmp5, %tmp3 @@ -83,9 +83,9 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: v_bslQi32: ;CHECK: vbsl - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = load <4 x i32>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = load <4 x i32>, <4 x i32>* %C %tmp4 = and <4 x i32> %tmp1, %tmp2 %tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > %tmp6 = and <4 x i32> %tmp5, %tmp3 @@ -96,9 +96,9 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { ;CHECK-LABEL: v_bslQi64: ;CHECK: vbsl - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B - %tmp3 = load <2 x i64>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = load <2 x i64>, <2 x i64>* %C %tmp4 = and <2 x i64> %tmp1, %tmp2 %tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > %tmp6 = and <2 x i64> %tmp5, %tmp3 diff --git a/llvm/test/CodeGen/ARM/vceq.ll 
b/llvm/test/CodeGen/ARM/vceq.ll index e3202e402cc..3772401a2f9 100644 --- a/llvm/test/CodeGen/ARM/vceq.ll +++ b/llvm/test/CodeGen/ARM/vceq.ll @@ -3,8 +3,8 @@ define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vceqi8: ;CHECK: vceq.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = icmp eq <8 x i8> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -13,8 +13,8 @@ define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vceqi16: ;CHECK: vceq.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp eq <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -23,8 +23,8 @@ define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vceqi32: ;CHECK: vceq.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = icmp eq <2 x i32> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -33,8 +33,8 @@ define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vceqf32: ;CHECK: vceq.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -43,8 +43,8 @@ define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vceqQi8: ;CHECK: vceq.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = icmp eq <16 x i8> %tmp1, %tmp2 %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -53,8 +53,8 @@ define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vceqQi16: ;CHECK: vceq.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = icmp eq <8 x i16> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -63,8 +63,8 @@ define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vceqQi32: ;CHECK: vceq.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = icmp eq <4 x i32> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -73,8 +73,8 @@ define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vceqQf32: ;CHECK: vceq.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -85,7 
+85,7 @@ define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind { ;CHECK-NOT: vmov ;CHECK-NOT: vmvn ;CHECK: vceq.i8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vcge.ll b/llvm/test/CodeGen/ARM/vcge.ll index 3739f5ee8c5..2cd33cf3a42 100644 --- a/llvm/test/CodeGen/ARM/vcge.ll +++ b/llvm/test/CodeGen/ARM/vcge.ll @@ -3,8 +3,8 @@ define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vcges8: ;CHECK: vcge.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -13,8 +13,8 @@ define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vcges16: ;CHECK: vcge.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -23,8 +23,8 @@ define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vcges32: ;CHECK: vcge.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -33,8 +33,8 @@ define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vcgeu8: ;CHECK: vcge.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -43,8 +43,8 @@ define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vcgeu16: ;CHECK: vcge.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -53,8 +53,8 @@ define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vcgeu32: ;CHECK: vcge.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -63,8 +63,8 @@ define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcgef32: ;CHECK: vcge.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -73,8 +73,8 @@ define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vcgeQs8(<16 x i8>* 
%A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vcgeQs8: ;CHECK: vcge.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2 %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -83,8 +83,8 @@ define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vcgeQs16: ;CHECK: vcge.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -93,8 +93,8 @@ define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vcgeQs32: ;CHECK: vcge.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -103,8 +103,8 @@ define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vcgeQu8: ;CHECK: vcge.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2 %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -113,8 +113,8 @@ define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vcgeQu16: ;CHECK: vcge.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -123,8 +123,8 @@ define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vcgeQu32: ;CHECK: vcge.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -133,8 +133,8 @@ define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vcgeQf32: ;CHECK: vcge.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -143,8 +143,8 @@ define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vacgef32: ;CHECK: vacge.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -152,8 +152,8 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind { define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x 
float>* %B) nounwind { ;CHECK-LABEL: vacgeQf32: ;CHECK: vacge.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } @@ -166,7 +166,7 @@ define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind { ;CHECK-NOT: vmov ;CHECK-NOT: vmvn ;CHECK: vcge.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -177,7 +177,7 @@ define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind { ;CHECK-NOT: vmov ;CHECK-NOT: vmvn ;CHECK: vcle.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vcgt.ll b/llvm/test/CodeGen/ARM/vcgt.ll index 2f736f689ab..c39c939d6c9 100644 --- a/llvm/test/CodeGen/ARM/vcgt.ll +++ b/llvm/test/CodeGen/ARM/vcgt.ll @@ -4,8 +4,8 @@ define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vcgts8: ;CHECK: vcgt.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -14,8 +14,8 @@ define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vcgts16: ;CHECK: vcgt.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -24,8 +24,8 @@ define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vcgts32: ;CHECK: vcgt.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -34,8 +34,8 @@ define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vcgtu8: ;CHECK: vcgt.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -44,8 +44,8 @@ define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vcgtu16: ;CHECK: vcgt.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -54,8 +54,8 @@ define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vcgtu32: ;CHECK: vcgt.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 
= icmp ugt <2 x i32> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -64,8 +64,8 @@ define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcgtf32: ;CHECK: vcgt.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -74,8 +74,8 @@ define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vcgtQs8: ;CHECK: vcgt.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2 %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -84,8 +84,8 @@ define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vcgtQs16: ;CHECK: vcgt.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -94,8 +94,8 @@ define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vcgtQs32: ;CHECK: vcgt.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -104,8 +104,8 @@ define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vcgtQu8: ;CHECK: vcgt.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2 %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -114,8 +114,8 @@ define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vcgtQu16: ;CHECK: vcgt.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -124,8 +124,8 @@ define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vcgtQu32: ;CHECK: vcgt.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -134,8 +134,8 @@ define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vcgtQf32: ;CHECK: vcgt.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 %tmp4 = 
sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -144,8 +144,8 @@ define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vacgtf32: ;CHECK: vacgt.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x i32> %tmp3 } @@ -153,8 +153,8 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vacgtQf32: ;CHECK: vacgt.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x i32> %tmp3 } @@ -165,8 +165,8 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1 ;CHECK: vcgt.f32 [[Q1:q[0-9]+]] ;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]] - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 %tmp4 = zext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -180,7 +180,7 @@ define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind { ;CHECK-NOT: vmov ;CHECK-NOT: vmvn ;CHECK: vcgt.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -191,7 +191,7 @@ define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind { ;CHECK-NOT: vmov ;CHECK-NOT: vmvn ;CHECK: vclt.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vcnt.ll b/llvm/test/CodeGen/ARM/vcnt.ll index 390559b8280..de251c58e6b 100644 --- a/llvm/test/CodeGen/ARM/vcnt.ll +++ b/llvm/test/CodeGen/ARM/vcnt.ll @@ -4,7 +4,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vcnt8: ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } @@ -12,7 +12,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vcntQ8: ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } @@ -23,7 +23,7 @@ declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vclz8: ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0) ret <8 x i8> %tmp2 } @@ -31,7 +31,7 @@ define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vclz16: ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0) ret <4 x 
i16> %tmp2 } @@ -39,7 +39,7 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vclz32: ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}} - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0) ret <2 x i32> %tmp2 } @@ -47,7 +47,7 @@ define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vclzQ8: ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0) ret <16 x i8> %tmp2 } @@ -55,7 +55,7 @@ define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vclzQ16: ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0) ret <8 x i16> %tmp2 } @@ -63,7 +63,7 @@ define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vclzQ32: ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}} - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0) ret <4 x i32> %tmp2 } @@ -79,7 +79,7 @@ declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vclss8: ;CHECK: vcls.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } @@ -87,7 +87,7 @@ define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vclss16: ;CHECK: vcls.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } @@ -95,7 +95,7 @@ define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vclss32: ;CHECK: vcls.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -103,7 +103,7 @@ define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vclsQs8: ;CHECK: vcls.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } @@ -111,7 +111,7 @@ define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vclsQs16: ;CHECK: vcls.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } @@ -119,7 +119,7 @@ define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vclsQs32: ;CHECK: vcls.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll index 33aa71df0be..9491c15aef5 100644 --- a/llvm/test/CodeGen/ARM/vcombine.ll +++ b/llvm/test/CodeGen/ARM/vcombine.ll @@ -7,8 +7,8 @@ define <16 x i8> @vcombine8(<8 x i8>* %A, 
<8 x i8>* %B) nounwind { ; CHECK-LE: vmov r2, r3, d17 ; CHECK-BE: vmov r1, r0, d16 ; CHECK-BE: vmov r3, r2, d17 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ret <16 x i8> %tmp3 } @@ -19,8 +19,8 @@ define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LE: vmov r2, r3, d17 ; CHECK-BE: vmov r1, r0, d16 ; CHECK-BE: vmov r3, r2, d17 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ret <8 x i16> %tmp3 } @@ -31,8 +31,8 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LE: vmov r2, r3, d17 ; CHECK-BE: vmov r1, r0, d16 ; CHECK-BE: vmov r3, r2, d17 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x i32> %tmp3 } @@ -43,8 +43,8 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind { ; CHECK-LE: vmov r2, r3, d17 ; CHECK-BE: vmov r1, r0, d16 ; CHECK-BE: vmov r3, r2, d17 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x float> %tmp3 } @@ -55,8 +55,8 @@ define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ; CHECK-LE: vmov r2, r3, d17 ; CHECK-BE: vmov r1, r0, d16 ; CHECK-BE: vmov r3, r2, d17 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1> ret <2 x i64> %tmp3 } @@ -69,7 +69,7 @@ define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind { ; CHECK-NOT: vst ; CHECK-LE: vmov r0, r1, d16 ; CHECK-BE: vmov r1, r0, d16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x i16> %tmp2 } @@ -79,7 +79,7 @@ define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind { ; CHECK-NOT: vst ; CHECK-LE: vmov r0, r1, d17 ; CHECK-BE: vmov r1, r0, d16 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ret <8 x i8> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vcvt-cost.ll b/llvm/test/CodeGen/ARM/vcvt-cost.ll index 5e56a5b34cf..c80450a2840 100644 --- a/llvm/test/CodeGen/ARM/vcvt-cost.ll +++ b/llvm/test/CodeGen/ARM/vcvt-cost.ll @@ -9,7 +9,7 @@ define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) { ; CHECK: vmovl.s8 ; CHECK: vmovl.s16 ; CHECK: vmovl.s16 - %v0 = load %T0_5* %loadaddr + %v0 = load %T0_5, %T0_5* %loadaddr ; COST: func_cvt5 ; COST: cost of 3 {{.*}} sext %r = sext %T0_5 %v0 to %T1_5 @@ -25,7 +25,7 @@ define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) { ; CHECK: vmovl.u8 ; CHECK: vmovl.u16 ; CHECK: vmovl.u16 - %v0 = load %TA0_5* 
%loadaddr + %v0 = load %TA0_5, %TA0_5* %loadaddr ; COST: func_cvt1 ; COST: cost of 3 {{.*}} zext %r = zext %TA0_5 %v0 to %TA1_5 @@ -40,7 +40,7 @@ define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) { ; CHECK: vmovn.i32 ; CHECK: vmovn.i32 ; CHECK: vmovn.i16 - %v0 = load %T0_51* %loadaddr + %v0 = load %T0_51, %T0_51* %loadaddr ; COST: func_cvt51 ; COST: cost of 3 {{.*}} trunc %r = trunc %T0_51 %v0 to %T1_51 @@ -56,7 +56,7 @@ define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) { ; CHECK: vmovl.s16 ; CHECK: vmovl.s16 ; CHECK: vmovl.s16 - %v0 = load %TT0_5* %loadaddr + %v0 = load %TT0_5, %TT0_5* %loadaddr ; COST: func_cvt52 ; COST: cost of 6 {{.*}} sext %r = sext %TT0_5 %v0 to %TT1_5 @@ -73,7 +73,7 @@ define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) { ; CHECK: vmovl.u16 ; CHECK: vmovl.u16 ; CHECK: vmovl.u16 - %v0 = load %TTA0_5* %loadaddr + %v0 = load %TTA0_5, %TTA0_5* %loadaddr ; COST: func_cvt12 ; COST: cost of 6 {{.*}} zext %r = zext %TTA0_5 %v0 to %TTA1_5 @@ -91,7 +91,7 @@ define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) { ; CHECK: vmovn.i32 ; CHECK: vmovn.i16 ; CHECK: vmovn.i16 - %v0 = load %TT0_51* %loadaddr + %v0 = load %TT0_51, %TT0_51* %loadaddr ; COST: func_cvt512 ; COST: cost of 6 {{.*}} trunc %r = trunc %TT0_51 %v0 to %TT1_51 @@ -103,7 +103,7 @@ define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) { define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { ; CHECK: vmovl.s32 ; CHECK: vmovl.s32 - %v0 = load <4 x i16>* %loadaddr + %v0 = load <4 x i16>, <4 x i16>* %loadaddr ; COST: sext_v4i16_v4i64 ; COST: cost of 3 {{.*}} sext %r = sext <4 x i16> %v0 to <4 x i64> @@ -115,7 +115,7 @@ define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { ; CHECK: vmovl.u32 ; CHECK: vmovl.u32 - %v0 = load <4 x i16>* %loadaddr + %v0 = load <4 x i16>, <4 x i16>* %loadaddr ; COST: zext_v4i16_v4i64 ; COST: cost of 3 {{.*}} zext %r = zext <4 x i16> %v0 to <4 x i64> @@ -129,7 +129,7 @@ define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { ; CHECK: vmovl.s32 ; CHECK: vmovl.s32 ; CHECK: vmovl.s32 - %v0 = load <8 x i16>* %loadaddr + %v0 = load <8 x i16>, <8 x i16>* %loadaddr ; COST: sext_v8i16_v8i64 ; COST: cost of 6 {{.*}} sext %r = sext <8 x i16> %v0 to <8 x i64> @@ -143,7 +143,7 @@ define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { ; CHECK: vmovl.u32 ; CHECK: vmovl.u32 ; CHECK: vmovl.u32 - %v0 = load <8 x i16>* %loadaddr + %v0 = load <8 x i16>, <8 x i16>* %loadaddr ; COST: zext_v8i16_v8i64 ; COST: cost of 6 {{.*}} zext %r = zext <8 x i16> %v0 to <8 x i64> diff --git a/llvm/test/CodeGen/ARM/vcvt-v8.ll b/llvm/test/CodeGen/ARM/vcvt-v8.ll index c449009e1e1..9d5972fa4da 100644 --- a/llvm/test/CodeGen/ARM/vcvt-v8.ll +++ b/llvm/test/CodeGen/ARM/vcvt-v8.ll @@ -2,7 +2,7 @@ define <4 x i32> @vcvtasq(<4 x float>* %A) { ; CHECK: vcvtasq ; CHECK: vcvta.s32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -10,7 +10,7 @@ define <4 x i32> @vcvtasq(<4 x float>* %A) { define <2 x i32> @vcvtasd(<2 x float>* %A) { ; CHECK: vcvtasd ; CHECK: vcvta.s32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 
} @@ -18,7 +18,7 @@ define <2 x i32> @vcvtasd(<2 x float>* %A) { define <4 x i32> @vcvtnsq(<4 x float>* %A) { ; CHECK: vcvtnsq ; CHECK: vcvtn.s32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -26,7 +26,7 @@ define <4 x i32> @vcvtnsq(<4 x float>* %A) { define <2 x i32> @vcvtnsd(<2 x float>* %A) { ; CHECK: vcvtnsd ; CHECK: vcvtn.s32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 } @@ -34,7 +34,7 @@ define <2 x i32> @vcvtnsd(<2 x float>* %A) { define <4 x i32> @vcvtpsq(<4 x float>* %A) { ; CHECK: vcvtpsq ; CHECK: vcvtp.s32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -42,7 +42,7 @@ define <4 x i32> @vcvtpsq(<4 x float>* %A) { define <2 x i32> @vcvtpsd(<2 x float>* %A) { ; CHECK: vcvtpsd ; CHECK: vcvtp.s32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 } @@ -50,7 +50,7 @@ define <2 x i32> @vcvtpsd(<2 x float>* %A) { define <4 x i32> @vcvtmsq(<4 x float>* %A) { ; CHECK: vcvtmsq ; CHECK: vcvtm.s32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -58,7 +58,7 @@ define <4 x i32> @vcvtmsq(<4 x float>* %A) { define <2 x i32> @vcvtmsd(<2 x float>* %A) { ; CHECK: vcvtmsd ; CHECK: vcvtm.s32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 } @@ -66,7 +66,7 @@ define <2 x i32> @vcvtmsd(<2 x float>* %A) { define <4 x i32> @vcvtauq(<4 x float>* %A) { ; CHECK: vcvtauq ; CHECK: vcvta.u32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -74,7 +74,7 @@ define <4 x i32> @vcvtauq(<4 x float>* %A) { define <2 x i32> @vcvtaud(<2 x float>* %A) { ; CHECK: vcvtaud ; CHECK: vcvta.u32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 } @@ -82,7 +82,7 @@ define <2 x i32> @vcvtaud(<2 x float>* %A) { define <4 x i32> @vcvtnuq(<4 x float>* %A) { ; CHECK: vcvtnuq ; CHECK: vcvtn.u32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -90,7 +90,7 @@ define <4 x i32> @vcvtnuq(<4 x float>* %A) { define <2 x i32> @vcvtnud(<2 x float>* %A) { ; CHECK: vcvtnud ; CHECK: vcvtn.u32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 } @@ -98,7 +98,7 @@ define <2 x i32> @vcvtnud(<2 x float>* %A) { define <4 x i32> @vcvtpuq(<4 
x float>* %A) { ; CHECK: vcvtpuq ; CHECK: vcvtp.u32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -106,7 +106,7 @@ define <4 x i32> @vcvtpuq(<4 x float>* %A) { define <2 x i32> @vcvtpud(<2 x float>* %A) { ; CHECK: vcvtpud ; CHECK: vcvtp.u32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 } @@ -114,7 +114,7 @@ define <2 x i32> @vcvtpud(<2 x float>* %A) { define <4 x i32> @vcvtmuq(<4 x float>* %A) { ; CHECK: vcvtmuq ; CHECK: vcvtm.u32.f32 q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %tmp1) ret <4 x i32> %tmp2 } @@ -122,7 +122,7 @@ define <4 x i32> @vcvtmuq(<4 x float>* %A) { define <2 x i32> @vcvtmud(<2 x float>* %A) { ; CHECK: vcvtmud ; CHECK: vcvtm.u32.f32 d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %tmp1) ret <2 x i32> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vcvt.ll b/llvm/test/CodeGen/ARM/vcvt.ll index af4e6a3b046..0b7ffb8960a 100644 --- a/llvm/test/CodeGen/ARM/vcvt.ll +++ b/llvm/test/CodeGen/ARM/vcvt.ll @@ -3,7 +3,7 @@ define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vcvt_f32tos32: ;CHECK: vcvt.s32.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } @@ -11,7 +11,7 @@ define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind { define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vcvt_f32tou32: ;CHECK: vcvt.u32.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } @@ -19,7 +19,7 @@ define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind { define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vcvt_s32tof32: ;CHECK: vcvt.f32.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = sitofp <2 x i32> %tmp1 to <2 x float> ret <2 x float> %tmp2 } @@ -27,7 +27,7 @@ define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind { define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vcvt_u32tof32: ;CHECK: vcvt.f32.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = uitofp <2 x i32> %tmp1 to <2 x float> ret <2 x float> %tmp2 } @@ -35,7 +35,7 @@ define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind { define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vcvtQ_f32tos32: ;CHECK: vcvt.s32.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -43,7 +43,7 @@ define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind { define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vcvtQ_f32tou32: ;CHECK: vcvt.u32.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -51,7 +51,7 @@ define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind { define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind { 
;CHECK-LABEL: vcvtQ_s32tof32: ;CHECK: vcvt.f32.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = sitofp <4 x i32> %tmp1 to <4 x float> ret <4 x float> %tmp2 } @@ -59,7 +59,7 @@ define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind { define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vcvtQ_u32tof32: ;CHECK: vcvt.f32.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float> ret <4 x float> %tmp2 } @@ -67,7 +67,7 @@ define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind { define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vcvt_n_f32tos32: ;CHECK: vcvt.s32.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1) ret <2 x i32> %tmp2 } @@ -75,7 +75,7 @@ define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind { define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vcvt_n_f32tou32: ;CHECK: vcvt.u32.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1) ret <2 x i32> %tmp2 } @@ -83,7 +83,7 @@ define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind { define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vcvt_n_s32tof32: ;CHECK: vcvt.f32.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1) ret <2 x float> %tmp2 } @@ -91,7 +91,7 @@ define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind { define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vcvt_n_u32tof32: ;CHECK: vcvt.f32.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1) ret <2 x float> %tmp2 } @@ -104,7 +104,7 @@ declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwi define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vcvtQ_n_f32tos32: ;CHECK: vcvt.s32.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1) ret <4 x i32> %tmp2 } @@ -112,7 +112,7 @@ define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind { define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vcvtQ_n_f32tou32: ;CHECK: vcvt.u32.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1) ret <4 x i32> %tmp2 } @@ -120,7 +120,7 @@ define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind { define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vcvtQ_n_s32tof32: ;CHECK: vcvt.f32.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1) ret <4 x float> %tmp2 } @@ -128,7 +128,7 @@ define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind { define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vcvtQ_n_u32tof32: ;CHECK: vcvt.f32.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1) ret <4 x 
float> %tmp2 } @@ -141,7 +141,7 @@ declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwi define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vcvt_f16tof32: ;CHECK: vcvt.f32.f16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1) ret <4 x float> %tmp2 } @@ -149,7 +149,7 @@ define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind { define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind { ;CHECK-LABEL: vcvt_f32tof16: ;CHECK: vcvt.f16.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1) ret <4 x i16> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vcvt_combine.ll b/llvm/test/CodeGen/ARM/vcvt_combine.ll index 07ba230757b..0c856e8d761 100644 --- a/llvm/test/CodeGen/ARM/vcvt_combine.ll +++ b/llvm/test/CodeGen/ARM/vcvt_combine.ll @@ -7,7 +7,7 @@ ; CHECK-NOT: vmul define void @t0() nounwind { entry: - %tmp = load float* @in, align 4 + %tmp = load float, float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> @@ -23,7 +23,7 @@ declare void @foo_int32x2_t(<2 x i32>) ; CHECK-NOT: vmul define void @t1() nounwind { entry: - %tmp = load float* @in, align 4 + %tmp = load float, float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> @@ -39,7 +39,7 @@ declare void @foo_uint32x2_t(<2 x i32>) ; CHECK: vmul define void @t2() nounwind { entry: - %tmp = load float* @in, align 4 + %tmp = load float, float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000> @@ -53,7 +53,7 @@ entry: ; CHECK: vmul define void @t3() nounwind { entry: - %tmp = load float* @in, align 4 + %tmp = load float, float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000> @@ -67,7 +67,7 @@ entry: ; CHECK-NOT: vmul define void @t4() nounwind { entry: - %tmp = load float* @in, align 4 + %tmp = load float, float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000> @@ -81,7 +81,7 @@ entry: ; CHECK-NOT: vmul define void @t5() nounwind { entry: - %tmp = load float* @in, align 4 + %tmp = load float, float* @in, align 4 %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2 diff --git a/llvm/test/CodeGen/ARM/vdiv_combine.ll b/llvm/test/CodeGen/ARM/vdiv_combine.ll index 96807f7280f..8c6e4ba3505 100644 --- a/llvm/test/CodeGen/ARM/vdiv_combine.ll +++ b/llvm/test/CodeGen/ARM/vdiv_combine.ll @@ -11,7 +11,7 @@ declare void @foo_int32x4_t(<4 x i32>) ; CHECK-NOT: {{vdiv|vmul}} define void 
@t1() nounwind { entry: - %tmp = load i32* @iin, align 4 + %tmp = load i32, i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -27,7 +27,7 @@ declare void @foo_float32x2_t(<2 x float>) ; CHECK-NOT: {{vdiv|vmul}} define void @t2() nounwind { entry: - %tmp = load i32* @uin, align 4 + %tmp = load i32, i32* @uin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float> @@ -41,7 +41,7 @@ entry: ; CHECK: {{vdiv|vmul}} define void @t3() nounwind { entry: - %tmp = load i32* @iin, align 4 + %tmp = load i32, i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -55,7 +55,7 @@ entry: ; CHECK: {{vdiv|vmul}} define void @t4() nounwind { entry: - %tmp = load i32* @iin, align 4 + %tmp = load i32, i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -69,7 +69,7 @@ entry: ; CHECK-NOT: {{vdiv|vmul}} define void @t5() nounwind { entry: - %tmp = load i32* @iin, align 4 + %tmp = load i32, i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -83,7 +83,7 @@ entry: ; CHECK-NOT: {{vdiv|vmul}} define void @t6() nounwind { entry: - %tmp = load i32* @iin, align 4 + %tmp = load i32, i32* @iin, align 4 %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1 %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2 diff --git a/llvm/test/CodeGen/ARM/vdup.ll b/llvm/test/CodeGen/ARM/vdup.ll index 6f8b3dda9cd..36eebbfc465 100644 --- a/llvm/test/CodeGen/ARM/vdup.ll +++ b/llvm/test/CodeGen/ARM/vdup.ll @@ -166,7 +166,7 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind { define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vduplane8: ;CHECK: vdup.8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > ret <8 x i8> %tmp2 } @@ -174,7 +174,7 @@ define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind { define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vduplane16: ;CHECK: vdup.16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ret <4 x i16> %tmp2 } @@ -182,7 +182,7 @@ define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind { define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vduplane32: ;CHECK: vdup.32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 > ret <2 x i32> %tmp2 } @@ -190,7 +190,7 @@ define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind { define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind { ;CHECK-LABEL: vduplanefloat: ;CHECK: vdup.32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = 
shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 > ret <2 x float> %tmp2 } @@ -198,7 +198,7 @@ define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind { define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vduplaneQ8: ;CHECK: vdup.8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > ret <16 x i8> %tmp2 } @@ -206,7 +206,7 @@ define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind { define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vduplaneQ16: ;CHECK: vdup.16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 > ret <8 x i16> %tmp2 } @@ -214,7 +214,7 @@ define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind { define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vduplaneQ32: ;CHECK: vdup.32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ret <4 x i32> %tmp2 } @@ -222,7 +222,7 @@ define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind { define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind { ;CHECK-LABEL: vduplaneQfloat: ;CHECK: vdup.32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ret <4 x float> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll index 566e955af2b..29f4bb972a2 100644 --- a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll @@ -21,7 +21,7 @@ bb.i19: ; preds = %bb.i19, %bb3 define void @test_illegal_build_vector() nounwind { entry: store <2 x i64> undef, <2 x i64>* undef, align 16 - %0 = load <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1] + %0 = load <16 x i8>, <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1] %1 = or <16 x i8> zeroinitializer, %0 ; <<16 x i8>> [#uses=1] store <16 x i8> %1, <16 x i8>* undef, align 16 ret void @@ -63,7 +63,7 @@ bb2: ; Test trying to do a ShiftCombine on illegal types. ; The vector should be split first. 
define void @lshrIllegalType(<8 x i32>* %A) nounwind { - %tmp1 = load <8 x i32>* %A + %tmp1 = load <8 x i32>, <8 x i32>* %A %tmp2 = lshr <8 x i32> %tmp1, < i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> store <8 x i32> %tmp2, <8 x i32>* %A ret void @@ -89,7 +89,7 @@ declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_buildvector ; CHECK: vldr - %t0 = load i64* %ptr, align 4 + %t0 = load i64, i64* %ptr, align 4 %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0 store <2 x i64> %t1, <2 x i64>* %vp ret void @@ -98,8 +98,8 @@ define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind { define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_insertelement ; CHECK: vldr - %t0 = load i64* %ptr, align 4 - %vec = load <2 x i64>* %vp + %t0 = load i64, i64* %ptr, align 4 + %vec = load <2 x i64>, <2 x i64>* %vp %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0 store <2 x i64> %t1, <2 x i64>* %vp ret void @@ -108,7 +108,7 @@ define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind { define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind { ; CHECK: i64_extractelement ; CHECK: vstr - %vec = load <2 x i64>* %vp + %vec = load <2 x i64>, <2 x i64>* %vp %t1 = extractelement <2 x i64> %vec, i32 0 store i64 %t1, i64* %ptr ret void @@ -116,7 +116,7 @@ define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind { ; Test trying to do a AND Combine on illegal types. define void @andVec(<3 x i8>* %A) nounwind { - %tmp = load <3 x i8>* %A, align 4 + %tmp = load <3 x i8>, <3 x i8>* %A, align 4 %and = and <3 x i8> %tmp, <i8 7, i8 7, i8 7> store <3 x i8> %and, <3 x i8>* %A ret void @@ -125,7 +125,7 @@ define void @andVec(<3 x i8>* %A) nounwind { ; Test trying to do an OR Combine on illegal types. define void @orVec(<3 x i8>* %A) nounwind { - %tmp = load <3 x i8>* %A, align 4 + %tmp = load <3 x i8>, <3 x i8>* %A, align 4 %or = or <3 x i8> %tmp, <i8 7, i8 7, i8 7> store <3 x i8> %or, <3 x i8>* %A ret void @@ -146,7 +146,7 @@ define i16 @foldBuildVectors() { ; shuffles. ; CHECK-LABEL: reverse_v8i16: define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) { - %v0 = load <8 x i16>* %loadaddr + %v0 = load <8 x i16>, <8 x i16>* %loadaddr ; CHECK: vrev64.16 ; CHECK: vext.16 %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef, @@ -159,7 +159,7 @@ define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) { ; shuffles. 
; CHECK-LABEL: reverse_v16i8: define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) { - %v0 = load <16 x i8>* %loadaddr + %v0 = load <16 x i8>, <16 x i8>* %loadaddr ; CHECK: vrev64.8 ; CHECK: vext.8 %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef, @@ -180,9 +180,9 @@ define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) { define <8 x i16> @t3(i8 zeroext %xf, i8* nocapture %sp0, i8* nocapture %sp1, i32* nocapture %outp) { entry: %pix_sp0.0.cast = bitcast i8* %sp0 to i32* - %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1 + %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1 %pix_sp1.0.cast = bitcast i8* %sp1 to i32* - %pix_sp1.0.copyload = load i32* %pix_sp1.0.cast, align 1 + %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1 %vecinit = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0 %vecinit1 = insertelement <2 x i32> %vecinit, i32 %pix_sp1.0.copyload, i32 1 %0 = bitcast <2 x i32> %vecinit1 to <8 x i8> @@ -200,7 +200,7 @@ define <8 x i16> @t4(i8* nocapture %sp0) { ; CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r0] entry: %pix_sp0.0.cast = bitcast i8* %sp0 to i32* - %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1 + %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0 %0 = bitcast <2 x i32> %vec to <8 x i8> %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %0) @@ -219,11 +219,11 @@ entry: define <8 x i16> @t5(i8* nocapture %sp0, i8* nocapture %sp1, i8* nocapture %sp2) { entry: %pix_sp0.0.cast = bitcast i8* %sp0 to i32* - %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1 + %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1 %pix_sp1.0.cast = bitcast i8* %sp1 to i32* - %pix_sp1.0.copyload = load i32* %pix_sp1.0.cast, align 1 + %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1 %pix_sp2.0.cast = bitcast i8* %sp2 to i32* - %pix_sp2.0.copyload = load i32* %pix_sp2.0.cast, align 1 + %pix_sp2.0.copyload = load i32, i32* %pix_sp2.0.cast, align 1 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 1 %vecinit1 = insertelement <2 x i32> %vec, i32 %pix_sp1.0.copyload, i32 0 %vecinit2 = insertelement <2 x i32> %vec, i32 %pix_sp2.0.copyload, i32 0 diff --git a/llvm/test/CodeGen/ARM/vector-extend-narrow.ll b/llvm/test/CodeGen/ARM/vector-extend-narrow.ll index f3218969c78..7e2751b5cf5 100644 --- a/llvm/test/CodeGen/ARM/vector-extend-narrow.ll +++ b/llvm/test/CodeGen/ARM/vector-extend-narrow.ll @@ -4,7 +4,7 @@ define float @f(<4 x i16>* nocapture %in) { ; CHECK: vldr ; CHECK: vmovl.u16 - %1 = load <4 x i16>* %in + %1 = load <4 x i16>, <4 x i16>* %in ; CHECK: vcvt.f32.u32 %2 = uitofp <4 x i16> %1 to <4 x float> %3 = extractelement <4 x float> %2, i32 0 @@ -25,7 +25,7 @@ define float @g(<4 x i8>* nocapture %in) { ; CHECK: vld1 ; CHECK: vmovl.u8 ; CHECK: vmovl.u16 - %1 = load <4 x i8>* %in + %1 = load <4 x i8>, <4 x i8>* %in ; CHECK: vcvt.f32.u32 %2 = uitofp <4 x i8> %1 to <4 x float> %3 = extractelement <4 x float> %2, i32 0 @@ -58,7 +58,7 @@ define <4 x i8> @i(<4 x i8>* %x) { ; CHECK: vrecps ; CHECK: vmul ; CHECK: vmovn - %1 = load <4 x i8>* %x, align 4 + %1 = load <4 x i8>, <4 x i8>* %x, align 4 %2 = sdiv <4 x i8> zeroinitializer, %1 ret <4 x i8> %2 } @@ -68,7 +68,7 @@ define <4 x i32> @j(<4 x i8>* %in) nounwind { ; CHECK: vmovl.u8 ; CHECK: vmovl.u16 ; CHECK-NOT: vand - %1 = load <4 x i8>* %in, align 4 + %1 = load <4 x i8>, <4 x i8>* %in, align 4 %2 = zext <4 x i8> %1 to <4 x i32> 
ret <4 x i32> %2 } diff --git a/llvm/test/CodeGen/ARM/vector-load.ll b/llvm/test/CodeGen/ARM/vector-load.ll index f5cf70b4c4d..17f134f458a 100644 --- a/llvm/test/CodeGen/ARM/vector-load.ll +++ b/llvm/test/CodeGen/ARM/vector-load.ll @@ -6,16 +6,16 @@ target triple = "thumbv7s-apple-ios8.0.0" define <8 x i8> @load_v8i8(<8 x i8>** %ptr) { ;CHECK-LABEL: load_v8i8: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <8 x i8>** %ptr - %lA = load <8 x i8>* %A, align 1 + %A = load <8 x i8>*, <8 x i8>** %ptr + %lA = load <8 x i8>, <8 x i8>* %A, align 1 ret <8 x i8> %lA } define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) { ;CHECK-LABEL: load_v8i8_update: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <8 x i8>** %ptr - %lA = load <8 x i8>* %A, align 1 + %A = load <8 x i8>*, <8 x i8>** %ptr + %lA = load <8 x i8>, <8 x i8>* %A, align 1 %inc = getelementptr <8 x i8>, <8 x i8>* %A, i38 1 store <8 x i8>* %inc, <8 x i8>** %ptr ret <8 x i8> %lA @@ -24,16 +24,16 @@ define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) { define <4 x i16> @load_v4i16(<4 x i16>** %ptr) { ;CHECK-LABEL: load_v4i16: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <4 x i16>** %ptr - %lA = load <4 x i16>* %A, align 1 + %A = load <4 x i16>*, <4 x i16>** %ptr + %lA = load <4 x i16>, <4 x i16>* %A, align 1 ret <4 x i16> %lA } define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) { ;CHECK-LABEL: load_v4i16_update: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <4 x i16>** %ptr - %lA = load <4 x i16>* %A, align 1 + %A = load <4 x i16>*, <4 x i16>** %ptr + %lA = load <4 x i16>, <4 x i16>* %A, align 1 %inc = getelementptr <4 x i16>, <4 x i16>* %A, i34 1 store <4 x i16>* %inc, <4 x i16>** %ptr ret <4 x i16> %lA @@ -42,16 +42,16 @@ define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) { define <2 x i32> @load_v2i32(<2 x i32>** %ptr) { ;CHECK-LABEL: load_v2i32: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <2 x i32>** %ptr - %lA = load <2 x i32>* %A, align 1 + %A = load <2 x i32>*, <2 x i32>** %ptr + %lA = load <2 x i32>, <2 x i32>* %A, align 1 ret <2 x i32> %lA } define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) { ;CHECK-LABEL: load_v2i32_update: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i32>** %ptr - %lA = load <2 x i32>* %A, align 1 + %A = load <2 x i32>*, <2 x i32>** %ptr + %lA = load <2 x i32>, <2 x i32>* %A, align 1 %inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1 store <2 x i32>* %inc, <2 x i32>** %ptr ret <2 x i32> %lA @@ -60,16 +60,16 @@ define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) { define <2 x float> @load_v2f32(<2 x float>** %ptr) { ;CHECK-LABEL: load_v2f32: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <2 x float>** %ptr - %lA = load <2 x float>* %A, align 1 + %A = load <2 x float>*, <2 x float>** %ptr + %lA = load <2 x float>, <2 x float>* %A, align 1 ret <2 x float> %lA } define <2 x float> @load_v2f32_update(<2 x float>** %ptr) { ;CHECK-LABEL: load_v2f32_update: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! 
- %A = load <2 x float>** %ptr - %lA = load <2 x float>* %A, align 1 + %A = load <2 x float>*, <2 x float>** %ptr + %lA = load <2 x float>, <2 x float>* %A, align 1 %inc = getelementptr <2 x float>, <2 x float>* %A, i32 1 store <2 x float>* %inc, <2 x float>** %ptr ret <2 x float> %lA @@ -78,16 +78,16 @@ define <2 x float> @load_v2f32_update(<2 x float>** %ptr) { define <1 x i64> @load_v1i64(<1 x i64>** %ptr) { ;CHECK-LABEL: load_v1i64: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <1 x i64>** %ptr - %lA = load <1 x i64>* %A, align 1 + %A = load <1 x i64>*, <1 x i64>** %ptr + %lA = load <1 x i64>, <1 x i64>* %A, align 1 ret <1 x i64> %lA } define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) { ;CHECK-LABEL: load_v1i64_update: ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <1 x i64>** %ptr - %lA = load <1 x i64>* %A, align 1 + %A = load <1 x i64>*, <1 x i64>** %ptr + %lA = load <1 x i64>, <1 x i64>* %A, align 1 %inc = getelementptr <1 x i64>, <1 x i64>* %A, i31 1 store <1 x i64>* %inc, <1 x i64>** %ptr ret <1 x i64> %lA @@ -96,16 +96,16 @@ define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) { define <16 x i8> @load_v16i8(<16 x i8>** %ptr) { ;CHECK-LABEL: load_v16i8: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <16 x i8>** %ptr - %lA = load <16 x i8>* %A, align 1 + %A = load <16 x i8>*, <16 x i8>** %ptr + %lA = load <16 x i8>, <16 x i8>* %A, align 1 ret <16 x i8> %lA } define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) { ;CHECK-LABEL: load_v16i8_update: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <16 x i8>** %ptr - %lA = load <16 x i8>* %A, align 1 + %A = load <16 x i8>*, <16 x i8>** %ptr + %lA = load <16 x i8>, <16 x i8>* %A, align 1 %inc = getelementptr <16 x i8>, <16 x i8>* %A, i316 1 store <16 x i8>* %inc, <16 x i8>** %ptr ret <16 x i8> %lA @@ -114,16 +114,16 @@ define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) { define <8 x i16> @load_v8i16(<8 x i16>** %ptr) { ;CHECK-LABEL: load_v8i16: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <8 x i16>** %ptr - %lA = load <8 x i16>* %A, align 1 + %A = load <8 x i16>*, <8 x i16>** %ptr + %lA = load <8 x i16>, <8 x i16>* %A, align 1 ret <8 x i16> %lA } define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) { ;CHECK-LABEL: load_v8i16_update: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <8 x i16>** %ptr - %lA = load <8 x i16>* %A, align 1 + %A = load <8 x i16>*, <8 x i16>** %ptr + %lA = load <8 x i16>, <8 x i16>* %A, align 1 %inc = getelementptr <8 x i16>, <8 x i16>* %A, i38 1 store <8 x i16>* %inc, <8 x i16>** %ptr ret <8 x i16> %lA @@ -132,16 +132,16 @@ define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) { define <4 x i32> @load_v4i32(<4 x i32>** %ptr) { ;CHECK-LABEL: load_v4i32: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <4 x i32>** %ptr - %lA = load <4 x i32>* %A, align 1 + %A = load <4 x i32>*, <4 x i32>** %ptr + %lA = load <4 x i32>, <4 x i32>* %A, align 1 ret <4 x i32> %lA } define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) { ;CHECK-LABEL: load_v4i32_update: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! 
- %A = load <4 x i32>** %ptr - %lA = load <4 x i32>* %A, align 1 + %A = load <4 x i32>*, <4 x i32>** %ptr + %lA = load <4 x i32>, <4 x i32>* %A, align 1 %inc = getelementptr <4 x i32>, <4 x i32>* %A, i34 1 store <4 x i32>* %inc, <4 x i32>** %ptr ret <4 x i32> %lA @@ -150,16 +150,16 @@ define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) { define <4 x float> @load_v4f32(<4 x float>** %ptr) { ;CHECK-LABEL: load_v4f32: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <4 x float>** %ptr - %lA = load <4 x float>* %A, align 1 + %A = load <4 x float>*, <4 x float>** %ptr + %lA = load <4 x float>, <4 x float>* %A, align 1 ret <4 x float> %lA } define <4 x float> @load_v4f32_update(<4 x float>** %ptr) { ;CHECK-LABEL: load_v4f32_update: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <4 x float>** %ptr - %lA = load <4 x float>* %A, align 1 + %A = load <4 x float>*, <4 x float>** %ptr + %lA = load <4 x float>, <4 x float>* %A, align 1 %inc = getelementptr <4 x float>, <4 x float>* %A, i34 1 store <4 x float>* %inc, <4 x float>** %ptr ret <4 x float> %lA @@ -168,16 +168,16 @@ define <4 x float> @load_v4f32_update(<4 x float>** %ptr) { define <2 x i64> @load_v2i64(<2 x i64>** %ptr) { ;CHECK-LABEL: load_v2i64: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <2 x i64>** %ptr - %lA = load <2 x i64>* %A, align 1 + %A = load <2 x i64>*, <2 x i64>** %ptr + %lA = load <2 x i64>, <2 x i64>* %A, align 1 ret <2 x i64> %lA } define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) { ;CHECK-LABEL: load_v2i64_update: ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i64>** %ptr - %lA = load <2 x i64>* %A, align 1 + %A = load <2 x i64>*, <2 x i64>** %ptr + %lA = load <2 x i64>, <2 x i64>* %A, align 1 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr ret <2 x i64> %lA @@ -187,8 +187,8 @@ define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) { define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) { ;CHECK-LABEL: load_v2i64_update_aligned2: ;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i64>** %ptr - %lA = load <2 x i64>* %A, align 2 + %A = load <2 x i64>*, <2 x i64>** %ptr + %lA = load <2 x i64>, <2 x i64>* %A, align 2 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr ret <2 x i64> %lA @@ -197,8 +197,8 @@ define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) { define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) { ;CHECK-LABEL: load_v2i64_update_aligned4: ;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i64>** %ptr - %lA = load <2 x i64>* %A, align 4 + %A = load <2 x i64>*, <2 x i64>** %ptr + %lA = load <2 x i64>, <2 x i64>* %A, align 4 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr ret <2 x i64> %lA @@ -207,8 +207,8 @@ define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) { define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) { ;CHECK-LABEL: load_v2i64_update_aligned8: ;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! 
- %A = load <2 x i64>** %ptr - %lA = load <2 x i64>* %A, align 8 + %A = load <2 x i64>*, <2 x i64>** %ptr + %lA = load <2 x i64>, <2 x i64>* %A, align 8 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr ret <2 x i64> %lA @@ -217,8 +217,8 @@ define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) { define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) { ;CHECK-LABEL: load_v2i64_update_aligned16: ;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]! - %A = load <2 x i64>** %ptr - %lA = load <2 x i64>* %A, align 16 + %A = load <2 x i64>*, <2 x i64>** %ptr + %lA = load <2 x i64>, <2 x i64>* %A, align 16 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr ret <2 x i64> %lA @@ -230,8 +230,8 @@ define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) { ;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32] ;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} ;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} - %A = load <4 x i8>** %ptr - %lA = load <4 x i8>* %A, align 4 + %A = load <4 x i8>*, <4 x i8>** %ptr + %lA = load <4 x i8>, <4 x i8>* %A, align 4 %zlA = zext <4 x i8> %lA to <4 x i32> ret <4 x i32> %zlA } @@ -244,8 +244,8 @@ define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) { ;CHECK: str.w r[[INCREG]], [r0] ;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} ;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} - %A = load <4 x i8>** %ptr - %lA = load <4 x i8>* %A, align 4 + %A = load <4 x i8>*, <4 x i8>** %ptr + %lA = load <4 x i8>, <4 x i8>* %A, align 4 %inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4 store <4 x i8>* %inc, <4 x i8>** %ptr %zlA = zext <4 x i8> %lA to <4 x i32> diff --git a/llvm/test/CodeGen/ARM/vector-promotion.ll b/llvm/test/CodeGen/ARM/vector-promotion.ll index 42ceb60c47f..1dabee38608 100644 --- a/llvm/test/CodeGen/ARM/vector-promotion.ll +++ b/llvm/test/CodeGen/ARM/vector-promotion.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon | FileCheck --check-prefix=ASM %s ; IR-BOTH-LABEL: @simpleOneInstructionPromotion -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 undef, i32 1> ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR]], i32 1 ; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest @@ -16,7 +16,7 @@ ; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1:32] ; ASM-NEXT: bx define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = or i32 %extract, 1 store i32 %out, i32* %dest, align 4 @@ -24,7 +24,7 @@ define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) { } ; IR-BOTH-LABEL: @unsupportedInstructionForPromotion -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0 ; IR-BOTH-NEXT: [[CMP:%[a-zA-Z_0-9-]+]] = icmp eq i32 [[EXTRACT]], %in2 ; IR-BOTH-NEXT: store i1 [[CMP]], i1* %dest @@ -35,7 +35,7 @@ define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) { ; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]] ; ASM: bx define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 0 %out = icmp eq i32 %extract, %in2 store i1 %out, i1* %dest, align 4 @@ -44,7 +44,7 @@ define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* ; IR-BOTH-LABEL: @unsupportedChainInDifferentBBs -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0 ; IR-BOTH-NEXT: br i1 %bool, label %bb2, label %end ; BB2 @@ -58,7 +58,7 @@ define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* ; ASM: bx define void @unsupportedChainInDifferentBBs(<2 x i32>* %addr1, i32* %dest, i1 %bool) { bb1: - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 0 br i1 %bool, label %bb2, label %end bb2: @@ -70,7 +70,7 @@ end: } ; IR-LABEL: @chainOfInstructionsToPromote -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; IR-BOTH-NEXT: [[VECTOR_OR1:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 undef> ; IR-BOTH-NEXT: [[VECTOR_OR2:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR1]], <i32 1, i32 undef> ; IR-BOTH-NEXT: [[VECTOR_OR3:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR2]], <i32 1, i32 undef> @@ -87,7 +87,7 @@ end: ; ASM-NOT: vmov.32 {{r[0-9]+}}, [[LOAD]] ; ASM: bx define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 0 %out1 = or i32 %extract, 1 %out2 = or i32 %out1, 1 @@ -101,7 +101,7 @@ define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) { } ; IR-BOTH-LABEL: @unsupportedMultiUses -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-BOTH-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1 ; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest @@ -112,7 +112,7 @@ define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) { ; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]] ; ASM: bx define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = or i32 %extract, 1 store i32 %out, i32* %dest, align 4 @@ -122,7 +122,7 @@ define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) { ; Check that we promote we a splat constant when this is a division. ; The NORMAL mode does not promote anything as divisions are not legal. 
; IR-BOTH-LABEL: @udivCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7 @@ -133,7 +133,7 @@ define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) { ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest ; IR-BOTH-NEXT: ret define void @udivCase(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = udiv i32 %extract, 7 store i32 %out, i32* %dest, align 4 @@ -141,7 +141,7 @@ define void @udivCase(<2 x i32>* %addr1, i32* %dest) { } ; IR-BOTH-LABEL: @uremCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7 @@ -152,7 +152,7 @@ define void @udivCase(<2 x i32>* %addr1, i32* %dest) { ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest ; IR-BOTH-NEXT: ret define void @uremCase(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = urem i32 %extract, 7 store i32 %out, i32* %dest, align 4 @@ -160,7 +160,7 @@ define void @uremCase(<2 x i32>* %addr1, i32* %dest) { } ; IR-BOTH-LABEL: @sdivCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7 @@ -171,7 +171,7 @@ define void @uremCase(<2 x i32>* %addr1, i32* %dest) { ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest ; IR-BOTH-NEXT: ret define void @sdivCase(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = sdiv i32 %extract, 7 store i32 %out, i32* %dest, align 4 @@ -179,7 +179,7 @@ define void @sdivCase(<2 x i32>* %addr1, i32* %dest) { } ; IR-BOTH-LABEL: @sremCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7 @@ -190,7 +190,7 @@ define void @sdivCase(<2 x i32>* %addr1, i32* %dest) { ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest ; IR-BOTH-NEXT: ret define void @sremCase(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = srem i32 %extract, 7 store i32 %out, i32* %dest, align 4 @@ -198,7 +198,7 @@ define void @sremCase(<2 x i32>* %addr1, i32* %dest) { } ; IR-BOTH-LABEL: @fdivCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; 
IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0 @@ -209,7 +209,7 @@ define void @sremCase(<2 x i32>* %addr1, i32* %dest) { ; IR-BOTH-NEXT: store float [[RES]], float* %dest ; IR-BOTH-NEXT: ret define void @fdivCase(<2 x float>* %addr1, float* %dest) { - %in1 = load <2 x float>* %addr1, align 8 + %in1 = load <2 x float>, <2 x float>* %addr1, align 8 %extract = extractelement <2 x float> %in1, i32 1 %out = fdiv float %extract, 7.0 store float %out, float* %dest, align 4 @@ -217,7 +217,7 @@ define void @fdivCase(<2 x float>* %addr1, float* %dest) { } ; IR-BOTH-LABEL: @fremCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0 @@ -228,7 +228,7 @@ define void @fdivCase(<2 x float>* %addr1, float* %dest) { ; IR-BOTH-NEXT: store float [[RES]], float* %dest ; IR-BOTH-NEXT: ret define void @fremCase(<2 x float>* %addr1, float* %dest) { - %in1 = load <2 x float>* %addr1, align 8 + %in1 = load <2 x float>, <2 x float>* %addr1, align 8 %extract = extractelement <2 x float> %in1, i32 1 %out = frem float %extract, 7.0 store float %out, float* %dest, align 4 @@ -238,13 +238,13 @@ define void @fremCase(<2 x float>* %addr1, float* %dest) { ; Check that we do not promote when we may introduce undefined behavior ; like division by zero. ; IR-BOTH-LABEL: @undefDivCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 7, [[EXTRACT]] ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest ; IR-BOTH-NEXT: ret define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = udiv i32 7, %extract store i32 %out, i32* %dest, align 4 @@ -255,13 +255,13 @@ define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) { ; Check that we do not promote when we may introduce undefined behavior ; like division by zero. ; IR-BOTH-LABEL: @undefRemCase -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1 ; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 7, [[EXTRACT]] ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest ; IR-BOTH-NEXT: ret define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 1 %out = srem i32 7, %extract store i32 %out, i32* %dest, align 4 @@ -271,7 +271,7 @@ define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) { ; Check that we use an undef mask for undefined behavior if the fast-math ; flag is set. 
; IR-BOTH-LABEL: @undefConstantFRemCaseWithFastMath -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float [[EXTRACT]], 7.0 @@ -282,7 +282,7 @@ define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) { ; IR-BOTH-NEXT: store float [[RES]], float* %dest ; IR-BOTH-NEXT: ret define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) { - %in1 = load <2 x float>* %addr1, align 8 + %in1 = load <2 x float>, <2 x float>* %addr1, align 8 %extract = extractelement <2 x float> %in1, i32 1 %out = frem nnan float %extract, 7.0 store float %out, float* %dest, align 4 @@ -292,7 +292,7 @@ define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest ; Check that we use an undef mask for undefined behavior if the fast-math ; flag is set. ; IR-BOTH-LABEL: @undefVectorFRemCaseWithFastMath -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float 7.000000e+00, [[EXTRACT]] @@ -303,7 +303,7 @@ define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest ; IR-BOTH-NEXT: store float [[RES]], float* %dest ; IR-BOTH-NEXT: ret define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) { - %in1 = load <2 x float>* %addr1, align 8 + %in1 = load <2 x float>, <2 x float>* %addr1, align 8 %extract = extractelement <2 x float> %in1, i32 1 %out = frem nnan float 7.0, %extract store float %out, float* %dest, align 4 @@ -314,7 +314,7 @@ define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) ; This requires the STRESS mode, as floating point value are ; not promote on armv7. ; IR-BOTH-LABEL: @simpleOneInstructionPromotionFloat -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fadd float [[EXTRACT]], 1.0 @@ -325,7 +325,7 @@ define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) ; IR-BOTH-NEXT: store float [[RES]], float* %dest ; IR-BOTH-NEXT: ret define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %dest) { - %in1 = load <2 x float>* %addr1, align 8 + %in1 = load <2 x float>, <2 x float>* %addr1, align 8 %extract = extractelement <2 x float> %in1, i32 1 %out = fadd float %extract, 1.0 store float %out, float* %dest, align 4 @@ -337,7 +337,7 @@ define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %des ; This requires the STRESS modes, as variable index are expensive ; to lower. 
; IR-BOTH-LABEL: @simpleOneInstructionPromotionVariableIdx -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1 @@ -348,7 +348,7 @@ define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %des ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest ; IR-BOTH-NEXT: ret define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %dest, i32 %idx) { - %in1 = load <2 x i32>* %addr1, align 8 + %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8 %extract = extractelement <2 x i32> %in1, i32 %idx %out = or i32 %extract, 1 store i32 %out, i32* %dest, align 4 @@ -360,7 +360,7 @@ define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %d ; as legal or custom, althought the actual assembly is better if we were ; promoting it. ; IR-BOTH-LABEL: @simpleOneInstructionPromotion8x8 -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>, <8 x i8>* %addr1 ; Scalar version: ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[LOAD]], i32 1 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i8 [[EXTRACT]], 1 @@ -371,7 +371,7 @@ define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %d ; IR-BOTH-NEXT: store i8 [[RES]], i8* %dest ; IR-BOTH-NEXT: ret define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) { - %in1 = load <8 x i8>* %addr1, align 8 + %in1 = load <8 x i8>, <8 x i8>* %addr1, align 8 %extract = extractelement <8 x i8> %in1, i32 1 %out = or i8 %extract, 1 store i8 %out, i8* %dest, align 4 @@ -381,7 +381,7 @@ define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) { ; Check that we optimized the sequence correctly when it can be ; lowered on a Q register. 
; IR-BOTH-LABEL: @simpleOneInstructionPromotion -; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>* %addr1 +; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>, <4 x i32>* %addr1 ; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <4 x i32> [[LOAD]], <i32 undef, i32 1, i32 undef, i32 undef> ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <4 x i32> [[VECTOR_OR]], i32 1 ; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest @@ -395,7 +395,7 @@ define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) { ; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1] ; ASM-NEXT: bx define void @simpleOneInstructionPromotion4x32(<4 x i32>* %addr1, i32* %dest) { - %in1 = load <4 x i32>* %addr1, align 8 + %in1 = load <4 x i32>, <4 x i32>* %addr1, align 8 %extract = extractelement <4 x i32> %in1, i32 1 %out = or i32 %extract, 1 store i32 %out, i32* %dest, align 1 diff --git a/llvm/test/CodeGen/ARM/vector-spilling.ll b/llvm/test/CodeGen/ARM/vector-spilling.ll index c138bc35516..b8058c8e871 100644 --- a/llvm/test/CodeGen/ARM/vector-spilling.ll +++ b/llvm/test/CodeGen/ARM/vector-spilling.ll @@ -11,16 +11,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @test(<8 x i64>* %src) #0 { entry: %0 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 0 - %1 = load <8 x i64>* %0, align 8 + %1 = load <8 x i64>, <8 x i64>* %0, align 8 %2 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 1 - %3 = load <8 x i64>* %2, align 8 + %3 = load <8 x i64>, <8 x i64>* %2, align 8 %4 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 2 - %5 = load <8 x i64>* %4, align 8 + %5 = load <8 x i64>, <8 x i64>* %4, align 8 %6 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 3 - %7 = load <8 x i64>* %6, align 8 + %7 = load <8 x i64>, <8 x i64>* %6, align 8 %8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> diff --git a/llvm/test/CodeGen/ARM/vector-store.ll b/llvm/test/CodeGen/ARM/vector-store.ll index b5ac5bb9bac..30baa9a20dd 100644 --- a/llvm/test/CodeGen/ARM/vector-store.ll +++ b/llvm/test/CodeGen/ARM/vector-store.ll @@ -6,7 +6,7 @@ target triple = "thumbv7s-apple-ios8.0.0" define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) { ;CHECK-LABEL: store_v8i8: ;CHECK: str r1, [r0] - %A = load <8 x i8>** %ptr + %A = load <8 x i8>*, <8 x i8>** %ptr store <8 x i8> %val, <8 x i8>* %A, align 1 ret void } @@ -14,7 +14,7 @@ define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) { define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) { ;CHECK-LABEL: store_v8i8_update: ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <8 x i8>** %ptr + %A = load <8 x i8>*, <8 x i8>** %ptr store <8 x i8> %val, <8 x i8>* %A, align 1 %inc = getelementptr <8 x i8>, <8 x i8>* %A, i38 1 store <8 x i8>* %inc, <8 x i8>** %ptr @@ -24,7 +24,7 @@ define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) { define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) { ;CHECK-LABEL: store_v4i16: ;CHECK: str r1, [r0] - %A = load <4 x i16>** %ptr + %A = load <4 x i16>*, <4 x i16>** %ptr store <4 x i16> %val, <4 x i16>* %A, align 1 ret void } @@ -32,7 +32,7 @@ define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) { define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) { ;CHECK-LABEL: store_v4i16_update: ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! 
- %A = load <4 x i16>** %ptr + %A = load <4 x i16>*, <4 x i16>** %ptr store <4 x i16> %val, <4 x i16>* %A, align 1 %inc = getelementptr <4 x i16>, <4 x i16>* %A, i34 1 store <4 x i16>* %inc, <4 x i16>** %ptr @@ -42,7 +42,7 @@ define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) { define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) { ;CHECK-LABEL: store_v2i32: ;CHECK: str r1, [r0] - %A = load <2 x i32>** %ptr + %A = load <2 x i32>*, <2 x i32>** %ptr store <2 x i32> %val, <2 x i32>* %A, align 1 ret void } @@ -50,7 +50,7 @@ define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) { define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) { ;CHECK-LABEL: store_v2i32_update: ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i32>** %ptr + %A = load <2 x i32>*, <2 x i32>** %ptr store <2 x i32> %val, <2 x i32>* %A, align 1 %inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1 store <2 x i32>* %inc, <2 x i32>** %ptr @@ -60,7 +60,7 @@ define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) { define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) { ;CHECK-LABEL: store_v2f32: ;CHECK: str r1, [r0] - %A = load <2 x float>** %ptr + %A = load <2 x float>*, <2 x float>** %ptr store <2 x float> %val, <2 x float>* %A, align 1 ret void } @@ -68,7 +68,7 @@ define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) { define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) { ;CHECK-LABEL: store_v2f32_update: ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x float>** %ptr + %A = load <2 x float>*, <2 x float>** %ptr store <2 x float> %val, <2 x float>* %A, align 1 %inc = getelementptr <2 x float>, <2 x float>* %A, i32 1 store <2 x float>* %inc, <2 x float>** %ptr @@ -78,7 +78,7 @@ define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) { define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) { ;CHECK-LABEL: store_v1i64: ;CHECK: str r1, [r0] - %A = load <1 x i64>** %ptr + %A = load <1 x i64>*, <1 x i64>** %ptr store <1 x i64> %val, <1 x i64>* %A, align 1 ret void } @@ -86,7 +86,7 @@ define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) { define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) { ;CHECK-LABEL: store_v1i64_update: ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <1 x i64>** %ptr + %A = load <1 x i64>*, <1 x i64>** %ptr store <1 x i64> %val, <1 x i64>* %A, align 1 %inc = getelementptr <1 x i64>, <1 x i64>* %A, i31 1 store <1 x i64>* %inc, <1 x i64>** %ptr @@ -96,7 +96,7 @@ define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) { define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) { ;CHECK-LABEL: store_v16i8: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <16 x i8>** %ptr + %A = load <16 x i8>*, <16 x i8>** %ptr store <16 x i8> %val, <16 x i8>* %A, align 1 ret void } @@ -104,7 +104,7 @@ define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) { define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) { ;CHECK-LABEL: store_v16i8_update: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! 
- %A = load <16 x i8>** %ptr + %A = load <16 x i8>*, <16 x i8>** %ptr store <16 x i8> %val, <16 x i8>* %A, align 1 %inc = getelementptr <16 x i8>, <16 x i8>* %A, i316 1 store <16 x i8>* %inc, <16 x i8>** %ptr @@ -114,7 +114,7 @@ define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) { define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) { ;CHECK-LABEL: store_v8i16: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <8 x i16>** %ptr + %A = load <8 x i16>*, <8 x i16>** %ptr store <8 x i16> %val, <8 x i16>* %A, align 1 ret void } @@ -122,7 +122,7 @@ define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) { define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) { ;CHECK-LABEL: store_v8i16_update: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <8 x i16>** %ptr + %A = load <8 x i16>*, <8 x i16>** %ptr store <8 x i16> %val, <8 x i16>* %A, align 1 %inc = getelementptr <8 x i16>, <8 x i16>* %A, i38 1 store <8 x i16>* %inc, <8 x i16>** %ptr @@ -132,7 +132,7 @@ define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) { define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) { ;CHECK-LABEL: store_v4i32: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <4 x i32>** %ptr + %A = load <4 x i32>*, <4 x i32>** %ptr store <4 x i32> %val, <4 x i32>* %A, align 1 ret void } @@ -140,7 +140,7 @@ define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) { define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) { ;CHECK-LABEL: store_v4i32_update: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <4 x i32>** %ptr + %A = load <4 x i32>*, <4 x i32>** %ptr store <4 x i32> %val, <4 x i32>* %A, align 1 %inc = getelementptr <4 x i32>, <4 x i32>* %A, i34 1 store <4 x i32>* %inc, <4 x i32>** %ptr @@ -150,7 +150,7 @@ define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) { define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) { ;CHECK-LABEL: store_v4f32: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <4 x float>** %ptr + %A = load <4 x float>*, <4 x float>** %ptr store <4 x float> %val, <4 x float>* %A, align 1 ret void } @@ -158,7 +158,7 @@ define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) { define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) { ;CHECK-LABEL: store_v4f32_update: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <4 x float>** %ptr + %A = load <4 x float>*, <4 x float>** %ptr store <4 x float> %val, <4 x float>* %A, align 1 %inc = getelementptr <4 x float>, <4 x float>* %A, i34 1 store <4 x float>* %inc, <4 x float>** %ptr @@ -168,7 +168,7 @@ define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) { define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) { ;CHECK-LABEL: store_v2i64: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}] - %A = load <2 x i64>** %ptr + %A = load <2 x i64>*, <2 x i64>** %ptr store <2 x i64> %val, <2 x i64>* %A, align 1 ret void } @@ -176,7 +176,7 @@ define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) { define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) { ;CHECK-LABEL: store_v2i64_update: ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! 
- %A = load <2 x i64>** %ptr + %A = load <2 x i64>*, <2 x i64>** %ptr store <2 x i64> %val, <2 x i64>* %A, align 1 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr @@ -186,7 +186,7 @@ define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) { define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) { ;CHECK-LABEL: store_v2i64_update_aligned2: ;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i64>** %ptr + %A = load <2 x i64>*, <2 x i64>** %ptr store <2 x i64> %val, <2 x i64>* %A, align 2 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr @@ -196,7 +196,7 @@ define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) { define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) { ;CHECK-LABEL: store_v2i64_update_aligned4: ;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i64>** %ptr + %A = load <2 x i64>*, <2 x i64>** %ptr store <2 x i64> %val, <2 x i64>* %A, align 4 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr @@ -206,7 +206,7 @@ define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) { define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) { ;CHECK-LABEL: store_v2i64_update_aligned8: ;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]! - %A = load <2 x i64>** %ptr + %A = load <2 x i64>*, <2 x i64>** %ptr store <2 x i64> %val, <2 x i64>* %A, align 8 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr @@ -216,7 +216,7 @@ define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) { define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) { ;CHECK-LABEL: store_v2i64_update_aligned16: ;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]! 
- %A = load <2 x i64>** %ptr + %A = load <2 x i64>*, <2 x i64>** %ptr store <2 x i64> %val, <2 x i64>* %A, align 16 %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1 store <2 x i64>* %inc, <2 x i64>** %ptr @@ -232,7 +232,7 @@ define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) { ;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}} ;CHECK: ldr r[[PTRREG:[0-9]+]], [r0] ;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32] - %A = load <4 x i8>** %ptr + %A = load <4 x i8>*, <4 x i8>** %ptr %trunc = trunc <4 x i32> %val to <4 x i8> store <4 x i8> %trunc, <4 x i8>* %A, align 4 ret void @@ -249,7 +249,7 @@ define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) ;CHECK: ldr r[[PTRREG:[0-9]+]], [r0] ;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]] ;CHECK: str r[[PTRREG]], [r0] - %A = load <4 x i8>** %ptr + %A = load <4 x i8>*, <4 x i8>** %ptr %trunc = trunc <4 x i32> %val to <4 x i8> store <4 x i8> %trunc, <4 x i8>* %A, align 4 %inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4 diff --git a/llvm/test/CodeGen/ARM/vext.ll b/llvm/test/CodeGen/ARM/vext.ll index 4407451244e..72ecf0ef062 100644 --- a/llvm/test/CodeGen/ARM/vext.ll +++ b/llvm/test/CodeGen/ARM/vext.ll @@ -3,8 +3,8 @@ define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextd: ;CHECK: vext - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextRd: ;CHECK: vext - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4> ret <8 x i8> %tmp3 } @@ -21,8 +21,8 @@ define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextq: ;CHECK: vext - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18> ret <16 x i8> %tmp3 } @@ -30,8 +30,8 @@ define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextRq: ;CHECK: vext - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> ret <16 x i8> %tmp3 } @@ -39,8 +39,8 @@ define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: test_vextd16: ;CHECK: vext - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> 
%tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6> ret <4 x i16> %tmp3 } @@ -48,8 +48,8 @@ define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: test_vextq32: ;CHECK: vext - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6> ret <4 x i32> %tmp3 } @@ -59,8 +59,8 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextd_undef: ;CHECK: vext - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10> ret <8 x i8> %tmp3 } @@ -68,8 +68,8 @@ define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: test_vextRq_undef: ;CHECK: vext - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6> ret <16 x i8> %tmp3 } @@ -118,8 +118,8 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vext.16 ;CHECK-NOT: vext.16 ;CHECK: vzip.16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9> ret <4 x i16> %tmp3 } @@ -128,8 +128,8 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: test_undef: ;CHECK: vzip.16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9> ret <4 x i16> %tmp3 } @@ -143,7 +143,7 @@ define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind { ;CHECK: vmov.16 [[REG]][1] ;CHECK: vmov.16 [[REG]][2] ;CHECK: vmov.16 [[REG]][3] - %tmp1 = load <32 x i16>* %B + %tmp1 = load <32 x i16>, <32 x i16>* %B %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24> ret <4 x i16> %tmp2 } @@ -156,7 +156,7 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { ;CHECK: vmov.16 [[REG]][1] ;CHECK: vmov.16 [[REG]][2] ;CHECK: vmov.16 [[REG]][3] - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ret <4 x i16> %tmp2 } @@ -174,8 +174,8 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vmov.16 [[REG2]][1] ;CHECK: vmov.16 [[REG2]][2] ;CHECK: vmov.16 [[REG2]][3] - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 
7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9> ret <8 x i16> %tmp3 } @@ -185,7 +185,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind { ; CHECK-LABEL: test_elem_mismatch: ; CHECK: vstr - %tmp0 = load <2 x i64>* %src, align 16 + %tmp0 = load <2 x i64>, <2 x i64>* %src, align 16 %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 %tmp3 = extractelement <4 x i32> %tmp1, i32 2 diff --git a/llvm/test/CodeGen/ARM/vfcmp.ll b/llvm/test/CodeGen/ARM/vfcmp.ll index 4b2fea9baa0..8673b7d639d 100644 --- a/llvm/test/CodeGen/ARM/vfcmp.ll +++ b/llvm/test/CodeGen/ARM/vfcmp.ll @@ -7,8 +7,8 @@ define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcunef32: ;CHECK: vceq.f32 ;CHECK-NEXT: vmvn - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp une <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -18,8 +18,8 @@ define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind { define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcoltf32: ;CHECK: vcgt.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -29,8 +29,8 @@ define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcolef32: ;CHECK: vcge.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -41,8 +41,8 @@ define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcugef32: ;CHECK: vcgt.f32 ;CHECK-NEXT: vmvn - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -53,8 +53,8 @@ define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vculef32: ;CHECK: vcgt.f32 ;CHECK-NEXT: vmvn - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -65,8 +65,8 @@ define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcugtf32: ;CHECK: vcge.f32 ;CHECK-NEXT: vmvn - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -77,8 +77,8 @@ define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vcultf32: ;CHECK: vcge.f32 ;CHECK-NEXT: vmvn - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B 
%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -91,8 +91,8 @@ define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-NEXT: vcgt.f32 ;CHECK-NEXT: vorr ;CHECK-NEXT: vmvn - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -104,8 +104,8 @@ define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vcgt.f32 ;CHECK-NEXT: vcgt.f32 ;CHECK-NEXT: vorr - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp one <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -118,8 +118,8 @@ define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-NEXT: vcgt.f32 ;CHECK-NEXT: vorr ;CHECK-NEXT: vmvn - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -131,8 +131,8 @@ define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK: vcge.f32 ;CHECK-NEXT: vcgt.f32 ;CHECK-NEXT: vorr - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vfp.ll b/llvm/test/CodeGen/ARM/vfp.ll index 57ff9d3742e..31b55e8571d 100644 --- a/llvm/test/CodeGen/ARM/vfp.ll +++ b/llvm/test/CodeGen/ARM/vfp.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s define void @test(float* %P, double* %D) { - %A = load float* %P ; <float> [#uses=1] - %B = load double* %D ; <double> [#uses=1] + %A = load float, float* %P ; <float> [#uses=1] + %B = load double, double* %D ; <double> [#uses=1] store float %A, float* %P store double %B, double* %D ret void @@ -15,11 +15,11 @@ declare double @fabs(double) define void @test_abs(float* %P, double* %D) { ;CHECK-LABEL: test_abs: - %a = load float* %P ; <float> [#uses=1] + %a = load float, float* %P ; <float> [#uses=1] ;CHECK: vabs.f32 %b = call float @fabsf( float %a ) readnone ; <float> [#uses=1] store float %b, float* %P - %A = load double* %D ; <double> [#uses=1] + %A = load double, double* %D ; <double> [#uses=1] ;CHECK: vabs.f64 %B = call double @fabs( double %A ) readnone ; <double> [#uses=1] store double %B, double* %D @@ -28,10 +28,10 @@ define void @test_abs(float* %P, double* %D) { define void @test_add(float* %P, double* %D) { ;CHECK-LABEL: test_add: - %a = load float* %P ; <float> [#uses=2] + %a = load float, float* %P ; <float> [#uses=2] %b = fadd float %a, %a ; <float> [#uses=1] store float %b, float* %P - %A = load double* %D ; <double> [#uses=2] + %A = load double, double* %D ; <double> [#uses=2] %B = fadd double %A, %A ; <double> [#uses=1] store double %B, double* %D ret void @@ -39,11 +39,11 @@ define void @test_add(float* %P, double* %D) { define void @test_ext_round(float* %P, double* %D) { ;CHECK-LABEL: test_ext_round: - %a = load float* %P ; <float> [#uses=1] + %a = load float, float* %P ; <float> [#uses=1] ;CHECK: 
vcvt.f64.f32 ;CHECK: vcvt.f32.f64 %b = fpext float %a to double ; <double> [#uses=1] - %A = load double* %D ; <double> [#uses=1] + %A = load double, double* %D ; <double> [#uses=1] %B = fptrunc double %A to float ; <float> [#uses=1] store double %b, double* %D store float %B, float* %P @@ -52,9 +52,9 @@ define void @test_ext_round(float* %P, double* %D) { define void @test_fma(float* %P1, float* %P2, float* %P3) { ;CHECK-LABEL: test_fma: - %a1 = load float* %P1 ; <float> [#uses=1] - %a2 = load float* %P2 ; <float> [#uses=1] - %a3 = load float* %P3 ; <float> [#uses=1] + %a1 = load float, float* %P1 ; <float> [#uses=1] + %a2 = load float, float* %P2 ; <float> [#uses=1] + %a3 = load float, float* %P3 ; <float> [#uses=1] ;CHECK: vnmls.f32 %X = fmul float %a1, %a2 ; <float> [#uses=1] %Y = fsub float %X, %a3 ; <float> [#uses=1] @@ -64,7 +64,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) { define i32 @test_ftoi(float* %P1) { ;CHECK-LABEL: test_ftoi: - %a1 = load float* %P1 ; <float> [#uses=1] + %a1 = load float, float* %P1 ; <float> [#uses=1] ;CHECK: vcvt.s32.f32 %b1 = fptosi float %a1 to i32 ; <i32> [#uses=1] ret i32 %b1 @@ -72,7 +72,7 @@ define i32 @test_ftoi(float* %P1) { define i32 @test_ftou(float* %P1) { ;CHECK-LABEL: test_ftou: - %a1 = load float* %P1 ; <float> [#uses=1] + %a1 = load float, float* %P1 ; <float> [#uses=1] ;CHECK: vcvt.u32.f32 %b1 = fptoui float %a1 to i32 ; <i32> [#uses=1] ret i32 %b1 @@ -80,7 +80,7 @@ define i32 @test_ftou(float* %P1) { define i32 @test_dtoi(double* %P1) { ;CHECK-LABEL: test_dtoi: - %a1 = load double* %P1 ; <double> [#uses=1] + %a1 = load double, double* %P1 ; <double> [#uses=1] ;CHECK: vcvt.s32.f64 %b1 = fptosi double %a1 to i32 ; <i32> [#uses=1] ret i32 %b1 @@ -88,7 +88,7 @@ define i32 @test_dtoi(double* %P1) { define i32 @test_dtou(double* %P1) { ;CHECK-LABEL: test_dtou: - %a1 = load double* %P1 ; <double> [#uses=1] + %a1 = load double, double* %P1 ; <double> [#uses=1] ;CHECK: vcvt.u32.f64 %b1 = fptoui double %a1 to i32 ; <i32> [#uses=1] ret i32 %b1 @@ -113,9 +113,9 @@ define void @test_utod2(double* %P1, i8 %X) { define void @test_cmp(float* %glob, i32 %X) { ;CHECK-LABEL: test_cmp: entry: - %tmp = load float* %glob ; <float> [#uses=2] + %tmp = load float, float* %glob ; <float> [#uses=2] %tmp3 = getelementptr float, float* %glob, i32 2 ; <float*> [#uses=1] - %tmp4 = load float* %tmp3 ; <float> [#uses=2] + %tmp4 = load float, float* %tmp3 ; <float> [#uses=2] %tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1] %tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1] %tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1] @@ -141,7 +141,7 @@ declare i32 @baz(...) 
define void @test_cmpfp0(float* %glob, i32 %X) { ;CHECK-LABEL: test_cmpfp0: entry: - %tmp = load float* %glob ; <float> [#uses=1] + %tmp = load float, float* %glob ; <float> [#uses=1] ;CHECK: vcmpe.f32 %tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1] br i1 %tmp.upgrd.3, label %cond_true, label %cond_false diff --git a/llvm/test/CodeGen/ARM/vget_lane.ll b/llvm/test/CodeGen/ARM/vget_lane.ll index 2518ee2278c..d4cbfad5be6 100644 --- a/llvm/test/CodeGen/ARM/vget_lane.ll +++ b/llvm/test/CodeGen/ARM/vget_lane.ll @@ -5,7 +5,7 @@ target triple = "thumbv7-elf" define i32 @vget_lanes8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vget_lanes8: ;CHECK: vmov.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = extractelement <8 x i8> %tmp1, i32 1 %tmp3 = sext i8 %tmp2 to i32 ret i32 %tmp3 @@ -14,7 +14,7 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind { define i32 @vget_lanes16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vget_lanes16: ;CHECK: vmov.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = extractelement <4 x i16> %tmp1, i32 1 %tmp3 = sext i16 %tmp2 to i32 ret i32 %tmp3 @@ -23,7 +23,7 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind { define i32 @vget_laneu8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vget_laneu8: ;CHECK: vmov.u8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = extractelement <8 x i8> %tmp1, i32 1 %tmp3 = zext i8 %tmp2 to i32 ret i32 %tmp3 @@ -32,7 +32,7 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind { define i32 @vget_laneu16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vget_laneu16: ;CHECK: vmov.u16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = extractelement <4 x i16> %tmp1, i32 1 %tmp3 = zext i16 %tmp2 to i32 ret i32 %tmp3 @@ -42,7 +42,7 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind { define i32 @vget_lanei32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vget_lanei32: ;CHECK: vmov.32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = add <2 x i32> %tmp1, %tmp1 %tmp3 = extractelement <2 x i32> %tmp2, i32 1 ret i32 %tmp3 @@ -51,7 +51,7 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind { define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vgetQ_lanes8: ;CHECK: vmov.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = extractelement <16 x i8> %tmp1, i32 1 %tmp3 = sext i8 %tmp2 to i32 ret i32 %tmp3 @@ -60,7 +60,7 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind { define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vgetQ_lanes16: ;CHECK: vmov.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = extractelement <8 x i16> %tmp1, i32 1 %tmp3 = sext i16 %tmp2 to i32 ret i32 %tmp3 @@ -69,7 +69,7 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind { define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vgetQ_laneu8: ;CHECK: vmov.u8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = extractelement <16 x i8> %tmp1, i32 1 %tmp3 = zext i8 %tmp2 to i32 ret i32 %tmp3 @@ -78,7 +78,7 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind { define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vgetQ_laneu16: ;CHECK: vmov.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = extractelement <8 x i16> %tmp1, i32 1 %tmp3 = zext i16 %tmp2 to i32 ret i32 %tmp3 @@ -88,7 +88,7 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind { define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: 
vgetQ_lanei32: ;CHECK: vmov.32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = add <4 x i32> %tmp1, %tmp1 %tmp3 = extractelement <4 x i32> %tmp2, i32 1 ret i32 %tmp3 @@ -100,7 +100,7 @@ entry: %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] %out_uint16_t = alloca i16 ; <i16*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1] + %0 = load <4 x i16>, <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1] %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1] %2 = add i16 %1, %1 store i16 %2, i16* %out_uint16_t, align 2 @@ -116,7 +116,7 @@ entry: %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1] %out_uint8_t = alloca i8 ; <i8*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1] + %0 = load <8 x i8>, <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1] %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1] %2 = add i8 %1, %1 store i8 %2, i8* %out_uint8_t, align 1 @@ -132,7 +132,7 @@ entry: %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1] %out_uint16_t = alloca i16 ; <i16*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1] + %0 = load <8 x i16>, <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1] %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1] %2 = add i16 %1, %1 store i16 %2, i16* %out_uint16_t, align 2 @@ -148,7 +148,7 @@ entry: %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1] %out_uint8_t = alloca i8 ; <i8*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1] + %0 = load <16 x i8>, <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1] %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1] %2 = add i8 %1, %1 store i8 %2, i8* %out_uint8_t, align 1 @@ -161,7 +161,7 @@ return: ; preds = %entry define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind { ;CHECK-LABEL: vset_lane8: ;CHECK: vmov.8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1 ret <8 x i8> %tmp2 } @@ -169,7 +169,7 @@ define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind { define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind { ;CHECK-LABEL: vset_lane16: ;CHECK: vmov.16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1 ret <4 x i16> %tmp2 } @@ -177,7 +177,7 @@ define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind { define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind { ;CHECK-LABEL: vset_lane32: ;CHECK: vmov.32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1 ret <2 x i32> %tmp2 } @@ -185,7 +185,7 @@ define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind { define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind { ;CHECK-LABEL: vsetQ_lane8: ;CHECK: vmov.8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1 ret <16 x i8> %tmp2 } @@ -193,7 +193,7 @@ define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind { define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind { ;CHECK-LABEL: vsetQ_lane16: ;CHECK: vmov.16 - %tmp1 = load <8 x i16>* %A + %tmp1 
= load <8 x i16>, <8 x i16>* %A %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1 ret <8 x i16> %tmp2 } @@ -201,7 +201,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind { define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind { ;CHECK-LABEL: vsetQ_lane32: ;CHECK: vmov.32 d{{.*}}[1], r1 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1 ret <4 x i32> %tmp2 } @@ -219,14 +219,14 @@ entry: ; be an immediate constant. Make sure a variable lane number is handled. define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind { - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = extractelement <8 x i8> %tmp1, i32 %B %tmp3 = sext i8 %tmp2 to i32 ret i32 %tmp3 } define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind { - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = add <4 x i32> %tmp1, %tmp1 %tmp3 = extractelement <4 x i32> %tmp2, i32 %B ret i32 %tmp3 diff --git a/llvm/test/CodeGen/ARM/vhadd.ll b/llvm/test/CodeGen/ARM/vhadd.ll index 6183db3702b..01e239d5c73 100644 --- a/llvm/test/CodeGen/ARM/vhadd.ll +++ b/llvm/test/CodeGen/ARM/vhadd.ll @@ -3,8 +3,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vhadds8: ;CHECK: vhadd.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vhadds16: ;CHECK: vhadd.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vhadds32: ;CHECK: vhadd.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vhaddu8: ;CHECK: vhadd.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -39,8 +39,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vhaddu16: ;CHECK: vhadd.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -48,8 +48,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vhaddu32: ;CHECK: vhadd.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B 
+ %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -57,8 +57,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vhaddQs8: ;CHECK: vhadd.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -66,8 +66,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vhaddQs16: ;CHECK: vhadd.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -75,8 +75,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vhaddQs32: ;CHECK: vhadd.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -84,8 +84,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vhaddQu8: ;CHECK: vhadd.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -93,8 +93,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vhaddQu16: ;CHECK: vhadd.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -102,8 +102,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vhaddQu32: ;CHECK: vhadd.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -127,8 +127,8 @@ declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind rea define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vrhadds8: ;CHECK: vrhadd.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -136,8 +136,8 @@ define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vrhadds16: ;CHECK: vrhadd.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = 
load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -145,8 +145,8 @@ define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vrhadds32: ;CHECK: vrhadd.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -154,8 +154,8 @@ define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vrhaddu8: ;CHECK: vrhadd.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -163,8 +163,8 @@ define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vrhaddu16: ;CHECK: vrhadd.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -172,8 +172,8 @@ define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vrhaddu32: ;CHECK: vrhadd.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -181,8 +181,8 @@ define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vrhaddQs8: ;CHECK: vrhadd.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -190,8 +190,8 @@ define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vrhaddQs16: ;CHECK: vrhadd.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -199,8 +199,8 @@ define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vrhaddQs32: ;CHECK: vrhadd.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -208,8 +208,8 @@ define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vrhaddQu8: ;CHECK: vrhadd.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* 
%B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -217,8 +217,8 @@ define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vrhaddQu16: ;CHECK: vrhadd.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -226,8 +226,8 @@ define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vrhaddQu32: ;CHECK: vrhadd.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vhsub.ll b/llvm/test/CodeGen/ARM/vhsub.ll index f1a0cb27f57..7b3b29ac6e1 100644 --- a/llvm/test/CodeGen/ARM/vhsub.ll +++ b/llvm/test/CodeGen/ARM/vhsub.ll @@ -3,8 +3,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vhsubs8: ;CHECK: vhsub.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vhsubs16: ;CHECK: vhsub.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vhsubs32: ;CHECK: vhsub.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vhsubu8: ;CHECK: vhsub.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -39,8 +39,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vhsubu16: ;CHECK: vhsub.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -48,8 +48,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vhsubu32: 
;CHECK: vhsub.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -57,8 +57,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vhsubQs8: ;CHECK: vhsub.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -66,8 +66,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vhsubQs16: ;CHECK: vhsub.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -75,8 +75,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vhsubQs32: ;CHECK: vhsub.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -84,8 +84,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vhsubQu8: ;CHECK: vhsub.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -93,8 +93,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vhsubQu16: ;CHECK: vhsub.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -102,8 +102,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vhsubQu32: ;CHECK: vhsub.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vicmp.ll b/llvm/test/CodeGen/ARM/vicmp.ll index bebb32062f7..21b104a0d04 100644 --- a/llvm/test/CodeGen/ARM/vicmp.ll +++ b/llvm/test/CodeGen/ARM/vicmp.ll @@ -10,8 +10,8 @@ define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vcnei8: ;CHECK: vceq.i8 ;CHECK-NEXT: vmvn - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = icmp ne <8 x i8> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> ret <8 x i8> %tmp4 @@ -21,8 +21,8 @@ define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) 
nounwind { ;CHECK-LABEL: vcnei16: ;CHECK: vceq.i16 ;CHECK-NEXT: vmvn - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp ne <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -32,8 +32,8 @@ define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vcnei32: ;CHECK: vceq.i32 ;CHECK-NEXT: vmvn - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = icmp ne <2 x i32> %tmp1, %tmp2 %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -43,8 +43,8 @@ define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vcneQi8: ;CHECK: vceq.i8 ;CHECK-NEXT: vmvn - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = icmp ne <16 x i8> %tmp1, %tmp2 %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -54,8 +54,8 @@ define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vcneQi16: ;CHECK: vceq.i16 ;CHECK-NEXT: vmvn - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = icmp ne <8 x i16> %tmp1, %tmp2 %tmp4 = sext <8 x i1> %tmp3 to <8 x i16> ret <8 x i16> %tmp4 @@ -65,8 +65,8 @@ define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vcneQi32: ;CHECK: vceq.i32 ;CHECK-NEXT: vmvn - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = icmp ne <4 x i32> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -75,8 +75,8 @@ define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vcltQs8: ;CHECK: vcgt.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = icmp slt <16 x i8> %tmp1, %tmp2 %tmp4 = sext <16 x i1> %tmp3 to <16 x i8> ret <16 x i8> %tmp4 @@ -85,8 +85,8 @@ define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vcles16: ;CHECK: vcge.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp sle <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -95,8 +95,8 @@ define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vcltu16: ;CHECK: vcgt.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = icmp ult <4 x i16> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> ret <4 x i16> %tmp4 @@ -105,8 +105,8 @@ define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vcleQu32: ;CHECK: vcge.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = icmp ule <4 x i32> %tmp1, %tmp2 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x 
i32> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vld1.ll b/llvm/test/CodeGen/ARM/vld1.ll index 24dc1ab3195..8064ea4a320 100644 --- a/llvm/test/CodeGen/ARM/vld1.ll +++ b/llvm/test/CodeGen/ARM/vld1.ll @@ -23,7 +23,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind { define <4 x i16> @vld1i16_update(i16** %ptr) nounwind { ;CHECK-LABEL: vld1i16_update: ;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]! - %A = load i16** %ptr + %A = load i16*, i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) %tmp2 = getelementptr i16, i16* %A, i32 4 @@ -43,7 +43,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind { define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind { ;CHECK-LABEL: vld1i32_update: ;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}} - %A = load i32** %ptr + %A = load i32*, i32** %ptr %tmp0 = bitcast i32* %A to i8* %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) %tmp2 = getelementptr i32, i32* %A, i32 %inc @@ -79,7 +79,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind { define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind { ;CHECK-LABEL: vld1Qi8_update: ;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]! - %A = load i8** %ptr + %A = load i8*, i8** %ptr %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) %tmp2 = getelementptr i8, i8* %A, i32 16 store i8* %tmp2, i8** %ptr diff --git a/llvm/test/CodeGen/ARM/vld2.ll b/llvm/test/CodeGen/ARM/vld2.ll index 26097cfde8c..391b49152cd 100644 --- a/llvm/test/CodeGen/ARM/vld2.ll +++ b/llvm/test/CodeGen/ARM/vld2.ll @@ -60,7 +60,7 @@ define <2 x float> @vld2f(float* %A) nounwind { define <2 x float> @vld2f_update(float** %ptr) nounwind { ;CHECK-LABEL: vld2f_update: ;CHECK: vld2.32 {d16, d17}, [r1]! - %A = load float** %ptr + %A = load float*, float** %ptr %tmp0 = bitcast float* %A to i8* %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 @@ -98,7 +98,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind { define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind { ;CHECK-LABEL: vld2Qi8_update: ;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1 - %A = load i8** %ptr + %A = load i8*, i8** %ptr %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16) %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 diff --git a/llvm/test/CodeGen/ARM/vld3.ll b/llvm/test/CodeGen/ARM/vld3.ll index a98b35e9a67..0d14179ba73 100644 --- a/llvm/test/CodeGen/ARM/vld3.ll +++ b/llvm/test/CodeGen/ARM/vld3.ll @@ -38,7 +38,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind { ;CHECK-LABEL: vld3i16_update: ;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}} - %A = load i16** %ptr + %A = load i16*, i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 @@ -137,7 +137,7 @@ define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind { ;CHECK-LABEL: vld3Qi32_update: ;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]! ;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]! 
- %A = load i32** %ptr + %A = load i32*, i32** %ptr %tmp0 = bitcast i32* %A to i8* %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 diff --git a/llvm/test/CodeGen/ARM/vld4.ll b/llvm/test/CodeGen/ARM/vld4.ll index e244e4b074c..575e0fa717f 100644 --- a/llvm/test/CodeGen/ARM/vld4.ll +++ b/llvm/test/CodeGen/ARM/vld4.ll @@ -26,7 +26,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind { ;CHECK-LABEL: vld4i8_update: ;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1 - %A = load i8** %ptr + %A = load i8*, i8** %ptr %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16) %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 @@ -126,7 +126,7 @@ define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind { ;CHECK-LABEL: vld4Qi16_update: ;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]! ;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]! - %A = load i16** %ptr + %A = load i16*, i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8) %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 diff --git a/llvm/test/CodeGen/ARM/vlddup.ll b/llvm/test/CodeGen/ARM/vlddup.ll index caf00a4161f..09304d87d53 100644 --- a/llvm/test/CodeGen/ARM/vlddup.ll +++ b/llvm/test/CodeGen/ARM/vlddup.ll @@ -4,7 +4,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind { ;CHECK-LABEL: vld1dupi8: ;Check the (default) alignment value. ;CHECK: vld1.8 {d16[]}, [r0] - %tmp1 = load i8* %A, align 8 + %tmp1 = load i8, i8* %A, align 8 %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0 %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer ret <8 x i8> %tmp3 @@ -14,7 +14,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind { ;CHECK-LABEL: vld1dupi16: ;Check the alignment value. Max for this instruction is 16 bits: ;CHECK: vld1.16 {d16[]}, [r0:16] - %tmp1 = load i16* %A, align 8 + %tmp1 = load i16, i16* %A, align 8 %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0 %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer ret <4 x i16> %tmp3 @@ -24,7 +24,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind { ;CHECK-LABEL: vld1dupi32: ;Check the alignment value. Max for this instruction is 32 bits: ;CHECK: vld1.32 {d16[]}, [r0:32] - %tmp1 = load i32* %A, align 8 + %tmp1 = load i32, i32* %A, align 8 %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer ret <2 x i32> %tmp3 @@ -33,7 +33,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind { define <2 x float> @vld1dupf(float* %A) nounwind { ;CHECK-LABEL: vld1dupf: ;CHECK: vld1.32 {d16[]}, [r0:32] - %tmp0 = load float* %A + %tmp0 = load float, float* %A %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer ret <2 x float> %tmp2 @@ -43,7 +43,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind { ;CHECK-LABEL: vld1dupQi8: ;Check the (default) alignment value. 
;CHECK: vld1.8 {d16[], d17[]}, [r0] - %tmp1 = load i8* %A, align 8 + %tmp1 = load i8, i8* %A, align 8 %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %tmp3 @@ -52,7 +52,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind { define <4 x float> @vld1dupQf(float* %A) nounwind { ;CHECK-LABEL: vld1dupQf: ;CHECK: vld1.32 {d16[], d17[]}, [r0:32] - %tmp0 = load float* %A + %tmp0 = load float, float* %A %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %tmp2 @@ -93,7 +93,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind { define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind { ;CHECK-LABEL: vld2dupi16_update: ;CHECK: vld2.16 {d16[], d17[]}, [r1]! - %A = load i16** %ptr + %A = load i16*, i16** %ptr %A2 = bitcast i16* %A to i8* %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 @@ -130,7 +130,7 @@ declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind { ;CHECK-LABEL: vld3dupi8_update: ;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1 - %A = load i8** %ptr + %A = load i8*, i8** %ptr %tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8) %tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer @@ -171,7 +171,7 @@ declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind { ;CHECK-LABEL: vld4dupi16_update: ;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]! - %A = load i16** %ptr + %A = load i16*, i16** %ptr %A2 = bitcast i16* %A to i8* %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1) %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0 diff --git a/llvm/test/CodeGen/ARM/vldlane.ll b/llvm/test/CodeGen/ARM/vldlane.ll index 14d4e3f7fdd..ac2be7f87f5 100644 --- a/llvm/test/CodeGen/ARM/vldlane.ll +++ b/llvm/test/CodeGen/ARM/vldlane.ll @@ -7,8 +7,8 @@ define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vld1lanei8: ;Check the (default) alignment value. ;CHECK: vld1.8 {d16[3]}, [r0] - %tmp1 = load <8 x i8>* %B - %tmp2 = load i8* %A, align 8 + %tmp1 = load <8 x i8>, <8 x i8>* %B + %tmp2 = load i8, i8* %A, align 8 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3 ret <8 x i8> %tmp3 } @@ -17,8 +17,8 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vld1lanei16: ;Check the alignment value. Max for this instruction is 16 bits: ;CHECK: vld1.16 {d16[2]}, [r0:16] - %tmp1 = load <4 x i16>* %B - %tmp2 = load i16* %A, align 8 + %tmp1 = load <4 x i16>, <4 x i16>* %B + %tmp2 = load i16, i16* %A, align 8 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2 ret <4 x i16> %tmp3 } @@ -27,8 +27,8 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vld1lanei32: ;Check the alignment value. 
Max for this instruction is 32 bits: ;CHECK: vld1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x i32>* %B - %tmp2 = load i32* %A, align 8 + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = load i32, i32* %A, align 8 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 ret <2 x i32> %tmp3 } @@ -37,8 +37,8 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vld1lanei32a32: ;Check the alignment value. Legal values are none or :32. ;CHECK: vld1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x i32>* %B - %tmp2 = load i32* %A, align 4 + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = load i32, i32* %A, align 4 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 ret <2 x i32> %tmp3 } @@ -46,8 +46,8 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind { define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vld1lanef: ;CHECK: vld1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x float>* %B - %tmp2 = load float* %A, align 4 + %tmp1 = load <2 x float>, <2 x float>* %B + %tmp2 = load float, float* %A, align 4 %tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1 ret <2 x float> %tmp3 } @@ -55,8 +55,8 @@ define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind { define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vld1laneQi8: ;CHECK: vld1.8 {d17[1]}, [r0] - %tmp1 = load <16 x i8>* %B - %tmp2 = load i8* %A, align 8 + %tmp1 = load <16 x i8>, <16 x i8>* %B + %tmp2 = load i8, i8* %A, align 8 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9 ret <16 x i8> %tmp3 } @@ -64,8 +64,8 @@ define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vld1laneQi16: ;CHECK: vld1.16 {d17[1]}, [r0:16] - %tmp1 = load <8 x i16>* %B - %tmp2 = load i16* %A, align 8 + %tmp1 = load <8 x i16>, <8 x i16>* %B + %tmp2 = load i16, i16* %A, align 8 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5 ret <8 x i16> %tmp3 } @@ -73,8 +73,8 @@ define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vld1laneQi32: ;CHECK: vld1.32 {d17[1]}, [r0:32] - %tmp1 = load <4 x i32>* %B - %tmp2 = load i32* %A, align 8 + %tmp1 = load <4 x i32>, <4 x i32>* %B + %tmp2 = load i32, i32* %A, align 8 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3 ret <4 x i32> %tmp3 } @@ -82,8 +82,8 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vld1laneQf: ;CHECK: vld1.32 {d16[0]}, [r0:32] - %tmp1 = load <4 x float>* %B - %tmp2 = load float* %A + %tmp1 = load <4 x float>, <4 x float>* %B + %tmp2 = load float, float* %A %tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0 ret <4 x float> %tmp3 } @@ -101,7 +101,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vld2lanei8: ;Check the alignment value. Max for this instruction is 16 bits: ;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16] - %tmp1 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %B %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 @@ -114,7 +114,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;Check the alignment value. 
Max for this instruction is 32 bits: ;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 @@ -126,7 +126,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vld2lanei32: ;CHECK: vld2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 @@ -138,9 +138,9 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vld2lanei32_update: ;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]! - %A = load i32** %ptr + %A = load i32*, i32** %ptr %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 @@ -154,7 +154,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vld2lanef: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %B %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 @@ -167,7 +167,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;Check the (default) alignment. ;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 @@ -180,7 +180,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;Check the alignment value. 
Max for this instruction is 64 bits: ;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64] %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 @@ -192,7 +192,7 @@ define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vld2laneQf: ;CHECK: vld2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 @@ -221,7 +221,7 @@ declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x flo define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vld3lanei8: ;CHECK: vld3.8 - %tmp1 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %B %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 @@ -236,7 +236,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;Check the (default) alignment value. VLD3 does not support alignment. ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 @@ -250,7 +250,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vld3lanei32: ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 @@ -264,7 +264,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vld3lanef: ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %B %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 @@ -279,7 +279,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;Check the (default) alignment value. VLD3 does not support alignment. 
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 @@ -293,9 +293,9 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind { ;CHECK-LABEL: vld3laneQi16_update: ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}} - %A = load i16** %ptr + %A = load i16*, i16** %ptr %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 @@ -311,7 +311,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vld3laneQi32: ;CHECK: vld3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 @@ -325,7 +325,7 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vld3laneQf: ;CHECK: vld3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 @@ -357,7 +357,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vld4lanei8: ;Check the alignment value. Max for this instruction is 32 bits: ;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32] - %tmp1 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %B %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 @@ -373,8 +373,8 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vld4lanei8_update: ;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]! - %A = load i8** %ptr - %tmp1 = load <8 x i8>* %B + %A = load i8*, i8** %ptr + %tmp1 = load <8 x i8>, <8 x i8>* %B %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 @@ -394,7 +394,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;being loaded is ignored. 
;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
@@ -412,7 +412,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
@@ -428,7 +428,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vld4lanef:
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
%tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
@@ -445,7 +445,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the alignment value.  Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
@@ -462,7 +462,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}] %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 @@ -478,7 +478,7 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vld4laneQf: ;CHECK: vld4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 diff --git a/llvm/test/CodeGen/ARM/vldm-liveness.ll b/llvm/test/CodeGen/ARM/vldm-liveness.ll index 060044f9208..e114e6970a3 100644 --- a/llvm/test/CodeGen/ARM/vldm-liveness.ll +++ b/llvm/test/CodeGen/ARM/vldm-liveness.ll @@ -23,13 +23,13 @@ define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) { ; CHECK: vldmia r0, {s0, s1} ; CHECK: vldr s2, [r0, #16] %off0 = getelementptr float, float* %ptr, i32 0 - %val0 = load float* %off0 + %val0 = load float, float* %off0 %off1 = getelementptr float, float* %ptr, i32 1 - %val1 = load float* %off1 + %val1 = load float, float* %off1 %off4 = getelementptr float, float* %ptr, i32 4 - %val4 = load float* %off4 + %val4 = load float, float* %off4 %off2 = getelementptr float, float* %ptr, i32 2 - %val2 = load float* %off2 + %val2 = load float, float* %off2 %vec1 = insertelement <4 x float> undef, float %val0, i32 0 %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1 diff --git a/llvm/test/CodeGen/ARM/vldm-sched-a9.ll b/llvm/test/CodeGen/ARM/vldm-sched-a9.ll index 368c26e3a1c..0e0cf97d436 100644 --- a/llvm/test/CodeGen/ARM/vldm-sched-a9.ll +++ b/llvm/test/CodeGen/ARM/vldm-sched-a9.ll @@ -13,9 +13,9 @@ entry: %arrayidx39 = getelementptr inbounds i64, i64* %src, i32 13 %vecinit285 = shufflevector <16 x i64> undef, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> store <16 x i64> %vecinit285, <16 x i64>* undef, align 128 - %0 = load i64* undef, align 8 + %0 = load i64, i64* undef, align 8 %vecinit379 = insertelement <16 x i64> undef, i64 %0, i32 9 - %1 = load i64* undef, align 8 + %1 = load i64, i64* undef, align 8 %vecinit419 = insertelement <16 x i64> undef, i64 %1, i32 15 store <16 x i64> %vecinit419, <16 x i64>* undef, align 128 %vecinit579 = insertelement <16 x i64> undef, i64 0, i32 4 @@ -23,14 +23,14 @@ entry: %vecinit584 = insertelement <16 x i64> %vecinit582, i64 undef, i32 9 %vecinit586 = insertelement <16 x i64> %vecinit584, i64 0, i32 10 %vecinit589 = shufflevector <16 x i64> %vecinit586, <16 x i64> <i64 12, i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 18, i32 19, i32 undef> - %2 = load i64* undef, align 8 + %2 = 
load i64, i64* undef, align 8 %vecinit591 = insertelement <16 x i64> %vecinit589, i64 %2, i32 15 store <16 x i64> %vecinit591, <16 x i64>* undef, align 128 %vecinit694 = shufflevector <16 x i64> undef, <16 x i64> <i64 13, i64 14, i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> store <16 x i64> %vecinit694, <16 x i64>* undef, align 128 - %3 = load i64* undef, align 8 + %3 = load i64, i64* undef, align 8 %vecinit1331 = insertelement <16 x i64> undef, i64 %3, i32 14 - %4 = load i64* undef, align 8 + %4 = load i64, i64* undef, align 8 %vecinit1468 = insertelement <16 x i64> undef, i64 %4, i32 11 %vecinit1471 = shufflevector <16 x i64> %vecinit1468, <16 x i64> <i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 undef, i32 undef> %vecinit1474 = shufflevector <16 x i64> %vecinit1471, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> @@ -45,14 +45,14 @@ entry: %vecinit1599 = insertelement <16 x i64> %vecinit1597, i64 undef, i32 8 %vecinit1601 = insertelement <16 x i64> %vecinit1599, i64 undef, i32 9 %vecinit1603 = insertelement <16 x i64> %vecinit1601, i64 undef, i32 10 - %5 = load i64* undef, align 8 + %5 = load i64, i64* undef, align 8 %vecinit1605 = insertelement <16 x i64> %vecinit1603, i64 %5, i32 11 %vecinit1608 = shufflevector <16 x i64> %vecinit1605, <16 x i64> <i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 undef> - %6 = load i64* undef, align 8 + %6 = load i64, i64* undef, align 8 %vecinit1610 = insertelement <16 x i64> %vecinit1608, i64 %6, i32 15 store <16 x i64> %vecinit1610, <16 x i64>* undef, align 128 %vecinit2226 = shufflevector <16 x i64> undef, <16 x i64> <i64 6, i64 7, i64 8, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %7 = load i64* undef, align 8 + %7 = load i64, i64* undef, align 8 %vecinit2228 = insertelement <16 x i64> %vecinit2226, i64 %7, i32 8 %vecinit2230 = insertelement <16 x i64> %vecinit2228, i64 undef, i32 9 %vecinit2233 = shufflevector <16 x i64> %vecinit2230, <16 x i64> <i64 11, i64 12, i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef> @@ -62,7 +62,7 @@ entry: %vecinit2249 = shufflevector <16 x i64> %vecinit2246, <16 
x i64> <i64 7, i64 8, i64 9, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> %vecinit2252 = shufflevector <16 x i64> %vecinit2249, <16 x i64> <i64 10, i64 11, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> %vecinit2255 = shufflevector <16 x i64> %vecinit2252, <16 x i64> <i64 12, i64 13, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 undef, i32 undef, i32 undef> - %8 = load i64* %arrayidx39, align 8 + %8 = load i64, i64* %arrayidx39, align 8 %vecinit2257 = insertelement <16 x i64> %vecinit2255, i64 %8, i32 13 %vecinit2260 = shufflevector <16 x i64> %vecinit2257, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17> store <16 x i64> %vecinit2260, <16 x i64>* null, align 128 diff --git a/llvm/test/CodeGen/ARM/vminmax.ll b/llvm/test/CodeGen/ARM/vminmax.ll index 1167ebe0671..011bfd7ff88 100644 --- a/llvm/test/CodeGen/ARM/vminmax.ll +++ b/llvm/test/CodeGen/ARM/vminmax.ll @@ -3,8 +3,8 @@ define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmins8: ;CHECK: vmin.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmins16: ;CHECK: vmin.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmins32: ;CHECK: vmin.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vminu8: ;CHECK: vmin.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -39,8 +39,8 @@ define <8 x i8> @vminu8(<8 x i8>* %A, 
<8 x i8>* %B) nounwind { define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vminu16: ;CHECK: vmin.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -48,8 +48,8 @@ define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vminu32: ;CHECK: vmin.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -57,8 +57,8 @@ define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vminf32: ;CHECK: vmin.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -66,8 +66,8 @@ define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vminQs8: ;CHECK: vmin.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -75,8 +75,8 @@ define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vminQs16: ;CHECK: vmin.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -84,8 +84,8 @@ define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vminQs32: ;CHECK: vmin.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -93,8 +93,8 @@ define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vminQu8: ;CHECK: vmin.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -102,8 +102,8 @@ define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vminQu16: ;CHECK: vmin.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -111,8 +111,8 @@ define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind 
{ define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vminQu32: ;CHECK: vmin.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -120,8 +120,8 @@ define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vminQf32: ;CHECK: vmin.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -149,8 +149,8 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmaxs8: ;CHECK: vmax.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -158,8 +158,8 @@ define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmaxs16: ;CHECK: vmax.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -167,8 +167,8 @@ define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmaxs32: ;CHECK: vmax.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -176,8 +176,8 @@ define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmaxu8: ;CHECK: vmax.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -185,8 +185,8 @@ define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmaxu16: ;CHECK: vmax.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -194,8 +194,8 @@ define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmaxu32: ;CHECK: vmax.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -203,8 +203,8 @@ define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vmaxf32(<2 x float>* %A, 
<2 x float>* %B) nounwind { ;CHECK-LABEL: vmaxf32: ;CHECK: vmax.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -212,8 +212,8 @@ define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vmaxQs8: ;CHECK: vmax.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -221,8 +221,8 @@ define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vmaxQs16: ;CHECK: vmax.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -230,8 +230,8 @@ define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vmaxQs32: ;CHECK: vmax.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -239,8 +239,8 @@ define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vmaxQu8: ;CHECK: vmax.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -248,8 +248,8 @@ define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vmaxQu16: ;CHECK: vmax.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -257,8 +257,8 @@ define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vmaxQu32: ;CHECK: vmax.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -266,8 +266,8 @@ define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vmaxQf32: ;CHECK: vmax.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vminmaxnm.ll b/llvm/test/CodeGen/ARM/vminmaxnm.ll index 
39289a0bafb..a1832842fe0 100644 --- a/llvm/test/CodeGen/ARM/vminmaxnm.ll +++ b/llvm/test/CodeGen/ARM/vminmaxnm.ll @@ -4,8 +4,8 @@ define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vmaxnmq: ; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -13,8 +13,8 @@ define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind { define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind { ; CHECK-LABEL: vmaxnmd: ; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -22,8 +22,8 @@ define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind { define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vminnmq: ; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -31,8 +31,8 @@ define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind { define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind { ; CHECK-LABEL: vminnmd: ; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vmla.ll b/llvm/test/CodeGen/ARM/vmla.ll index 6073fc5566f..8ca33a9eeca 100644 --- a/llvm/test/CodeGen/ARM/vmla.ll +++ b/llvm/test/CodeGen/ARM/vmla.ll @@ -3,9 +3,9 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { ;CHECK-LABEL: vmlai8: ;CHECK: vmla.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = mul <8 x i8> %tmp2, %tmp3 %tmp5 = add <8 x i8> %tmp1, %tmp4 ret <8 x i8> %tmp5 @@ -14,9 +14,9 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vmlai16: ;CHECK: vmla.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = mul <4 x i16> %tmp2, %tmp3 %tmp5 = add <4 x i16> %tmp1, %tmp4 ret <4 x i16> %tmp5 @@ -25,9 +25,9 @@ define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vmlai32: ;CHECK: vmla.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 
= load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = mul <2 x i32> %tmp2, %tmp3 %tmp5 = add <2 x i32> %tmp1, %tmp4 ret <2 x i32> %tmp5 @@ -36,9 +36,9 @@ define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind { ;CHECK-LABEL: vmlaf32: ;CHECK: vmla.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B - %tmp3 = load <2 x float>* %C + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B + %tmp3 = load <2 x float>, <2 x float>* %C %tmp4 = fmul <2 x float> %tmp2, %tmp3 %tmp5 = fadd <2 x float> %tmp1, %tmp4 ret <2 x float> %tmp5 @@ -47,9 +47,9 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind { ;CHECK-LABEL: vmlaQi8: ;CHECK: vmla.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = load <16 x i8>* %C + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = load <16 x i8>, <16 x i8>* %C %tmp4 = mul <16 x i8> %tmp2, %tmp3 %tmp5 = add <16 x i8> %tmp1, %tmp4 ret <16 x i8> %tmp5 @@ -58,9 +58,9 @@ define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: vmlaQi16: ;CHECK: vmla.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = load <8 x i16>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = load <8 x i16>, <8 x i16>* %C %tmp4 = mul <8 x i16> %tmp2, %tmp3 %tmp5 = add <8 x i16> %tmp1, %tmp4 ret <8 x i16> %tmp5 @@ -69,9 +69,9 @@ define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: vmlaQi32: ;CHECK: vmla.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = load <4 x i32>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = load <4 x i32>, <4 x i32>* %C %tmp4 = mul <4 x i32> %tmp2, %tmp3 %tmp5 = add <4 x i32> %tmp1, %tmp4 ret <4 x i32> %tmp5 @@ -80,9 +80,9 @@ define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { ;CHECK-LABEL: vmlaQf32: ;CHECK: vmla.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B - %tmp3 = load <4 x float>* %C + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B + %tmp3 = load <4 x float>, <4 x float>* %C %tmp4 = fmul <4 x float> %tmp2, %tmp3 %tmp5 = fadd <4 x float> %tmp1, %tmp4 ret <4 x float> %tmp5 @@ -91,9 +91,9 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vmlals8: ;CHECK: vmlal.s8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> %tmp6 = mul <8 x i16> %tmp4, %tmp5 @@ -104,9 +104,9 @@ define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { 
;CHECK-LABEL: vmlals16: ;CHECK: vmlal.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> %tmp6 = mul <4 x i32> %tmp4, %tmp5 @@ -117,9 +117,9 @@ define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vmlals32: ;CHECK: vmlal.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> %tmp6 = mul <2 x i64> %tmp4, %tmp5 @@ -130,9 +130,9 @@ define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vmlalu8: ;CHECK: vmlal.u8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> %tmp6 = mul <8 x i16> %tmp4, %tmp5 @@ -143,9 +143,9 @@ define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vmlalu16: ;CHECK: vmlal.u16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> %tmp6 = mul <4 x i32> %tmp4, %tmp5 @@ -156,9 +156,9 @@ define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vmlalu32: ;CHECK: vmlal.u32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> %tmp6 = mul <2 x i64> %tmp4, %tmp5 diff --git a/llvm/test/CodeGen/ARM/vmls.ll b/llvm/test/CodeGen/ARM/vmls.ll index f86739cea3f..d14928147a3 100644 --- a/llvm/test/CodeGen/ARM/vmls.ll +++ b/llvm/test/CodeGen/ARM/vmls.ll @@ -3,9 +3,9 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { ;CHECK-LABEL: vmlsi8: ;CHECK: vmls.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = mul <8 x i8> %tmp2, %tmp3 %tmp5 = sub <8 x i8> %tmp1, %tmp4 ret <8 x i8> %tmp5 @@ -14,9 +14,9 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind { define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vmlsi16: ;CHECK: vmls.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + 
%tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = mul <4 x i16> %tmp2, %tmp3 %tmp5 = sub <4 x i16> %tmp1, %tmp4 ret <4 x i16> %tmp5 @@ -25,9 +25,9 @@ define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vmlsi32: ;CHECK: vmls.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = mul <2 x i32> %tmp2, %tmp3 %tmp5 = sub <2 x i32> %tmp1, %tmp4 ret <2 x i32> %tmp5 @@ -36,9 +36,9 @@ define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind { ;CHECK-LABEL: vmlsf32: ;CHECK: vmls.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B - %tmp3 = load <2 x float>* %C + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B + %tmp3 = load <2 x float>, <2 x float>* %C %tmp4 = fmul <2 x float> %tmp2, %tmp3 %tmp5 = fsub <2 x float> %tmp1, %tmp4 ret <2 x float> %tmp5 @@ -47,9 +47,9 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind { ;CHECK-LABEL: vmlsQi8: ;CHECK: vmls.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = load <16 x i8>* %C + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = load <16 x i8>, <16 x i8>* %C %tmp4 = mul <16 x i8> %tmp2, %tmp3 %tmp5 = sub <16 x i8> %tmp1, %tmp4 ret <16 x i8> %tmp5 @@ -58,9 +58,9 @@ define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: vmlsQi16: ;CHECK: vmls.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = load <8 x i16>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = load <8 x i16>, <8 x i16>* %C %tmp4 = mul <8 x i16> %tmp2, %tmp3 %tmp5 = sub <8 x i16> %tmp1, %tmp4 ret <8 x i16> %tmp5 @@ -69,9 +69,9 @@ define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: vmlsQi32: ;CHECK: vmls.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B - %tmp3 = load <4 x i32>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = load <4 x i32>, <4 x i32>* %C %tmp4 = mul <4 x i32> %tmp2, %tmp3 %tmp5 = sub <4 x i32> %tmp1, %tmp4 ret <4 x i32> %tmp5 @@ -80,9 +80,9 @@ define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind { ;CHECK-LABEL: vmlsQf32: ;CHECK: vmls.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B - %tmp3 = load <4 x float>* %C + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B + %tmp3 = load <4 x float>, <4 x float>* %C %tmp4 = fmul <4 x float> %tmp2, %tmp3 %tmp5 = fsub <4 x float> %tmp1, %tmp4 ret <4 x float> %tmp5 @@ -91,9 +91,9 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vmlsls8: ;CHECK: vmlsl.s8 - %tmp1 = load <8 x i16>* %A 
- %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> %tmp5 = sext <8 x i8> %tmp3 to <8 x i16> %tmp6 = mul <8 x i16> %tmp4, %tmp5 @@ -104,9 +104,9 @@ define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vmlsls16: ;CHECK: vmlsl.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> %tmp5 = sext <4 x i16> %tmp3 to <4 x i32> %tmp6 = mul <4 x i32> %tmp4, %tmp5 @@ -117,9 +117,9 @@ define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vmlsls32: ;CHECK: vmlsl.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> %tmp5 = sext <2 x i32> %tmp3 to <2 x i64> %tmp6 = mul <2 x i64> %tmp4, %tmp5 @@ -130,9 +130,9 @@ define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vmlslu8: ;CHECK: vmlsl.u8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> %tmp5 = zext <8 x i8> %tmp3 to <8 x i16> %tmp6 = mul <8 x i16> %tmp4, %tmp5 @@ -143,9 +143,9 @@ define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vmlslu16: ;CHECK: vmlsl.u16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> %tmp5 = zext <4 x i16> %tmp3 to <4 x i32> %tmp6 = mul <4 x i32> %tmp4, %tmp5 @@ -156,9 +156,9 @@ define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vmlslu32: ;CHECK: vmlsl.u32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> %tmp5 = zext <2 x i32> %tmp3 to <2 x i64> %tmp6 = mul <2 x i64> %tmp4, %tmp5 diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll index ae4a8f9ae67..b7a23b7bb59 100644 --- a/llvm/test/CodeGen/ARM/vmov.ll +++ b/llvm/test/CodeGen/ARM/vmov.ll @@ -191,7 +191,7 @@ entry: define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vmovls8: ;CHECK: vmovl.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -199,7 +199,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind { define <4 x i32> 
@vmovls16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vmovls16: ;CHECK: vmovl.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -207,7 +207,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind { define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vmovls32: ;CHECK: vmovl.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } @@ -215,7 +215,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind { define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vmovlu8: ;CHECK: vmovl.u8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ret <8 x i16> %tmp2 } @@ -223,7 +223,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind { define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vmovlu16: ;CHECK: vmovl.u16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ret <4 x i32> %tmp2 } @@ -231,7 +231,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind { define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vmovlu32: ;CHECK: vmovl.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> ret <2 x i64> %tmp2 } @@ -239,7 +239,7 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind { define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vmovni16: ;CHECK: vmovn.i16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8> ret <8 x i8> %tmp2 } @@ -247,7 +247,7 @@ define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind { define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vmovni32: ;CHECK: vmovn.i32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> ret <4 x i16> %tmp2 } @@ -255,7 +255,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind { define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vmovni64: ;CHECK: vmovn.i64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32> ret <2 x i32> %tmp2 } @@ -263,7 +263,7 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind { define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqmovns16: ;CHECK: vqmovn.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1) ret <8 x i8> %tmp2 } @@ -271,7 +271,7 @@ define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind { define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqmovns32: ;CHECK: vqmovn.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1) ret <4 x i16> %tmp2 } @@ -279,7 +279,7 @@ define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind { define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqmovns64: ;CHECK: vqmovn.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1) ret <2 x i32> %tmp2 } @@ -287,7 +287,7 @@ define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind { define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqmovnu16: ;CHECK: vqmovn.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x 
i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1) ret <8 x i8> %tmp2 } @@ -295,7 +295,7 @@ define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind { define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqmovnu32: ;CHECK: vqmovn.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1) ret <4 x i16> %tmp2 } @@ -303,7 +303,7 @@ define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind { define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqmovnu64: ;CHECK: vqmovn.u64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1) ret <2 x i32> %tmp2 } @@ -311,7 +311,7 @@ define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind { define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqmovuns16: ;CHECK: vqmovun.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1) ret <8 x i8> %tmp2 } @@ -319,7 +319,7 @@ define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind { define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqmovuns32: ;CHECK: vqmovun.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1) ret <4 x i16> %tmp2 } @@ -327,7 +327,7 @@ define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind { define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqmovuns64: ;CHECK: vqmovun.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1) ret <2 x i32> %tmp2 } @@ -348,7 +348,7 @@ declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone ; Radar 8598391. 
define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind { ;CHECK: vmovn - %tmp1 = load <4 x i32>* %a, align 16 + %tmp1 = load <4 x i32>, <4 x i32>* %a, align 16 %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16> store <4 x i16> %tmp2, <4 x i16>* %b, align 8 ret void @@ -376,7 +376,7 @@ define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind { entry: ;CHECK-LABEL: v_mov_v4f32_undef: ;CHECK: vmov.f32 q{{.*}}, #1.000000e+00 - %a = load <4 x float> *%p + %a = load <4 x float> , <4 x float> *%p %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0> store <4 x float> %b, <4 x float> *%p ret void diff --git a/llvm/test/CodeGen/ARM/vmul.ll b/llvm/test/CodeGen/ARM/vmul.ll index a9e5ad5a0f3..c3e41cacde4 100644 --- a/llvm/test/CodeGen/ARM/vmul.ll +++ b/llvm/test/CodeGen/ARM/vmul.ll @@ -3,8 +3,8 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmuli8: ;CHECK: vmul.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = mul <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmuli16: ;CHECK: vmul.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = mul <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmuli32: ;CHECK: vmul.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = mul <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vmulf32: ;CHECK: vmul.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fmul <2 x float> %tmp1, %tmp2 ret <2 x float> %tmp3 } @@ -39,8 +39,8 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmulp8: ;CHECK: vmul.p8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -48,8 +48,8 @@ define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vmulQi8: ;CHECK: vmul.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = mul <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -57,8 +57,8 @@ define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vmulQi16: ;CHECK: vmul.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = mul <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -66,8 +66,8 @@ define <8 x i16> @vmulQi16(<8 x i16>* %A, 
<8 x i16>* %B) nounwind { define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vmulQi32: ;CHECK: vmul.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = mul <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -75,8 +75,8 @@ define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vmulQf32: ;CHECK: vmul.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = fmul <4 x float> %tmp1, %tmp2 ret <4 x float> %tmp3 } @@ -84,8 +84,8 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind { define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vmulQp8: ;CHECK: vmul.p8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -150,8 +150,8 @@ entry: define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmulls8: ;CHECK: vmull.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> %tmp5 = mul <8 x i16> %tmp3, %tmp4 @@ -161,8 +161,8 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmulls8_int: ;CHECK: vmull.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -170,8 +170,8 @@ define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmulls16: ;CHECK: vmull.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> %tmp5 = mul <4 x i32> %tmp3, %tmp4 @@ -181,8 +181,8 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmulls16_int: ;CHECK: vmull.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -190,8 +190,8 @@ define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmulls32: ;CHECK: vmull.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> %tmp5 = mul <2 x i64> %tmp3, %tmp4 @@ -201,8 +201,8 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmulls32_int: 
;CHECK: vmull.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } @@ -210,8 +210,8 @@ define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmullu8: ;CHECK: vmull.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> %tmp5 = mul <8 x i16> %tmp3, %tmp4 @@ -221,8 +221,8 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmullu8_int: ;CHECK: vmull.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -230,8 +230,8 @@ define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmullu16: ;CHECK: vmull.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> %tmp5 = mul <4 x i32> %tmp3, %tmp4 @@ -241,8 +241,8 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vmullu16_int: ;CHECK: vmull.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -250,8 +250,8 @@ define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmullu32: ;CHECK: vmull.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> %tmp5 = mul <2 x i64> %tmp3, %tmp4 @@ -261,8 +261,8 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vmullu32_int: ;CHECK: vmull.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } @@ -270,8 +270,8 @@ define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vmullp8: ;CHECK: vmull.p8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -560,7 +560,7 @@ for.body33.lr.ph: ; preds = %for.body for.body33: ; preds = %for.body33, %for.body33.lr.ph %add45 = add i32 undef, undef %vld155 = tail 
call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1) - %0 = load i32** undef, align 4 + %0 = load i32*, i32** undef, align 4 %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8> %vmovl.i249 = zext <8 x i8> %1 to <8 x i16> @@ -616,7 +616,7 @@ declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone ; PR15970 define void @no_illegal_types_vmull_sext(<4 x i32> %a) { entry: - %wide.load283.i = load <4 x i8>* undef, align 1 + %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1 %0 = sext <4 x i8> %wide.load283.i to <4 x i32> %1 = sub nsw <4 x i32> %0, %a %2 = mul nsw <4 x i32> %1, %1 @@ -626,7 +626,7 @@ entry: } define void @no_illegal_types_vmull_zext(<4 x i32> %a) { entry: - %wide.load283.i = load <4 x i8>* undef, align 1 + %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1 %0 = zext <4 x i8> %wide.load283.i to <4 x i32> %1 = sub nsw <4 x i32> %0, %a %2 = mul nsw <4 x i32> %1, %1 @@ -642,8 +642,8 @@ define void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture ; and used a vector * scalar instruction. ; CHECK: vldr {{s[0-9]+}}, [r2] ; CHECK: vmul.f32 q8, q8, d0[0] - %tmp = load float* %src, align 4 - %tmp5 = load <4 x float>* %a, align 4 + %tmp = load float, float* %src, align 4 + %tmp5 = load <4 x float>, <4 x float>* %a, align 4 %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0 %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1 %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2 diff --git a/llvm/test/CodeGen/ARM/vneg.ll b/llvm/test/CodeGen/ARM/vneg.ll index 4d548ddf814..24a585f65a4 100644 --- a/llvm/test/CodeGen/ARM/vneg.ll +++ b/llvm/test/CodeGen/ARM/vneg.ll @@ -3,7 +3,7 @@ define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vnegs8: ;CHECK: vneg.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = sub <8 x i8> zeroinitializer, %tmp1 ret <8 x i8> %tmp2 } @@ -11,7 +11,7 @@ define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind { define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vnegs16: ;CHECK: vneg.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = sub <4 x i16> zeroinitializer, %tmp1 ret <4 x i16> %tmp2 } @@ -19,7 +19,7 @@ define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind { define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vnegs32: ;CHECK: vneg.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = sub <2 x i32> zeroinitializer, %tmp1 ret <2 x i32> %tmp2 } @@ -27,7 +27,7 @@ define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind { define <2 x float> @vnegf32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vnegf32: ;CHECK: vneg.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = fsub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1 ret <2 x float> %tmp2 } @@ -35,7 +35,7 @@ define <2 x float> @vnegf32(<2 x float>* %A) nounwind { define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vnegQs8: ;CHECK: vneg.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = sub <16 x i8> zeroinitializer, %tmp1 ret <16 x i8> %tmp2 } @@ -43,7 +43,7 @@ define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vnegQs16: ;CHECK: vneg.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = sub <8 x i16> zeroinitializer, %tmp1 ret <8 x i16> %tmp2 } @@ -51,7 +51,7 @@ define <8 x i16> 
@vnegQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vnegQs32: ;CHECK: vneg.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = sub <4 x i32> zeroinitializer, %tmp1 ret <4 x i32> %tmp2 } @@ -59,7 +59,7 @@ define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind { define <4 x float> @vnegQf32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vnegQf32: ;CHECK: vneg.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1 ret <4 x float> %tmp2 } @@ -67,7 +67,7 @@ define <4 x float> @vnegQf32(<4 x float>* %A) nounwind { define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vqnegs8: ;CHECK: vqneg.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1) ret <8 x i8> %tmp2 } @@ -75,7 +75,7 @@ define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind { define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vqnegs16: ;CHECK: vqneg.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1) ret <4 x i16> %tmp2 } @@ -83,7 +83,7 @@ define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind { define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vqnegs32: ;CHECK: vqneg.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -91,7 +91,7 @@ define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind { define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vqnegQs8: ;CHECK: vqneg.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1) ret <16 x i8> %tmp2 } @@ -99,7 +99,7 @@ define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqnegQs16: ;CHECK: vqneg.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1) ret <8 x i16> %tmp2 } @@ -107,7 +107,7 @@ define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqnegQs32: ;CHECK: vqneg.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vpadal.ll b/llvm/test/CodeGen/ARM/vpadal.ll index ffeac737fa3..ffb69243b88 100644 --- a/llvm/test/CodeGen/ARM/vpadal.ll +++ b/llvm/test/CodeGen/ARM/vpadal.ll @@ -3,8 +3,8 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpadals8: ;CHECK: vpadal.s8 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2) ret <4 x i16> %tmp3 } @@ -12,8 +12,8 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind { define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vpadals16: ;CHECK: vpadal.s16 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 
x i32> %tmp1, <4 x i16> %tmp2) ret <2 x i32> %tmp3 } @@ -21,8 +21,8 @@ define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind { define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vpadals32: ;CHECK: vpadal.s32 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2) ret <1 x i64> %tmp3 } @@ -30,8 +30,8 @@ define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind { define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpadalu8: ;CHECK: vpadal.u8 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2) ret <4 x i16> %tmp3 } @@ -39,8 +39,8 @@ define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind { define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vpadalu16: ;CHECK: vpadal.u16 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2) ret <2 x i32> %tmp3 } @@ -48,8 +48,8 @@ define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind { define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vpadalu32: ;CHECK: vpadal.u32 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2) ret <1 x i64> %tmp3 } @@ -57,8 +57,8 @@ define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vpadalQs8: ;CHECK: vpadal.s8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -66,8 +66,8 @@ define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vpadalQs16: ;CHECK: vpadal.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -75,8 +75,8 @@ define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vpadalQs32: ;CHECK: vpadal.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2) ret <2 x i64> %tmp3 } @@ -84,8 +84,8 @@ define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind { define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vpadalQu8: ;CHECK: vpadal.u8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 
= call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2) ret <8 x i16> %tmp3 } @@ -93,8 +93,8 @@ define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vpadalQu16: ;CHECK: vpadal.u16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -102,8 +102,8 @@ define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vpadalQu32: ;CHECK: vpadal.u32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2) ret <2 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vpadd.ll b/llvm/test/CodeGen/ARM/vpadd.ll index 01cb1c74e38..e362ce36f8b 100644 --- a/llvm/test/CodeGen/ARM/vpadd.ll +++ b/llvm/test/CodeGen/ARM/vpadd.ll @@ -3,8 +3,8 @@ define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpaddi8: ;CHECK: vpadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vpaddi16: ;CHECK: vpadd.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vpaddi32: ;CHECK: vpadd.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vpaddf32: ;CHECK: vpadd.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -45,7 +45,7 @@ declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwin define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vpaddls8: ;CHECK: vpaddl.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp2 } @@ -53,7 +53,7 @@ define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind { define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vpaddls16: ;CHECK: vpaddl.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 
x i16> %tmp1) ret <2 x i32> %tmp2 } @@ -61,7 +61,7 @@ define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind { define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vpaddls32: ;CHECK: vpaddl.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp2 } @@ -69,7 +69,7 @@ define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind { define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vpaddlu8: ;CHECK: vpaddl.u8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1) ret <4 x i16> %tmp2 } @@ -77,7 +77,7 @@ define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind { define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vpaddlu16: ;CHECK: vpaddl.u16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1) ret <2 x i32> %tmp2 } @@ -85,7 +85,7 @@ define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind { define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vpaddlu32: ;CHECK: vpaddl.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1) ret <1 x i64> %tmp2 } @@ -93,7 +93,7 @@ define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind { define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vpaddlQs8: ;CHECK: vpaddl.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp2 } @@ -101,7 +101,7 @@ define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind { define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vpaddlQs16: ;CHECK: vpaddl.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp2 } @@ -109,7 +109,7 @@ define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind { define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vpaddlQs32: ;CHECK: vpaddl.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp2 } @@ -117,7 +117,7 @@ define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind { define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vpaddlQu8: ;CHECK: vpaddl.u8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1) ret <8 x i16> %tmp2 } @@ -125,7 +125,7 @@ define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind { define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vpaddlQu16: ;CHECK: vpaddl.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1) ret <4 x i32> %tmp2 } @@ -133,7 +133,7 @@ define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind { define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vpaddlQu32: ;CHECK: vpaddl.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1) ret <2 x i64> %tmp2 } @@ -143,9 +143,9 @@ define void @addCombineToVPADDL() nounwind ssp { ; CHECK: vpaddl.s8 %cbcr = alloca <16 x i8>, align 16 %X = alloca <8 x i8>, align 
8 - %tmp = load <16 x i8>* %cbcr + %tmp = load <16 x i8>, <16 x i8>* %cbcr %tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> - %tmp2 = load <16 x i8>* %cbcr + %tmp2 = load <16 x i8>, <16 x i8>* %cbcr %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> %add = add <8 x i8> %tmp3, %tmp1 store <8 x i8> %add, <8 x i8>* %X, align 8 diff --git a/llvm/test/CodeGen/ARM/vpminmax.ll b/llvm/test/CodeGen/ARM/vpminmax.ll index 0b893e5bc89..9ea8c69612c 100644 --- a/llvm/test/CodeGen/ARM/vpminmax.ll +++ b/llvm/test/CodeGen/ARM/vpminmax.ll @@ -3,8 +3,8 @@ define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpmins8: ;CHECK: vpmin.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vpmins16: ;CHECK: vpmin.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vpmins32: ;CHECK: vpmin.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpminu8: ;CHECK: vpmin.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -39,8 +39,8 @@ define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vpminu16: ;CHECK: vpmin.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -48,8 +48,8 @@ define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vpminu32: ;CHECK: vpmin.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -57,8 +57,8 @@ define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vpminf32: ;CHECK: vpmin.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x 
float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -76,8 +76,8 @@ declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwi define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpmaxs8: ;CHECK: vpmax.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -85,8 +85,8 @@ define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vpmaxs16: ;CHECK: vpmax.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -94,8 +94,8 @@ define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vpmaxs32: ;CHECK: vpmax.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -103,8 +103,8 @@ define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vpmaxu8: ;CHECK: vpmax.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -112,8 +112,8 @@ define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vpmaxu16: ;CHECK: vpmax.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -121,8 +121,8 @@ define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vpmaxu32: ;CHECK: vpmax.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -130,8 +130,8 @@ define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vpmaxf32: ;CHECK: vpmax.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vqadd.ll b/llvm/test/CodeGen/ARM/vqadd.ll index 81acc8bc5ab..d1e90cb2094 100644 --- a/llvm/test/CodeGen/ARM/vqadd.ll +++ b/llvm/test/CodeGen/ARM/vqadd.ll @@ -3,8 +3,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqadds8: ;CHECK: vqadd.s8 - %tmp1 = load <8 x i8>* %A 
- %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqadds16: ;CHECK: vqadd.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqadds32: ;CHECK: vqadd.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqadds64: ;CHECK: vqadd.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqaddu8: ;CHECK: vqadd.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -48,8 +48,8 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqaddu16: ;CHECK: vqadd.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -57,8 +57,8 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqaddu32: ;CHECK: vqadd.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -66,8 +66,8 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqaddu64: ;CHECK: vqadd.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -75,8 +75,8 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqaddQs8: ;CHECK: vqadd.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x 
i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -84,8 +84,8 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqaddQs16: ;CHECK: vqadd.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -93,8 +93,8 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqaddQs32: ;CHECK: vqadd.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -102,8 +102,8 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqaddQs64: ;CHECK: vqadd.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -111,8 +111,8 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqaddQu8: ;CHECK: vqadd.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -120,8 +120,8 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqaddQu16: ;CHECK: vqadd.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -129,8 +129,8 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqaddQu32: ;CHECK: vqadd.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -138,8 +138,8 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqaddQu64: ;CHECK: vqadd.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vqdmul.ll b/llvm/test/CodeGen/ARM/vqdmul.ll index d298167d3a9..6da080012a1 100644 --- a/llvm/test/CodeGen/ARM/vqdmul.ll +++ b/llvm/test/CodeGen/ARM/vqdmul.ll @@ -5,8 +5,8 @@ target triple = "thumbv7-elf" define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 
x i16>* %B) nounwind { ;CHECK-LABEL: vqdmulhs16: ;CHECK: vqdmulh.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -14,8 +14,8 @@ define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqdmulhs32: ;CHECK: vqdmulh.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -23,8 +23,8 @@ define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqdmulhQs16: ;CHECK: vqdmulh.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -32,8 +32,8 @@ define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqdmulhQs32: ;CHECK: vqdmulh.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -83,8 +83,8 @@ declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind re define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqrdmulhs16: ;CHECK: vqrdmulh.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -92,8 +92,8 @@ define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqrdmulhs32: ;CHECK: vqrdmulh.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -101,8 +101,8 @@ define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqrdmulhQs16: ;CHECK: vqrdmulh.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -110,8 +110,8 @@ define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqrdmulhQs32: ;CHECK: vqrdmulh.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -161,8 +161,8 @@ declare <4 x i32> 
@llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind r define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqdmulls16: ;CHECK: vqdmull.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i32> %tmp3 } @@ -170,8 +170,8 @@ define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqdmulls32: ;CHECK: vqdmull.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i64> %tmp3 } @@ -200,9 +200,9 @@ declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind r define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vqdmlals16_natural: ;CHECK: vqdmlal.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3) %tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) ret <4 x i32> %tmp5 @@ -211,9 +211,9 @@ define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vqdmlals32_natural: ;CHECK: vqdmlal.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) %tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) ret <2 x i64> %tmp5 @@ -245,9 +245,9 @@ declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind re define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vqdmlsls16_natural: ;CHECK: vqdmlsl.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B - %tmp3 = load <4 x i16>* %C + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3) %tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) ret <4 x i32> %tmp5 @@ -256,9 +256,9 @@ define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ;CHECK-LABEL: vqdmlsls32_natural: ;CHECK: vqdmlsl.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B - %tmp3 = load <2 x i32>* %C + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) %tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) ret <2 x i64> %tmp5 diff --git a/llvm/test/CodeGen/ARM/vqshl.ll b/llvm/test/CodeGen/ARM/vqshl.ll index 
4afef6dbd65..6a6d9af7a2b 100644 --- a/llvm/test/CodeGen/ARM/vqshl.ll +++ b/llvm/test/CodeGen/ARM/vqshl.ll @@ -3,8 +3,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqshls8: ;CHECK: vqshl.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqshls16: ;CHECK: vqshl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqshls32: ;CHECK: vqshl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqshls64: ;CHECK: vqshl.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqshlu8: ;CHECK: vqshl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -48,8 +48,8 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqshlu16: ;CHECK: vqshl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -57,8 +57,8 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqshlu32: ;CHECK: vqshl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -66,8 +66,8 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqshlu64: ;CHECK: vqshl.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -75,8 +75,8 
@@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqshlQs8: ;CHECK: vqshl.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -84,8 +84,8 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqshlQs16: ;CHECK: vqshl.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -93,8 +93,8 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqshlQs32: ;CHECK: vqshl.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -102,8 +102,8 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqshlQs64: ;CHECK: vqshl.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -111,8 +111,8 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqshlQu8: ;CHECK: vqshl.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -120,8 +120,8 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqshlQu16: ;CHECK: vqshl.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -129,8 +129,8 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqshlQu32: ;CHECK: vqshl.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -138,8 +138,8 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqshlQu64: ;CHECK: vqshl.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } 
@@ -147,7 +147,7 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vqshls_n8: ;CHECK: vqshl.s8{{.*#7}} - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } @@ -155,7 +155,7 @@ define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind { define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vqshls_n16: ;CHECK: vqshl.s16{{.*#15}} - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } @@ -163,7 +163,7 @@ define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind { define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vqshls_n32: ;CHECK: vqshl.s32{{.*#31}} - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } @@ -171,7 +171,7 @@ define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind { define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vqshls_n64: ;CHECK: vqshl.s64{{.*#63}} - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } @@ -179,7 +179,7 @@ define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind { define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vqshlu_n8: ;CHECK: vqshl.u8{{.*#7}} - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } @@ -187,7 +187,7 @@ define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind { define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vqshlu_n16: ;CHECK: vqshl.u16{{.*#15}} - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } @@ -195,7 +195,7 @@ define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind { define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vqshlu_n32: ;CHECK: vqshl.u32{{.*#31}} - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } @@ -203,7 +203,7 @@ define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind { define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vqshlu_n64: ;CHECK: vqshl.u64{{.*#63}} - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } @@ -211,7 +211,7 @@ define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind { define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vqshlsu_n8: ;CHECK: vqshlu.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } @@ -219,7 +219,7 @@ define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind { define <4 x i16> @vqshlsu_n16(<4 x i16>* 
%A) nounwind { ;CHECK-LABEL: vqshlsu_n16: ;CHECK: vqshlu.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } @@ -227,7 +227,7 @@ define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind { define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vqshlsu_n32: ;CHECK: vqshlu.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } @@ -235,7 +235,7 @@ define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind { define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vqshlsu_n64: ;CHECK: vqshlu.s64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } @@ -243,7 +243,7 @@ define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind { define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vqshlQs_n8: ;CHECK: vqshl.s8{{.*#7}} - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } @@ -251,7 +251,7 @@ define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind { define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqshlQs_n16: ;CHECK: vqshl.s16{{.*#15}} - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } @@ -259,7 +259,7 @@ define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind { define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqshlQs_n32: ;CHECK: vqshl.s32{{.*#31}} - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } @@ -267,7 +267,7 @@ define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind { define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqshlQs_n64: ;CHECK: vqshl.s64{{.*#63}} - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } @@ -275,7 +275,7 @@ define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind { define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vqshlQu_n8: ;CHECK: vqshl.u8{{.*#7}} - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } @@ -283,7 +283,7 @@ define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind { define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqshlQu_n16: ;CHECK: vqshl.u16{{.*#15}} - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } @@ -291,7 +291,7 @@ define <8 
x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind { define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqshlQu_n32: ;CHECK: vqshl.u32{{.*#31}} - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } @@ -299,7 +299,7 @@ define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind { define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqshlQu_n64: ;CHECK: vqshl.u64{{.*#63}} - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } @@ -307,7 +307,7 @@ define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind { define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vqshlQsu_n8: ;CHECK: vqshlu.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } @@ -315,7 +315,7 @@ define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind { define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqshlQsu_n16: ;CHECK: vqshlu.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } @@ -323,7 +323,7 @@ define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind { define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqshlQsu_n32: ;CHECK: vqshlu.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } @@ -331,7 +331,7 @@ define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind { define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqshlQsu_n64: ;CHECK: vqshlu.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } @@ -369,8 +369,8 @@ declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqrshls8: ;CHECK: vqrshl.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -378,8 +378,8 @@ define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqrshls16: ;CHECK: vqrshl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -387,8 +387,8 @@ define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqrshls32: ;CHECK: vqrshl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x 
i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -396,8 +396,8 @@ define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqrshls64: ;CHECK: vqrshl.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -405,8 +405,8 @@ define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqrshlu8: ;CHECK: vqrshl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -414,8 +414,8 @@ define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqrshlu16: ;CHECK: vqrshl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -423,8 +423,8 @@ define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqrshlu32: ;CHECK: vqrshl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -432,8 +432,8 @@ define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqrshlu64: ;CHECK: vqrshl.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -441,8 +441,8 @@ define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqrshlQs8: ;CHECK: vqrshl.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -450,8 +450,8 @@ define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqrshlQs16: ;CHECK: vqrshl.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -459,8 +459,8 @@ define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqrshlQs32: ;CHECK: vqrshl.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + 
%tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -468,8 +468,8 @@ define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqrshlQs64: ;CHECK: vqrshl.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -477,8 +477,8 @@ define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqrshlQu8: ;CHECK: vqrshl.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -486,8 +486,8 @@ define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqrshlQu16: ;CHECK: vqrshl.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -495,8 +495,8 @@ define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqrshlQu32: ;CHECK: vqrshl.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -504,8 +504,8 @@ define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqrshlQu64: ;CHECK: vqrshl.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vqshrn.ll b/llvm/test/CodeGen/ARM/vqshrn.ll index f02482c0f77..b4b5e96d457 100644 --- a/llvm/test/CodeGen/ARM/vqshrn.ll +++ b/llvm/test/CodeGen/ARM/vqshrn.ll @@ -3,7 +3,7 @@ define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqshrns8: ;CHECK: vqshrn.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } @@ -11,7 +11,7 @@ define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind { define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqshrns16: ;CHECK: vqshrn.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } @@ -19,7 +19,7 @@ define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind { define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqshrns32: ;CHECK: vqshrn.s64 - %tmp1 = load 
<2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } @@ -27,7 +27,7 @@ define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind { define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqshrnu8: ;CHECK: vqshrn.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } @@ -35,7 +35,7 @@ define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind { define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqshrnu16: ;CHECK: vqshrn.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } @@ -43,7 +43,7 @@ define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind { define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqshrnu32: ;CHECK: vqshrn.u64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } @@ -51,7 +51,7 @@ define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind { define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqshruns8: ;CHECK: vqshrun.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } @@ -59,7 +59,7 @@ define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind { define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqshruns16: ;CHECK: vqshrun.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } @@ -67,7 +67,7 @@ define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind { define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqshruns32: ;CHECK: vqshrun.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } @@ -87,7 +87,7 @@ declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqrshrns8: ;CHECK: vqrshrn.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } @@ -95,7 +95,7 @@ define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind { define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqrshrns16: ;CHECK: vqrshrn.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } @@ -103,7 +103,7 @@ define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind { define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqrshrns32: ;CHECK: vqrshrn.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A 
%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } @@ -111,7 +111,7 @@ define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind { define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqrshrnu8: ;CHECK: vqrshrn.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } @@ -119,7 +119,7 @@ define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind { define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqrshrnu16: ;CHECK: vqrshrn.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } @@ -127,7 +127,7 @@ define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind { define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqrshrnu32: ;CHECK: vqrshrn.u64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } @@ -135,7 +135,7 @@ define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind { define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vqrshruns8: ;CHECK: vqrshrun.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } @@ -143,7 +143,7 @@ define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind { define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vqrshruns16: ;CHECK: vqrshrun.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } @@ -151,7 +151,7 @@ define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind { define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vqrshruns32: ;CHECK: vqrshrun.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vqsub.ll b/llvm/test/CodeGen/ARM/vqsub.ll index 4af43801920..40963ce8248 100644 --- a/llvm/test/CodeGen/ARM/vqsub.ll +++ b/llvm/test/CodeGen/ARM/vqsub.ll @@ -3,8 +3,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqsubs8: ;CHECK: vqsub.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqsubs16: ;CHECK: vqsub.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 
x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqsubs32: ;CHECK: vqsub.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqsubs64: ;CHECK: vqsub.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vqsubu8: ;CHECK: vqsub.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -48,8 +48,8 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vqsubu16: ;CHECK: vqsub.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -57,8 +57,8 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vqsubu32: ;CHECK: vqsub.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -66,8 +66,8 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vqsubu64: ;CHECK: vqsub.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -75,8 +75,8 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqsubQs8: ;CHECK: vqsub.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -84,8 +84,8 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqsubQs16: ;CHECK: vqsub.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -93,8 +93,8 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* 
%B) nounwind { ;CHECK-LABEL: vqsubQs32: ;CHECK: vqsub.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -102,8 +102,8 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqsubQs64: ;CHECK: vqsub.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -111,8 +111,8 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vqsubQu8: ;CHECK: vqsub.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -120,8 +120,8 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vqsubQu16: ;CHECK: vqsub.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -129,8 +129,8 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vqsubQu32: ;CHECK: vqsub.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -138,8 +138,8 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vqsubQu64: ;CHECK: vqsub.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vrec.ll b/llvm/test/CodeGen/ARM/vrec.ll index 91979e5a334..a7ebd79289d 100644 --- a/llvm/test/CodeGen/ARM/vrec.ll +++ b/llvm/test/CodeGen/ARM/vrec.ll @@ -3,7 +3,7 @@ define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vrecpei32: ;CHECK: vrecpe.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -11,7 +11,7 @@ define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind { define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vrecpeQi32: ;CHECK: vrecpe.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } @@ -19,7 +19,7 @@ define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind { define <2 x float> @vrecpef32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vrecpef32: ;CHECK: vrecpe.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 
x float>* %A %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp2 } @@ -27,7 +27,7 @@ define <2 x float> @vrecpef32(<2 x float>* %A) nounwind { define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vrecpeQf32: ;CHECK: vrecpe.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp2 } @@ -41,8 +41,8 @@ declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vrecpsf32: ;CHECK: vrecps.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -50,8 +50,8 @@ define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vrecpsQf32: ;CHECK: vrecps.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } @@ -62,7 +62,7 @@ declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwi define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vrsqrtei32: ;CHECK: vrsqrte.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1) ret <2 x i32> %tmp2 } @@ -70,7 +70,7 @@ define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind { define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vrsqrteQi32: ;CHECK: vrsqrte.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1) ret <4 x i32> %tmp2 } @@ -78,7 +78,7 @@ define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind { define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vrsqrtef32: ;CHECK: vrsqrte.f32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1) ret <2 x float> %tmp2 } @@ -86,7 +86,7 @@ define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind { define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind { ;CHECK-LABEL: vrsqrteQf32: ;CHECK: vrsqrte.f32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1) ret <4 x float> %tmp2 } @@ -100,8 +100,8 @@ declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vrsqrtsf32: ;CHECK: vrsqrts.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) ret <2 x float> %tmp3 } @@ -109,8 +109,8 @@ define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vrsqrtsQf32: ;CHECK: vrsqrts.f32 - %tmp1 = load <4 x 
float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) ret <4 x float> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vrev.ll b/llvm/test/CodeGen/ARM/vrev.ll index 51d4f99747f..a20d4b6baf2 100644 --- a/llvm/test/CodeGen/ARM/vrev.ll +++ b/llvm/test/CodeGen/ARM/vrev.ll @@ -3,7 +3,7 @@ define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev64D8: ;CHECK: vrev64.8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ret <8 x i8> %tmp2 } @@ -11,7 +11,7 @@ define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind { define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: test_vrev64D16: ;CHECK: vrev64.16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ret <4 x i16> %tmp2 } @@ -19,7 +19,7 @@ define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind { define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: test_vrev64D32: ;CHECK: vrev64.32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0> ret <2 x i32> %tmp2 } @@ -27,7 +27,7 @@ define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind { define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind { ;CHECK-LABEL: test_vrev64Df: ;CHECK: vrev64.32 - %tmp1 = load <2 x float>* %A + %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0> ret <2 x float> %tmp2 } @@ -35,7 +35,7 @@ define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind { define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev64Q8: ;CHECK: vrev64.8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> ret <16 x i8> %tmp2 } @@ -43,7 +43,7 @@ define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind { define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: test_vrev64Q16: ;CHECK: vrev64.16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> ret <8 x i16> %tmp2 } @@ -51,7 +51,7 @@ define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind { define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: test_vrev64Q32: ;CHECK: vrev64.32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> ret <4 x i32> %tmp2 } @@ -59,7 +59,7 @@ define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind { define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind { ;CHECK-LABEL: test_vrev64Qf: ;CHECK: vrev64.32 - %tmp1 = load <4 x float>* %A + %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> ret <4 x float> %tmp2 } @@ -67,7 +67,7 @@ define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind { define <8 x 
i8> @test_vrev32D8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev32D8: ;CHECK: vrev32.8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> ret <8 x i8> %tmp2 } @@ -75,7 +75,7 @@ define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind { define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: test_vrev32D16: ;CHECK: vrev32.16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> ret <4 x i16> %tmp2 } @@ -83,7 +83,7 @@ define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind { define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev32Q8: ;CHECK: vrev32.8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> ret <16 x i8> %tmp2 } @@ -91,7 +91,7 @@ define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind { define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: test_vrev32Q16: ;CHECK: vrev32.16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> ret <8 x i16> %tmp2 } @@ -99,7 +99,7 @@ define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind { define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev16D8: ;CHECK: vrev16.8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> ret <8 x i8> %tmp2 } @@ -107,7 +107,7 @@ define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind { define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev16Q8: ;CHECK: vrev16.8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> ret <16 x i8> %tmp2 } @@ -117,7 +117,7 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind { define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind { ;CHECK-LABEL: test_vrev64D8_undef: ;CHECK: vrev64.8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0> ret <8 x i8> %tmp2 } @@ -125,7 +125,7 @@ define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind { define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind { ;CHECK-LABEL: test_vrev32Q16_undef: ;CHECK: vrev32.16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef> ret <8 x i16> %tmp2 } @@ -136,7 +136,7 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind { ;CHECK-LABEL: test_with_vcombine: ;CHECK-NOT: vext ;CHECK: vrev64.32 - %tmp1 = load <4 x float>* %v, align 16 + %tmp1 = load <4 x float>, <4 x float>* %v, align 16 %tmp2 = bitcast <4 x float> %tmp1 to <2 x double> %tmp3 = extractelement <2 x double> %tmp2, i32 0 %tmp4 = 
bitcast double %tmp3 to <2 x float> @@ -155,7 +155,7 @@ define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst ; CHECK: vst1.32 entry: %0 = bitcast <4 x i16>* %source to <8 x i16>* - %tmp2 = load <8 x i16>* %0, align 4 + %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4 %tmp3 = extractelement <8 x i16> %tmp2, i32 6 %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0 %tmp9 = extractelement <8 x i16> %tmp2, i32 5 @@ -171,7 +171,7 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest ; CHECK: vrev64.32 entry: %0 = bitcast float* %source to <4 x float>* - %tmp2 = load <4 x float>* %0, align 4 + %tmp2 = load <4 x float>, <4 x float>* %0, align 4 %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0> %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11 store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4 diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll index e999034fa47..0eb051036d9 100644 --- a/llvm/test/CodeGen/ARM/vselect_imax.ll +++ b/llvm/test/CodeGen/ARM/vselect_imax.ll @@ -18,8 +18,8 @@ define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: func_blend10: define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2, %T1_10* %blend, %T0_10* %storeaddr) { - %v0 = load %T0_10* %loadaddr - %v1 = load %T0_10* %loadaddr2 + %v0 = load %T0_10, %T0_10* %loadaddr + %v1 = load %T0_10, %T0_10* %loadaddr2 %c = icmp slt %T0_10 %v0, %v1 ; CHECK: vbsl ; CHECK: vbsl @@ -34,8 +34,8 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2, ; CHECK-LABEL: func_blend14: define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, %T1_14* %blend, %T0_14* %storeaddr) { - %v0 = load %T0_14* %loadaddr - %v1 = load %T0_14* %loadaddr2 + %v0 = load %T0_14, %T0_14* %loadaddr + %v1 = load %T0_14, %T0_14* %loadaddr2 %c = icmp slt %T0_14 %v0, %v1 ; CHECK: vbsl ; CHECK: vbsl @@ -52,8 +52,8 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, %T1_15* %blend, %T0_15* %storeaddr) { ; CHECK: vbsl ; CHECK: vbsl - %v0 = load %T0_15* %loadaddr - %v1 = load %T0_15* %loadaddr2 + %v0 = load %T0_15, %T0_15* %loadaddr + %v1 = load %T0_15, %T0_15* %loadaddr2 %c = icmp slt %T0_15 %v0, %v1 ; COST: func_blend15 ; COST: cost of 82 {{.*}} select @@ -68,8 +68,8 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, %T1_18* %blend, %T0_18* %storeaddr) { ; CHECK: vbsl ; CHECK: vbsl - %v0 = load %T0_18* %loadaddr - %v1 = load %T0_18* %loadaddr2 + %v0 = load %T0_18, %T0_18* %loadaddr + %v1 = load %T0_18, %T0_18* %loadaddr2 %c = icmp slt %T0_18 %v0, %v1 ; COST: func_blend18 ; COST: cost of 19 {{.*}} select @@ -86,8 +86,8 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, ; CHECK: vbsl ; CHECK: vbsl ; CHECK: vbsl - %v0 = load %T0_19* %loadaddr - %v1 = load %T0_19* %loadaddr2 + %v0 = load %T0_19, %T0_19* %loadaddr + %v1 = load %T0_19, %T0_19* %loadaddr2 %c = icmp slt %T0_19 %v0, %v1 ; COST: func_blend19 ; COST: cost of 50 {{.*}} select @@ -108,8 +108,8 @@ define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2, ; CHECK: vbsl ; CHECK: vbsl ; CHECK: vbsl - %v0 = load %T0_20* %loadaddr - %v1 = load %T0_20* %loadaddr2 + %v0 = load %T0_20, %T0_20* %loadaddr + %v1 = load %T0_20, %T0_20* %loadaddr2 %c = icmp slt %T0_20 %v0, %v1 ; COST: func_blend20 ; COST: cost of 100 {{.*}} select diff --git a/llvm/test/CodeGen/ARM/vshift.ll 
b/llvm/test/CodeGen/ARM/vshift.ll index 618a137b5b0..31e4cb05dd2 100644 --- a/llvm/test/CodeGen/ARM/vshift.ll +++ b/llvm/test/CodeGen/ARM/vshift.ll @@ -3,8 +3,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vshls8: ;CHECK: vshl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shl <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vshls16: ;CHECK: vshl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shl <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vshls32: ;CHECK: vshl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = shl <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vshls64: ;CHECK: vshl.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = shl <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } @@ -39,7 +39,7 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshli8: ;CHECK: vshl.i8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <8 x i8> %tmp2 } @@ -47,7 +47,7 @@ define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshli16: ;CHECK: vshl.i16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 > ret <4 x i16> %tmp2 } @@ -55,7 +55,7 @@ define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshli32: ;CHECK: vshl.i32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 > ret <2 x i32> %tmp2 } @@ -63,7 +63,7 @@ define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vshli64: ;CHECK: vshl.i64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = shl <1 x i64> %tmp1, < i64 63 > ret <1 x i64> %tmp2 } @@ -71,8 +71,8 @@ define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vshlQs8: ;CHECK: vshl.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shl <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -80,8 +80,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vshlQs16: ;CHECK: vshl.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* 
%B %tmp3 = shl <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -89,8 +89,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vshlQs32: ;CHECK: vshl.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shl <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -98,8 +98,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vshlQs64: ;CHECK: vshl.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = shl <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -107,7 +107,7 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vshlQi8: ;CHECK: vshl.i8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <16 x i8> %tmp2 } @@ -115,7 +115,7 @@ define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vshlQi16: ;CHECK: vshl.i16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > ret <8 x i16> %tmp2 } @@ -123,7 +123,7 @@ define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vshlQi32: ;CHECK: vshl.i32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > ret <4 x i32> %tmp2 } @@ -131,7 +131,7 @@ define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vshlQi64: ;CHECK: vshl.i64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 > ret <2 x i64> %tmp2 } @@ -140,8 +140,8 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vlshru8: ;CHECK: vneg.s8 ;CHECK: vshl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = lshr <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -150,8 +150,8 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vlshru16: ;CHECK: vneg.s16 ;CHECK: vshl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = lshr <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -160,8 +160,8 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vlshru32: ;CHECK: vneg.s32 ;CHECK: vshl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = lshr <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -170,8 +170,8 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vlshru64: ;CHECK: vsub.i64 ;CHECK: vshl.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = lshr <1 x i64> %tmp1, %tmp2 ret 
<1 x i64> %tmp3 } @@ -179,7 +179,7 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vlshri8: ;CHECK: vshr.u8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <8 x i8> %tmp2 } @@ -187,7 +187,7 @@ define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind { define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vlshri16: ;CHECK: vshr.u16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 > ret <4 x i16> %tmp2 } @@ -195,7 +195,7 @@ define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind { define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vlshri32: ;CHECK: vshr.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 > ret <2 x i32> %tmp2 } @@ -203,7 +203,7 @@ define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind { define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vlshri64: ;CHECK: vshr.u64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = lshr <1 x i64> %tmp1, < i64 63 > ret <1 x i64> %tmp2 } @@ -212,8 +212,8 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vlshrQu8: ;CHECK: vneg.s8 ;CHECK: vshl.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = lshr <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -222,8 +222,8 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vlshrQu16: ;CHECK: vneg.s16 ;CHECK: vshl.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = lshr <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -232,8 +232,8 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vlshrQu32: ;CHECK: vneg.s32 ;CHECK: vshl.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = lshr <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -242,8 +242,8 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vlshrQu64: ;CHECK: vsub.i64 ;CHECK: vshl.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = lshr <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -251,7 +251,7 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vlshrQi8: ;CHECK: vshr.u8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <16 x i8> %tmp2 } @@ -259,7 +259,7 @@ define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind { define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vlshrQi16: ;CHECK: vshr.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > ret <8 x i16> %tmp2 } @@ -267,7 +267,7 @@ define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind { define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind { 
;CHECK-LABEL: vlshrQi32: ;CHECK: vshr.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > ret <4 x i32> %tmp2 } @@ -275,7 +275,7 @@ define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind { define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vlshrQi64: ;CHECK: vshr.u64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 > ret <2 x i64> %tmp2 } @@ -291,8 +291,8 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vashrs8: ;CHECK: vneg.s8 ;CHECK: vshl.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = ashr <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -301,8 +301,8 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vashrs16: ;CHECK: vneg.s16 ;CHECK: vshl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = ashr <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -311,8 +311,8 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vashrs32: ;CHECK: vneg.s32 ;CHECK: vshl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = ashr <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -321,8 +321,8 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vashrs64: ;CHECK: vsub.i64 ;CHECK: vshl.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = ashr <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } @@ -330,7 +330,7 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vashri8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vashri8: ;CHECK: vshr.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <8 x i8> %tmp2 } @@ -338,7 +338,7 @@ define <8 x i8> @vashri8(<8 x i8>* %A) nounwind { define <4 x i16> @vashri16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vashri16: ;CHECK: vshr.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 > ret <4 x i16> %tmp2 } @@ -346,7 +346,7 @@ define <4 x i16> @vashri16(<4 x i16>* %A) nounwind { define <2 x i32> @vashri32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vashri32: ;CHECK: vshr.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 > ret <2 x i32> %tmp2 } @@ -354,7 +354,7 @@ define <2 x i32> @vashri32(<2 x i32>* %A) nounwind { define <1 x i64> @vashri64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vashri64: ;CHECK: vshr.s64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = ashr <1 x i64> %tmp1, < i64 63 > ret <1 x i64> %tmp2 } @@ -363,8 +363,8 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vashrQs8: ;CHECK: vneg.s8 ;CHECK: vshl.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = ashr <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -373,8 +373,8 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) 
nounwind { ;CHECK-LABEL: vashrQs16: ;CHECK: vneg.s16 ;CHECK: vshl.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = ashr <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -383,8 +383,8 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vashrQs32: ;CHECK: vneg.s32 ;CHECK: vshl.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = ashr <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -393,8 +393,8 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vashrQs64: ;CHECK: vsub.i64 ;CHECK: vshl.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = ashr <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -402,7 +402,7 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vashrQi8: ;CHECK: vshr.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <16 x i8> %tmp2 } @@ -410,7 +410,7 @@ define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind { define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vashrQi16: ;CHECK: vshr.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > ret <8 x i16> %tmp2 } @@ -418,7 +418,7 @@ define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind { define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vashrQi32: ;CHECK: vshr.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > ret <4 x i32> %tmp2 } @@ -426,7 +426,7 @@ define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind { define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vashrQi64: ;CHECK: vshr.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 > ret <2 x i64> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vshiftins.ll b/llvm/test/CodeGen/ARM/vshiftins.ll index 9526c322201..29487378317 100644 --- a/llvm/test/CodeGen/ARM/vshiftins.ll +++ b/llvm/test/CodeGen/ARM/vshiftins.ll @@ -3,8 +3,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsli8: ;CHECK: vsli.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsli16: ;CHECK: vsli.16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> 
@vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsli32: ;CHECK: vsli.32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vsli64: ;CHECK: vsli.64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >) ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vsliQ8: ;CHECK: vsli.8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp3 } @@ -48,8 +48,8 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vsliQ16: ;CHECK: vsli.16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp3 } @@ -57,8 +57,8 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vsliQ32: ;CHECK: vsli.32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp3 } @@ -66,8 +66,8 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vsliQ64: ;CHECK: vsli.64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp3 } @@ -75,8 +75,8 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsri8: ;CHECK: vsri.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp3 } @@ -84,8 +84,8 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsri16: ;CHECK: vsri.16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load 
<4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp3 } @@ -93,8 +93,8 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsri32: ;CHECK: vsri.32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp3 } @@ -102,8 +102,8 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vsri64: ;CHECK: vsri.64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp3 } @@ -111,8 +111,8 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vsriQ8: ;CHECK: vsri.8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp3 } @@ -120,8 +120,8 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vsriQ16: ;CHECK: vsri.16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp3 } @@ -129,8 +129,8 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vsriQ32: ;CHECK: vsri.32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp3 } @@ -138,8 +138,8 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vsriQ64: ;CHECK: vsri.64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/ARM/vshl.ll b/llvm/test/CodeGen/ARM/vshl.ll index 6228652fc71..ef76e3d9a36 100644 --- a/llvm/test/CodeGen/ARM/vshl.ll +++ b/llvm/test/CodeGen/ARM/vshl.ll @@ -3,8 +3,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vshls8: ;CHECK: vshl.s8 - %tmp1 = load 
<8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vshls16: ;CHECK: vshl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vshls32: ;CHECK: vshl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vshls64: ;CHECK: vshl.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vshlu8: ;CHECK: vshl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -48,8 +48,8 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vshlu16: ;CHECK: vshl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -57,8 +57,8 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vshlu32: ;CHECK: vshl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -66,8 +66,8 @@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vshlu64: ;CHECK: vshl.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -75,8 +75,8 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vshlQs8: ;CHECK: vshl.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + 
%tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -84,8 +84,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vshlQs16: ;CHECK: vshl.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -93,8 +93,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vshlQs32: ;CHECK: vshl.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -102,8 +102,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vshlQs64: ;CHECK: vshl.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -111,8 +111,8 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vshlQu8: ;CHECK: vshl.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -120,8 +120,8 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vshlQu16: ;CHECK: vshl.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -129,8 +129,8 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vshlQu32: ;CHECK: vshl.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -138,8 +138,8 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vshlQu64: ;CHECK: vshl.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -150,7 +150,7 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshli8: ;CHECK: vshl.i8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> 
< i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } @@ -158,7 +158,7 @@ define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshli16: ;CHECK: vshl.i16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } @@ -166,7 +166,7 @@ define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshli32: ;CHECK: vshl.i32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } @@ -174,7 +174,7 @@ define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vshli64: ;CHECK: vshl.i64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } @@ -182,7 +182,7 @@ define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vshlQi8: ;CHECK: vshl.i8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } @@ -190,7 +190,7 @@ define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vshlQi16: ;CHECK: vshl.i16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } @@ -198,7 +198,7 @@ define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vshlQi32: ;CHECK: vshl.i32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } @@ -206,7 +206,7 @@ define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vshlQi64: ;CHECK: vshl.i64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } @@ -216,7 +216,7 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshrs8: ;CHECK: vshr.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } @@ -224,7 +224,7 @@ define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind { define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshrs16: ;CHECK: vshr.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } @@ -232,7 +232,7 @@ define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind { define <2 x i32> 
@vshrs32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshrs32: ;CHECK: vshr.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } @@ -240,7 +240,7 @@ define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind { define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vshrs64: ;CHECK: vshr.s64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } @@ -248,7 +248,7 @@ define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind { define <8 x i8> @vshru8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshru8: ;CHECK: vshr.u8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } @@ -256,7 +256,7 @@ define <8 x i8> @vshru8(<8 x i8>* %A) nounwind { define <4 x i16> @vshru16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshru16: ;CHECK: vshr.u16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } @@ -264,7 +264,7 @@ define <4 x i16> @vshru16(<4 x i16>* %A) nounwind { define <2 x i32> @vshru32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshru32: ;CHECK: vshr.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } @@ -272,7 +272,7 @@ define <2 x i32> @vshru32(<2 x i32>* %A) nounwind { define <1 x i64> @vshru64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vshru64: ;CHECK: vshr.u64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } @@ -280,7 +280,7 @@ define <1 x i64> @vshru64(<1 x i64>* %A) nounwind { define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vshrQs8: ;CHECK: vshr.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } @@ -288,7 +288,7 @@ define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vshrQs16: ;CHECK: vshr.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } @@ -296,7 +296,7 @@ define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vshrQs32: ;CHECK: vshr.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } @@ -304,7 +304,7 @@ define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind { define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vshrQs64: ;CHECK: vshr.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = 
call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 } @@ -312,7 +312,7 @@ define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind { define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vshrQu8: ;CHECK: vshr.u8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } @@ -320,7 +320,7 @@ define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind { define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vshrQu16: ;CHECK: vshr.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } @@ -328,7 +328,7 @@ define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind { define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vshrQu32: ;CHECK: vshr.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } @@ -336,7 +336,7 @@ define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind { define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vshrQu64: ;CHECK: vshr.u64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 } @@ -364,8 +364,8 @@ declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind re define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vrshls8: ;CHECK: vrshl.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -373,8 +373,8 @@ define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vrshls16: ;CHECK: vrshl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -382,8 +382,8 @@ define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vrshls32: ;CHECK: vrshl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -391,8 +391,8 @@ define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vrshls64: ;CHECK: vrshl.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -400,8 +400,8 @@ define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x 
i64>* %B) nounwind { define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vrshlu8: ;CHECK: vrshl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -409,8 +409,8 @@ define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vrshlu16: ;CHECK: vrshl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -418,8 +418,8 @@ define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vrshlu32: ;CHECK: vrshl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -427,8 +427,8 @@ define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vrshlu64: ;CHECK: vrshl.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -436,8 +436,8 @@ define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vrshlQs8: ;CHECK: vrshl.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -445,8 +445,8 @@ define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vrshlQs16: ;CHECK: vrshl.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -454,8 +454,8 @@ define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vrshlQs32: ;CHECK: vrshl.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -463,8 +463,8 @@ define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vrshlQs64: ;CHECK: vrshl.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -472,8 +472,8 @@ define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* 
%B) nounwind { define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vrshlQu8: ;CHECK: vrshl.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -481,8 +481,8 @@ define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vrshlQu16: ;CHECK: vrshl.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -490,8 +490,8 @@ define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vrshlQu32: ;CHECK: vrshl.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -499,8 +499,8 @@ define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vrshlQu64: ;CHECK: vrshl.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -508,7 +508,7 @@ define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vrshrs8: ;CHECK: vrshr.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } @@ -516,7 +516,7 @@ define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind { define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vrshrs16: ;CHECK: vrshr.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } @@ -524,7 +524,7 @@ define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind { define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vrshrs32: ;CHECK: vrshr.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } @@ -532,7 +532,7 @@ define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind { define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vrshrs64: ;CHECK: vrshr.s64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } @@ -540,7 +540,7 @@ define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind { define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vrshru8: ;CHECK: vrshr.u8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, 
i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } @@ -548,7 +548,7 @@ define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind { define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vrshru16: ;CHECK: vrshr.u16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } @@ -556,7 +556,7 @@ define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind { define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vrshru32: ;CHECK: vrshr.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } @@ -564,7 +564,7 @@ define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind { define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind { ;CHECK-LABEL: vrshru64: ;CHECK: vrshr.u64 - %tmp1 = load <1 x i64>* %A + %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } @@ -572,7 +572,7 @@ define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind { define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vrshrQs8: ;CHECK: vrshr.s8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } @@ -580,7 +580,7 @@ define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind { define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vrshrQs16: ;CHECK: vrshr.s16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } @@ -588,7 +588,7 @@ define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind { define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vrshrQs32: ;CHECK: vrshr.s32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } @@ -596,7 +596,7 @@ define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind { define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vrshrQs64: ;CHECK: vrshr.s64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 } @@ -604,7 +604,7 @@ define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind { define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind { ;CHECK-LABEL: vrshrQu8: ;CHECK: vrshr.u8 - %tmp1 = load <16 x i8>* %A + %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } @@ -612,7 +612,7 @@ define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind { define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vrshrQu16: ;CHECK: vrshr.u16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, 
i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } @@ -620,7 +620,7 @@ define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind { define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vrshrQu32: ;CHECK: vrshr.u32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } @@ -628,7 +628,7 @@ define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind { define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vrshrQu64: ;CHECK: vrshr.u64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vshll.ll b/llvm/test/CodeGen/ARM/vshll.ll index 27873eb7275..a8230134d91 100644 --- a/llvm/test/CodeGen/ARM/vshll.ll +++ b/llvm/test/CodeGen/ARM/vshll.ll @@ -3,7 +3,7 @@ define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshlls8: ;CHECK: vshll.s8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %sext = sext <8 x i8> %tmp1 to <8 x i16> %shift = shl <8 x i16> %sext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> ret <8 x i16> %shift @@ -12,7 +12,7 @@ define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind { define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshlls16: ;CHECK: vshll.s16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %sext = sext <4 x i16> %tmp1 to <4 x i32> %shift = shl <4 x i32> %sext, <i32 15, i32 15, i32 15, i32 15> ret <4 x i32> %shift @@ -21,7 +21,7 @@ define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind { define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshlls32: ;CHECK: vshll.s32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %sext = sext <2 x i32> %tmp1 to <2 x i64> %shift = shl <2 x i64> %sext, <i64 31, i64 31> ret <2 x i64> %shift @@ -30,7 +30,7 @@ define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind { define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshllu8: ;CHECK: vshll.u8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %zext = zext <8 x i8> %tmp1 to <8 x i16> %shift = shl <8 x i16> %zext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> ret <8 x i16> %shift @@ -39,7 +39,7 @@ define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind { define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshllu16: ;CHECK: vshll.u16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %zext = zext <4 x i16> %tmp1 to <4 x i32> %shift = shl <4 x i32> %zext, <i32 15, i32 15, i32 15, i32 15> ret <4 x i32> %shift @@ -48,7 +48,7 @@ define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind { define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshllu32: ;CHECK: vshll.u32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %zext = zext <2 x i32> %tmp1 to <2 x i64> %shift = shl <2 x i64> %zext, <i64 31, i64 31> ret <2 x i64> %shift @@ -59,7 +59,7 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind { ;CHECK-LABEL: vshlli8: ;CHECK: vshll.i8 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %sext = sext <8 x i8> %tmp1 to <8 x i16> %shift = shl <8 x i16> %sext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ret <8 x i16> %shift @@ -68,7 +68,7 @@ define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind { define 
<4 x i32> @vshlli16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vshlli16: ;CHECK: vshll.i16 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %zext = zext <4 x i16> %tmp1 to <4 x i32> %shift = shl <4 x i32> %zext, <i32 16, i32 16, i32 16, i32 16> ret <4 x i32> %shift @@ -77,7 +77,7 @@ define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind { define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vshlli32: ;CHECK: vshll.i32 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %zext = zext <2 x i32> %tmp1 to <2 x i64> %shift = shl <2 x i64> %zext, <i64 32, i64 32> ret <2 x i64> %shift @@ -89,7 +89,7 @@ define <8 x i16> @vshllu8_bad(<8 x i8>* %A) nounwind { ; CHECK-LABEL: vshllu8_bad: ; CHECK: vmovl.u8 ; CHECK: vshl.i16 - %tmp1 = load <8 x i8>* %A + %tmp1 = load <8 x i8>, <8 x i8>* %A %zext = zext <8 x i8> %tmp1 to <8 x i16> %shift = shl <8 x i16> %zext, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9> ret <8 x i16> %shift @@ -99,7 +99,7 @@ define <4 x i32> @vshlls16_bad(<4 x i16>* %A) nounwind { ; CHECK-LABEL: vshlls16_bad: ; CHECK: vmovl.s16 ; CHECK: vshl.i32 - %tmp1 = load <4 x i16>* %A + %tmp1 = load <4 x i16>, <4 x i16>* %A %sext = sext <4 x i16> %tmp1 to <4 x i32> %shift = shl <4 x i32> %sext, <i32 17, i32 17, i32 17, i32 17> ret <4 x i32> %shift @@ -109,7 +109,7 @@ define <2 x i64> @vshllu32_bad(<2 x i32>* %A) nounwind { ; CHECK-LABEL: vshllu32_bad: ; CHECK: vmovl.u32 ; CHECK: vshl.i64 - %tmp1 = load <2 x i32>* %A + %tmp1 = load <2 x i32>, <2 x i32>* %A %zext = zext <2 x i32> %tmp1 to <2 x i64> %shift = shl <2 x i64> %zext, <i64 33, i64 33> ret <2 x i64> %shift diff --git a/llvm/test/CodeGen/ARM/vshrn.ll b/llvm/test/CodeGen/ARM/vshrn.ll index 8aa009ab823..e033486562c 100644 --- a/llvm/test/CodeGen/ARM/vshrn.ll +++ b/llvm/test/CodeGen/ARM/vshrn.ll @@ -3,7 +3,7 @@ define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vshrns8: ;CHECK: vshrn.i16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = lshr <8 x i16> %tmp1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> ret <8 x i8> %tmp3 @@ -12,7 +12,7 @@ define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind { define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vshrns16: ;CHECK: vshrn.i32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = ashr <4 x i32> %tmp1, <i32 16, i32 16, i32 16, i32 16> %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> ret <4 x i16> %tmp3 @@ -21,7 +21,7 @@ define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind { define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vshrns32: ;CHECK: vshrn.i64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = ashr <2 x i64> %tmp1, <i64 32, i64 32> %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> ret <2 x i32> %tmp3 @@ -31,7 +31,7 @@ define <8 x i8> @vshrns8_bad(<8 x i16>* %A) nounwind { ; CHECK-LABEL: vshrns8_bad: ; CHECK: vshr.s16 ; CHECK: vmovn.i16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = ashr <8 x i16> %tmp1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9> %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> ret <8 x i8> %tmp3 @@ -41,7 +41,7 @@ define <4 x i16> @vshrns16_bad(<4 x i32>* %A) nounwind { ; CHECK-LABEL: vshrns16_bad: ; CHECK: vshr.u32 ; CHECK: vmovn.i32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = lshr <4 x i32> %tmp1, <i32 17, i32 17, i32 17, i32 17> %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> ret <4 x i16> %tmp3 @@ 
-51,7 +51,7 @@ define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind { ; CHECK-LABEL: vshrns32_bad: ; CHECK: vshr.u64 ; CHECK: vmovn.i64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = lshr <2 x i64> %tmp1, <i64 33, i64 33> %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> ret <2 x i32> %tmp3 @@ -60,7 +60,7 @@ define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind { define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind { ;CHECK-LABEL: vrshrns8: ;CHECK: vrshrn.i16 - %tmp1 = load <8 x i16>* %A + %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } @@ -68,7 +68,7 @@ define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind { define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind { ;CHECK-LABEL: vrshrns16: ;CHECK: vrshrn.i32 - %tmp1 = load <4 x i32>* %A + %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } @@ -76,7 +76,7 @@ define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind { define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind { ;CHECK-LABEL: vrshrns32: ;CHECK: vrshrn.i64 - %tmp1 = load <2 x i64>* %A + %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } diff --git a/llvm/test/CodeGen/ARM/vsra.ll b/llvm/test/CodeGen/ARM/vsra.ll index fa5985a330c..cb758fa2f38 100644 --- a/llvm/test/CodeGen/ARM/vsra.ll +++ b/llvm/test/CodeGen/ARM/vsra.ll @@ -3,8 +3,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsras8: ;CHECK: vsra.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > %tmp4 = add <8 x i8> %tmp1, %tmp3 ret <8 x i8> %tmp4 @@ -13,8 +13,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsras16: ;CHECK: vsra.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 @@ -23,8 +23,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsras32: ;CHECK: vsra.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 > %tmp4 = add <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 @@ -33,8 +33,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vsras64: ;CHECK: vsra.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = ashr <1 x i64> %tmp2, < i64 63 > %tmp4 = add <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 @@ -43,8 +43,8 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vsraQs8: ;CHECK: vsra.s8 - %tmp1 = load <16 x i8>* 
%A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > %tmp4 = add <16 x i8> %tmp1, %tmp3 ret <16 x i8> %tmp4 @@ -53,8 +53,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vsraQs16: ;CHECK: vsra.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -63,8 +63,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vsraQs32: ;CHECK: vsra.s32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 > %tmp4 = add <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -73,8 +73,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vsraQs64: ;CHECK: vsra.s64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 > %tmp4 = add <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 @@ -83,8 +83,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsrau8: ;CHECK: vsra.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > %tmp4 = add <8 x i8> %tmp1, %tmp3 ret <8 x i8> %tmp4 @@ -93,8 +93,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsrau16: ;CHECK: vsra.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 @@ -103,8 +103,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsrau32: ;CHECK: vsra.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 > %tmp4 = add <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 @@ -113,8 +113,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vsrau64: ;CHECK: vsra.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = lshr <1 x i64> %tmp2, < i64 63 > %tmp4 = add <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 @@ -123,8 +123,8 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vsraQu8(<16 x 
i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vsraQu8: ;CHECK: vsra.u8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > %tmp4 = add <16 x i8> %tmp1, %tmp3 ret <16 x i8> %tmp4 @@ -133,8 +133,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vsraQu16: ;CHECK: vsra.u16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > %tmp4 = add <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -143,8 +143,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vsraQu32: ;CHECK: vsra.u32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 > %tmp4 = add <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -153,8 +153,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vsraQu64: ;CHECK: vsra.u64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 > %tmp4 = add <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 @@ -163,8 +163,8 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vrsras8: ;CHECK: vrsra.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) %tmp4 = add <8 x i8> %tmp1, %tmp3 ret <8 x i8> %tmp4 @@ -173,8 +173,8 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vrsras16: ;CHECK: vrsra.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) %tmp4 = add <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 @@ -183,8 +183,8 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vrsras32: ;CHECK: vrsra.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) %tmp4 = add <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 @@ -193,8 +193,8 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vrsras64: ;CHECK: vrsra.s64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = 
load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) %tmp4 = add <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 @@ -203,8 +203,8 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vrsrau8: ;CHECK: vrsra.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) %tmp4 = add <8 x i8> %tmp1, %tmp3 ret <8 x i8> %tmp4 @@ -213,8 +213,8 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vrsrau16: ;CHECK: vrsra.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) %tmp4 = add <4 x i16> %tmp1, %tmp3 ret <4 x i16> %tmp4 @@ -223,8 +223,8 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vrsrau32: ;CHECK: vrsra.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) %tmp4 = add <2 x i32> %tmp1, %tmp3 ret <2 x i32> %tmp4 @@ -233,8 +233,8 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vrsrau64: ;CHECK: vrsra.u64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) %tmp4 = add <1 x i64> %tmp1, %tmp3 ret <1 x i64> %tmp4 @@ -243,8 +243,8 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vrsraQs8: ;CHECK: vrsra.s8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) %tmp4 = add <16 x i8> %tmp1, %tmp3 ret <16 x i8> %tmp4 @@ -253,8 +253,8 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vrsraQs16: ;CHECK: vrsra.s16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) %tmp4 = add <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -263,8 +263,8 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vrsraQs32: ;CHECK: vrsra.s32 - 
%tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -273,8 +273,8 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsraQs64:
;CHECK: vrsra.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -283,8 +283,8 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrsraQu8:
;CHECK: vrsra.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
%tmp4 = add <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -293,8 +293,8 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrsraQu16:
;CHECK: vrsra.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -303,8 +303,8 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrsraQu32:
;CHECK: vrsra.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -313,8 +313,8 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsraQu64:
;CHECK: vrsra.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vst1.ll b/llvm/test/CodeGen/ARM/vst1.ll
index 723eb1db071..f605fa4d600 100644
--- a/llvm/test/CodeGen/ARM/vst1.ll
+++ b/llvm/test/CodeGen/ARM/vst1.ll
@@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst1i8:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst1.8 {d16}, [r0:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
ret void
}
@@ -13,7 +13,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst1i16:
;CHECK: vst1.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -22,7 +22,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst1i32:
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -31,7 +31,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1f:
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
ret void
}
@@ -40,9 +40,9 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1f_update:
;CHECK: vst1.32 {d16}, [r1]!
- %A = load float** %ptr
+ %A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
%tmp2 = getelementptr float, float* %A, i32 2
store float* %tmp2, float** %ptr
@@ -53,7 +53,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst1i64:
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1)
ret void
}
@@ -62,7 +62,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst1Qi8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst1.8 {d16, d17}, [r0:64]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
ret void
}
@@ -72,7 +72,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the alignment value.
Max for this instruction is 128 bits:
;CHECK: vst1.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
ret void
}
@@ -81,9 +81,9 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst1Qi16_update:
;CHECK: vst1.16 {d16, d17}, [r1:64], r2
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
%tmp2 = getelementptr i16, i16* %A, i32 %inc
store i16* %tmp2, i16** %ptr
@@ -94,7 +94,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst1Qi32:
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -103,7 +103,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst1Qf:
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
ret void
}
@@ -112,7 +112,7 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vst1Qi64:
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %B
call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
ret void
}
@@ -121,7 +121,7 @@ define void @vst1Qf64(double* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: vst1Qf64:
;CHECK: vst1.64
%tmp0 = bitcast double* %A to i8*
- %tmp1 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %B
call void @llvm.arm.neon.vst1.v2f64(i8* %tmp0, <2 x double> %tmp1, i32 1)
ret void
}
diff --git a/llvm/test/CodeGen/ARM/vst2.ll b/llvm/test/CodeGen/ARM/vst2.ll
index 2130e13216f..17c8a4bdad9 100644
--- a/llvm/test/CodeGen/ARM/vst2.ll
+++ b/llvm/test/CodeGen/ARM/vst2.ll
@@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst2i8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst2.8 {d16, d17}, [r0:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
}
@@ -13,8 +13,8 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst2i8_update:
;CHECK: vst2.8 {d16, d17}, [r1], r2
- %A = load i8** %ptr
- %tmp1 = load <8 x i8>* %B
+ %A = load i8*, i8** %ptr
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
%tmp2 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp2, i8** %ptr
@@ -26,7 +26,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst2.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
ret void
}
@@ -35,7 +35,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst2i32:
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -44,7 +44,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst2f:
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -54,7 +54,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst1.64 {d16, d17}, [r0:128]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
ret void
}
@@ -63,9 +63,9 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst2i64_update:
;CHECK: vst1.64 {d16, d17}, [r1:64]!
- %A = load i64** %ptr
+ %A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
%tmp2 = getelementptr i64, i64* %A, i32 2
store i64* %tmp2, i64** %ptr
@@ -76,7 +76,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
ret void
}
@@ -86,7 +86,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
ret void
}
@@ -96,7 +96,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
ret void
}
@@ -105,7 +105,7 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst2Qf:
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
@@ -113,7 +113,7 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst2update:
;CHECK: vst2.16 {d16, d17}, [r0]!
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
%t5 = getelementptr inbounds i8, i8* %out, i32 16
ret i8* %t5
@@ -122,7 +122,7 @@ define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
;CHECK-LABEL: vst2update2:
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
- %tmp1 = load <4 x float>* %this
+ %tmp1 = load <4 x float>, <4 x float>* %this
call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
%tmp2 = getelementptr inbounds i8, i8* %out, i32 32
ret i8* %tmp2
diff --git a/llvm/test/CodeGen/ARM/vst3.ll b/llvm/test/CodeGen/ARM/vst3.ll
index 3a861dd40fe..691ee3bd28f 100644
--- a/llvm/test/CodeGen/ARM/vst3.ll
+++ b/llvm/test/CodeGen/ARM/vst3.ll
@@ -5,7 +5,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
ret void
}
@@ -14,7 +14,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst3i16:
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -23,7 +23,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst3i32:
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -32,9 +32,9 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst3i32_update:
;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
- %A = load i32** %ptr
+ %A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
%tmp2 = getelementptr i32, i32* %A, i32 6
store i32* %tmp2, i32** %ptr
@@ -45,7 +45,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst3f:
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -56,7 +56,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
ret void
}
@@ -64,9 +64,9 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst3i64_update
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
- %A = load i64** %ptr
+ %A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
%tmp2 = getelementptr i64, i64* %A, i32 3
store i64* %tmp2, i64** %ptr
@@ -79,7 +79,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
ret void
}
@@ -89,7 +89,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3.16
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -99,9 +99,9 @@ define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vst3Qi16_update:
;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
%tmp2 = getelementptr i16, i16* %A, i32 24
store i16* %tmp2, i16** %ptr
@@ -113,7 +113,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3.32
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -123,7 +123,7 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3.32
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
diff --git a/llvm/test/CodeGen/ARM/vst4.ll b/llvm/test/CodeGen/ARM/vst4.ll
index e865884f759..c343c6c8695 100644
--- a/llvm/test/CodeGen/ARM/vst4.ll
+++ b/llvm/test/CodeGen/ARM/vst4.ll
@@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst4i8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
}
@@ -13,8 +13,8 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst4i8_update:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
- %A = load i8** %ptr
- %tmp1 = load <8 x i8>* %B
+ %A = load i8*, i8** %ptr
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
%tmp2 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp2, i8** %ptr
@@ -26,7 +26,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;Check the alignment value.
Max for this instruction is 256 bits: ;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16) ret void } @@ -36,7 +36,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { ;Check the alignment value. Max for this instruction is 256 bits: ;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32) ret void } @@ -45,7 +45,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vst4f: ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %B call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) ret void } @@ -55,7 +55,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ;Check the alignment value. Max for this instruction is 256 bits: ;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256] %tmp0 = bitcast i64* %A to i8* - %tmp1 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %B call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64) ret void } @@ -63,9 +63,9 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vst4i64_update: ;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]! - %A = load i64** %ptr + %A = load i64*, i64** %ptr %tmp0 = bitcast i64* %A to i8* - %tmp1 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %B call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1) %tmp2 = getelementptr i64, i64* %A, i32 4 store i64* %tmp2, i64** %ptr @@ -77,7 +77,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;Check the alignment value. Max for this instruction is 256 bits: ;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]! ;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256] - %tmp1 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %B call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64) ret void } @@ -88,7 +88,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]! 
;CHECK: vst4.16 {d17, d19, d21, d23}, [r0] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ret void } @@ -98,7 +98,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst4.32 ;CHECK: vst4.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) ret void } @@ -108,7 +108,7 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst4.32 ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ret void } @@ -118,9 +118,9 @@ define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind { ;CHECK-LABEL: vst4Qf_update: ;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]! ;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]! - %A = load float** %ptr + %A = load float*, float** %ptr %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) %tmp2 = getelementptr float, float* %A, i32 16 store float* %tmp2, float** %ptr diff --git a/llvm/test/CodeGen/ARM/vstlane.ll b/llvm/test/CodeGen/ARM/vstlane.ll index af1ba9b07d6..a4575417bce 100644 --- a/llvm/test/CodeGen/ARM/vstlane.ll +++ b/llvm/test/CodeGen/ARM/vstlane.ll @@ -4,7 +4,7 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst1lanei8: ;Check the (default) alignment. ;CHECK: vst1.8 {d16[3]}, [r0] - %tmp1 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %B %tmp2 = extractelement <8 x i8> %tmp1, i32 3 store i8 %tmp2, i8* %A, align 8 ret void @@ -14,8 +14,8 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst1lanei8_update: ;CHECK: vst1.8 {d16[3]}, [{{r[0-9]}}]! - %A = load i8** %ptr - %tmp1 = load <8 x i8>* %B + %A = load i8*, i8** %ptr + %tmp1 = load <8 x i8>, <8 x i8>* %B %tmp2 = extractelement <8 x i8> %tmp1, i32 3 store i8 %tmp2, i8* %A, align 8 %tmp3 = getelementptr i8, i8* %A, i32 1 @@ -27,7 +27,7 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vst1lanei16: ;Check the alignment value. Max for this instruction is 16 bits: ;CHECK: vst1.16 {d16[2]}, [r0:16] - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B %tmp2 = extractelement <4 x i16> %tmp1, i32 2 store i16 %tmp2, i16* %A, align 8 ret void @@ -37,7 +37,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vst1lanei32: ;Check the alignment value. 
Max for this instruction is 32 bits: ;CHECK: vst1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B %tmp2 = extractelement <2 x i32> %tmp1, i32 1 store i32 %tmp2, i32* %A, align 8 ret void @@ -46,7 +46,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { define void @vst1lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vst1lanef: ;CHECK: vst1.32 {d16[1]}, [r0:32] - %tmp1 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %B %tmp2 = extractelement <2 x float> %tmp1, i32 1 store float %tmp2, float* %A ret void @@ -56,7 +56,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vst1laneQi8: ; // Can use scalar load. No need to use vectors. ; // CHE-CK: vst1.8 {d17[1]}, [r0] - %tmp1 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %B %tmp2 = extractelement <16 x i8> %tmp1, i32 9 store i8 %tmp2, i8* %A, align 8 ret void @@ -65,7 +65,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vst1laneQi16: ;CHECK: vst1.16 {d17[1]}, [r0:16] - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B %tmp2 = extractelement <8 x i16> %tmp1, i32 5 store i16 %tmp2, i16* %A, align 8 ret void @@ -75,7 +75,7 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vst1laneQi32: ; // Can use scalar load. No need to use vectors. ; // CHE-CK: vst1.32 {d17[1]}, [r0:32] - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B %tmp2 = extractelement <4 x i32> %tmp1, i32 3 store i32 %tmp2, i32* %A, align 8 ret void @@ -86,8 +86,8 @@ define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vst1laneQi32_update: ; // Can use scalar load. No need to use vectors. ; // CHE-CK: vst1.32 {d17[1]}, [r1:32]! - %A = load i32** %ptr - %tmp1 = load <4 x i32>* %B + %A = load i32*, i32** %ptr + %tmp1 = load <4 x i32>, <4 x i32>* %B %tmp2 = extractelement <4 x i32> %tmp1, i32 3 store i32 %tmp2, i32* %A, align 8 %tmp3 = getelementptr i32, i32* %A, i32 1 @@ -99,7 +99,7 @@ define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vst1laneQf: ; // Can use scalar load. No need to use vectors. ; // CHE-CK: vst1.32 {d17[1]}, [r0] - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B %tmp2 = extractelement <4 x float> %tmp1, i32 3 store float %tmp2, float* %A ret void @@ -109,7 +109,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst2lanei8: ;Check the alignment value. Max for this instruction is 16 bits: ;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16] - %tmp1 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %B call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) ret void } @@ -119,7 +119,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { ;Check the alignment value. 
Max for this instruction is 32 bits: ;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) ret void } @@ -128,9 +128,9 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind { ;CHECK-LABEL: vst2lanei16_update: ;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2 - %A = load i16** %ptr + %A = load i16*, i16** %ptr %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2) %tmp2 = getelementptr i16, i16* %A, i32 %inc store i16* %tmp2, i16** %ptr @@ -141,7 +141,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vst2lanei32: ;CHECK: vst2.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -150,7 +150,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vst2lanef: ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %B call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -160,7 +160,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;Check the (default) alignment. ;CHECK: vst2.16 {d17[1], d19[1]}, [r0] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) ret void } @@ -170,7 +170,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;Check the alignment value. Max for this instruction is 64 bits: ;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64] %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) ret void } @@ -179,7 +179,7 @@ define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vst2laneQf: ;CHECK: vst2.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1) ret void } @@ -196,7 +196,7 @@ declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst3lanei8: ;CHECK: vst3.8 - %tmp1 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %B call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ret void } @@ -206,7 +206,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind { ;Check the (default) alignment value. VST3 does not support alignment. 
;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) ret void } @@ -215,7 +215,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vst3lanei32: ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) ret void } @@ -224,7 +224,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vst3lanef: ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %B call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -234,7 +234,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;Check the (default) alignment value. VST3 does not support alignment. ;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8) ret void } @@ -243,7 +243,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vst3laneQi32: ;CHECK: vst3.32 %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1) ret void } @@ -252,9 +252,9 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind { define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vst3laneQi32_update: ;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]! - %A = load i32** %ptr + %A = load i32*, i32** %ptr %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1) %tmp2 = getelementptr i32, i32* %A, i32 3 store i32* %tmp2, i32** %ptr @@ -265,7 +265,7 @@ define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vst3laneQf: ;CHECK: vst3.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } @@ -284,7 +284,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst4lanei8: ;Check the alignment value. Max for this instruction is 32 bits: ;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32] - %tmp1 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %B call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) ret void } @@ -293,8 +293,8 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vst4lanei8_update: ;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]! 
- %A = load i8** %ptr - %tmp1 = load <8 x i8>* %B + %A = load i8*, i8** %ptr + %tmp1 = load <8 x i8>, <8 x i8>* %B call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) %tmp2 = getelementptr i8, i8* %A, i32 4 store i8* %tmp2, i8** %ptr @@ -305,7 +305,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vst4lanei16: ;CHECK: vst4.16 %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %B call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) ret void } @@ -315,7 +315,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { ;Check the alignment value. Max for this instruction is 128 bits: ;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128] %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %B call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16) ret void } @@ -324,7 +324,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vst4lanef: ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %B call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1) ret void } @@ -334,7 +334,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;Check the alignment value. Max for this instruction is 64 bits: ;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64] %tmp0 = bitcast i16* %A to i8* - %tmp1 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %B call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16) ret void } @@ -344,7 +344,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;Check the (default) alignment. 
;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] %tmp0 = bitcast i32* %A to i8* - %tmp1 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %B call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ret void } @@ -353,7 +353,7 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vst4laneQf: ;CHECK: vst4.32 %tmp0 = bitcast float* %A to i8* - %tmp1 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %B call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1) ret void } diff --git a/llvm/test/CodeGen/ARM/vsub.ll b/llvm/test/CodeGen/ARM/vsub.ll index d1a094b9275..75fb7d493a5 100644 --- a/llvm/test/CodeGen/ARM/vsub.ll +++ b/llvm/test/CodeGen/ARM/vsub.ll @@ -3,8 +3,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsubi8: ;CHECK: vsub.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = sub <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } @@ -12,8 +12,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsubi16: ;CHECK: vsub.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = sub <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } @@ -21,8 +21,8 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsubi32: ;CHECK: vsub.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = sub <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -30,8 +30,8 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK-LABEL: vsubi64: ;CHECK: vsub.i64 - %tmp1 = load <1 x i64>* %A - %tmp2 = load <1 x i64>* %B + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = sub <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } @@ -39,8 +39,8 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vsubf32: ;CHECK: vsub.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = fsub <2 x float> %tmp1, %tmp2 ret <2 x float> %tmp3 } @@ -48,8 +48,8 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vsubQi8: ;CHECK: vsub.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = sub <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -57,8 +57,8 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vsubQi16: ;CHECK: vsub.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = sub <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } @@ -66,8 +66,8 @@ define <8 x 
i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vsubQi32: ;CHECK: vsub.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = sub <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } @@ -75,8 +75,8 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vsubQi64: ;CHECK: vsub.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = sub <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } @@ -84,8 +84,8 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vsubQf32: ;CHECK: vsub.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = fsub <4 x float> %tmp1, %tmp2 ret <4 x float> %tmp3 } @@ -120,8 +120,8 @@ define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind { define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vrsubhni16: ;CHECK: vrsubhn.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i8> %tmp3 } @@ -129,8 +129,8 @@ define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vrsubhni32: ;CHECK: vrsubhn.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i16> %tmp3 } @@ -138,8 +138,8 @@ define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK-LABEL: vrsubhni64: ;CHECK: vrsubhn.i64 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i64>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i32> %tmp3 } @@ -151,8 +151,8 @@ declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind re define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsubls8: ;CHECK: vsubl.s8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = sext <8 x i8> %tmp1 to <8 x i16> %tmp4 = sext <8 x i8> %tmp2 to <8 x i16> %tmp5 = sub <8 x i16> %tmp3, %tmp4 @@ -162,8 +162,8 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsubls16: ;CHECK: vsubl.s16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = sext <4 x i16> %tmp1 to <4 x i32> %tmp4 = sext <4 x i16> %tmp2 to <4 x i32> %tmp5 = sub <4 x i32> %tmp3, %tmp4 @@ -173,8 +173,8 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vsubls32(<2 x i32>* 
%A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsubls32: ;CHECK: vsubl.s32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = sext <2 x i32> %tmp1 to <2 x i64> %tmp4 = sext <2 x i32> %tmp2 to <2 x i64> %tmp5 = sub <2 x i64> %tmp3, %tmp4 @@ -184,8 +184,8 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsublu8: ;CHECK: vsubl.u8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = zext <8 x i8> %tmp1 to <8 x i16> %tmp4 = zext <8 x i8> %tmp2 to <8 x i16> %tmp5 = sub <8 x i16> %tmp3, %tmp4 @@ -195,8 +195,8 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsublu16: ;CHECK: vsubl.u16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = zext <4 x i16> %tmp1 to <4 x i32> %tmp4 = zext <4 x i16> %tmp2 to <4 x i32> %tmp5 = sub <4 x i32> %tmp3, %tmp4 @@ -206,8 +206,8 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsublu32: ;CHECK: vsubl.u32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = zext <2 x i32> %tmp1 to <2 x i64> %tmp4 = zext <2 x i32> %tmp2 to <2 x i64> %tmp5 = sub <2 x i64> %tmp3, %tmp4 @@ -217,8 +217,8 @@ define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsubws8: ;CHECK: vsubw.s8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = sext <8 x i8> %tmp2 to <8 x i16> %tmp4 = sub <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -227,8 +227,8 @@ define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vsubws16: ;CHECK: vsubw.s16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = sext <4 x i16> %tmp2 to <4 x i32> %tmp4 = sub <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -237,8 +237,8 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsubws32: ;CHECK: vsubw.s32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = sext <2 x i32> %tmp2 to <2 x i64> %tmp4 = sub <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 @@ -247,8 +247,8 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vsubwu8: ;CHECK: vsubw.u8 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = zext <8 x i8> %tmp2 to <8 x i16> %tmp4 = sub <8 x i16> %tmp1, %tmp3 ret <8 x i16> %tmp4 @@ -257,8 +257,8 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { 
;CHECK-LABEL: vsubwu16: ;CHECK: vsubw.u16 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = zext <4 x i16> %tmp2 to <4 x i32> %tmp4 = sub <4 x i32> %tmp1, %tmp3 ret <4 x i32> %tmp4 @@ -267,8 +267,8 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vsubwu32: ;CHECK: vsubw.u32 - %tmp1 = load <2 x i64>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = zext <2 x i32> %tmp2 to <2 x i64> %tmp4 = sub <2 x i64> %tmp1, %tmp3 ret <2 x i64> %tmp4 diff --git a/llvm/test/CodeGen/ARM/vtbl.ll b/llvm/test/CodeGen/ARM/vtbl.ll index 32258a30da9..e4dd572a41b 100644 --- a/llvm/test/CodeGen/ARM/vtbl.ll +++ b/llvm/test/CodeGen/ARM/vtbl.ll @@ -7,8 +7,8 @@ define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vtbl1: ;CHECK: vtbl.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -16,8 +16,8 @@ define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind { ;CHECK-LABEL: vtbl2: ;CHECK: vtbl.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__neon_int8x8x2_t* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4) @@ -27,8 +27,8 @@ define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind { define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind { ;CHECK-LABEL: vtbl3: ;CHECK: vtbl.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__neon_int8x8x3_t* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x3_t, %struct.__neon_int8x8x3_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 @@ -39,8 +39,8 @@ define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind { define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind { ;CHECK-LABEL: vtbl4: ;CHECK: vtbl.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 @@ -52,9 +52,9 @@ define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind { define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vtbx1: ;CHECK: vtbx.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B - %tmp3 = load <8 x i8>* %C + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = load <8 x i8>, <8 x i8>* %C %tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) ret <8 x i8> %tmp4 } @@ -62,11 +62,11 @@ define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) 
nounwind { define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vtbx2: ;CHECK: vtbx.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__neon_int8x8x2_t* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 - %tmp5 = load <8 x i8>* %C + %tmp5 = load <8 x i8>, <8 x i8>* %C %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5) ret <8 x i8> %tmp6 } @@ -74,12 +74,12 @@ define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vtbx3: ;CHECK: vtbx.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__neon_int8x8x3_t* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x3_t, %struct.__neon_int8x8x3_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 - %tmp6 = load <8 x i8>* %C + %tmp6 = load <8 x i8>, <8 x i8>* %C %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6) ret <8 x i8> %tmp7 } @@ -87,13 +87,13 @@ define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind { ;CHECK-LABEL: vtbx4: ;CHECK: vtbx.8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 - %tmp7 = load <8 x i8>* %C + %tmp7 = load <8 x i8>, <8 x i8>* %C %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7) ret <8 x i8> %tmp8 } diff --git a/llvm/test/CodeGen/ARM/vtrn.ll b/llvm/test/CodeGen/ARM/vtrn.ll index cdae7f8ec37..caa5becac1d 100644 --- a/llvm/test/CodeGen/ARM/vtrn.ll +++ b/llvm/test/CodeGen/ARM/vtrn.ll @@ -4,8 +4,8 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vtrni8: ;CHECK: vtrn.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -16,8 +16,8 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vtrni16: ;CHECK: vtrn.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> %tmp5 = add <4 x i16> %tmp3, %tmp4 @@ -28,8 +28,8 @@ define 
<2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK-LABEL: vtrni32: ;CHECK: vtrn.32 ;CHECK-NEXT: vadd.i32 - %tmp1 = load <2 x i32>* %A - %tmp2 = load <2 x i32>* %B + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2> %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3> %tmp5 = add <2 x i32> %tmp3, %tmp4 @@ -40,8 +40,8 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vtrnf: ;CHECK: vtrn.32 ;CHECK-NEXT: vadd.f32 - %tmp1 = load <2 x float>* %A - %tmp2 = load <2 x float>* %B + %tmp1 = load <2 x float>, <2 x float>* %A + %tmp2 = load <2 x float>, <2 x float>* %B %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2> %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3> %tmp5 = fadd <2 x float> %tmp3, %tmp4 @@ -52,8 +52,8 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vtrnQi8: ;CHECK: vtrn.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> %tmp5 = add <16 x i8> %tmp3, %tmp4 @@ -64,8 +64,8 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vtrnQi16: ;CHECK: vtrn.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -76,8 +76,8 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vtrnQi32: ;CHECK: vtrn.32 ;CHECK-NEXT: vadd.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -88,8 +88,8 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vtrnQf: ;CHECK: vtrn.32 ;CHECK-NEXT: vadd.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> %tmp5 = fadd <4 x float> %tmp3, %tmp4 @@ -102,8 +102,8 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vtrni8_undef: ;CHECK: vtrn.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, 
<8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14> %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15> %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -114,8 +114,8 @@ define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vtrnQi16_undef: ;CHECK: vtrn.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14> %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef> %tmp5 = add <8 x i16> %tmp3, %tmp4 diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll index 832be6c3daf..7a7306a2659 100644 --- a/llvm/test/CodeGen/ARM/vuzp.ll +++ b/llvm/test/CodeGen/ARM/vuzp.ll @@ -4,8 +4,8 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vuzpi8: ;CHECK: vuzp.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -16,8 +16,8 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vuzpi16: ;CHECK: vuzp.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> %tmp5 = add <4 x i16> %tmp3, %tmp4 @@ -30,8 +30,8 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vuzpQi8: ;CHECK: vuzp.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> %tmp5 = add <16 x i8> %tmp3, %tmp4 @@ -42,8 +42,8 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vuzpQi16: ;CHECK: vuzp.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -54,8 +54,8 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vuzpQi32: ;CHECK: 
vuzp.32 ;CHECK-NEXT: vadd.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -66,8 +66,8 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vuzpQf: ;CHECK: vuzp.32 ;CHECK-NEXT: vadd.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7> %tmp5 = fadd <4 x float> %tmp3, %tmp4 @@ -80,8 +80,8 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vuzpi8_undef: ;CHECK: vuzp.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14> %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15> %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -92,8 +92,8 @@ define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vuzpQi16_undef: ;CHECK: vuzp.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14> %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15> %tmp5 = add <8 x i16> %tmp3, %tmp4 diff --git a/llvm/test/CodeGen/ARM/vzip.ll b/llvm/test/CodeGen/ARM/vzip.ll index f74dc62599c..a1b5b4549ac 100644 --- a/llvm/test/CodeGen/ARM/vzip.ll +++ b/llvm/test/CodeGen/ARM/vzip.ll @@ -4,8 +4,8 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vzipi8: ;CHECK: vzip.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -16,8 +16,8 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK-LABEL: vzipi16: ;CHECK: vzip.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <4 x i16>* %A - %tmp2 = load <4 x i16>* %B + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> %tmp5 = add <4 x i16> %tmp3, %tmp4 @@ -30,8 +30,8 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vzipQi8: ;CHECK: vzip.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* 
%B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> %tmp5 = add <16 x i8> %tmp3, %tmp4 @@ -42,8 +42,8 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: vzipQi16: ;CHECK: vzip.16 ;CHECK-NEXT: vadd.i16 - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> %tmp5 = add <8 x i16> %tmp3, %tmp4 @@ -54,8 +54,8 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: vzipQi32: ;CHECK: vzip.32 ;CHECK-NEXT: vadd.i32 - %tmp1 = load <4 x i32>* %A - %tmp2 = load <4 x i32>* %B + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> %tmp5 = add <4 x i32> %tmp3, %tmp4 @@ -66,8 +66,8 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK-LABEL: vzipQf: ;CHECK: vzip.32 ;CHECK-NEXT: vadd.f32 - %tmp1 = load <4 x float>* %A - %tmp2 = load <4 x float>* %B + %tmp1 = load <4 x float>, <4 x float>* %A + %tmp2 = load <4 x float>, <4 x float>* %B %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> %tmp5 = fadd <4 x float> %tmp3, %tmp4 @@ -80,8 +80,8 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vzipi8_undef: ;CHECK: vzip.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <8 x i8>* %A - %tmp2 = load <8 x i8>* %B + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11> %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15> %tmp5 = add <8 x i8> %tmp3, %tmp4 @@ -92,8 +92,8 @@ define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: vzipQi8_undef: ;CHECK: vzip.8 ;CHECK-NEXT: vadd.i8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31> %tmp5 = add <16 x i8> %tmp3, %tmp4 diff --git a/llvm/test/CodeGen/ARM/zextload_demandedbits.ll b/llvm/test/CodeGen/ARM/zextload_demandedbits.ll index 
0b41265c152..ba7393c2dc2 100644
--- a/llvm/test/CodeGen/ARM/zextload_demandedbits.ll
+++ b/llvm/test/CodeGen/ARM/zextload_demandedbits.ll
@@ -15,7 +15,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
define void @quux(%struct.eggs* %arg) {
bb:
 %tmp1 = getelementptr inbounds %struct.eggs, %struct.eggs* %arg, i32 0, i32 1
- %0 = load i16* %tmp1, align 2
+ %0 = load i16, i16* %tmp1, align 2
 %tobool = icmp eq i16 %0, 0
 br i1 %tobool, label %bb16, label %bb3