Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer')
65 files changed, 694 insertions, 694 deletions
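Every hunk below makes the same mechanical change: the textual form of the load instruction now spells out the loaded type as an explicit first operand instead of leaving it implied by the pointer operand, and the FileCheck CHECK lines are updated to match the new spelling. A minimal before/after illustration (hypothetical IR written in the style of these tests, not copied from any single file):

  ; old form: result type implied by the pointer operand
  %x = load double* %p, align 8
  %v = load <2 x double>* %q, align 8

  ; new form: loaded type written explicitly before the pointer operand
  %x = load double, double* %p, align 8
  %v = load <2 x double>, <2 x double>* %q, align 8

The substitution is applied uniformly to scalar, vector, and pointer-typed loads across the test files; in the hunks shown here nothing else about the tests changes, which is why insertions and deletions balance at 694 each.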
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
index cee51fd74de..1cff73d9f69 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -9,7 +9,7 @@ define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
-; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
; CHECK: %5 = fsub fast <2 x float> %2, %4
; CHECK: %6 = fmul fast <2 x float> %5, %5
; CHECK: %7 = extractelement <2 x float> %6, i32 0
@@ -24,10 +24,10 @@ for.body3.lr.ph:
%conv5 = sitofp i32 %ymin to float
%conv = sitofp i32 %xmin to float
%arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
- %0 = load float* %arrayidx4, align 4
+ %0 = load float, float* %arrayidx4, align 4
%sub = fsub fast float %conv, %0
%arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
- %1 = load float* %arrayidx9, align 4
+ %1 = load float, float* %arrayidx9, align 4
%sub10 = fsub fast float %conv5, %1
%mul11 = fmul fast float %sub, %sub
%mul12 = fmul fast float %sub10, %sub10
@@ -44,7 +44,7 @@ define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
-; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
; CHECK: %5 = fsub fast <2 x float> %2, %4
; CHECK: %6 = fmul fast <2 x float> %5, %5
; CHECK: %7 = extractelement <2 x float> %6, i32 0
@@ -59,10 +59,10 @@ for.body3.lr.ph:
%conv5 = sitofp i32 %ymin to float
%conv = sitofp i32 %xmin to float
%arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
- %0 = load float* %arrayidx4, align 4
+ %0 = load float, float* %arrayidx4, align 4
%sub = fsub fast float %conv, %0
%arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
- %1 = load float* %arrayidx9, align 4
+ %1 = load float, float* %arrayidx9, align 4
%sub10 = fsub fast float %conv5, %1
%mul11 = fmul fast float %sub, %sub
%mul12 = fmul fast float %sub10, %sub10
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
index d3afc056d87..6ff1118d3cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
@@ -15,8 +15,8 @@ target triple = "arm64-apple-ios5.0.0"
define void @f(double* %p, double* %q) {
%addr2 = getelementptr double, double* %q, i32 1
%addr = getelementptr double, double* %p, i32 1
- %x = load double* %p
- %y = load double* %addr
+ %x = load double, double* %p
+ %y = load double, double* %addr
call void @g()
store double %x, double* %q
store double %y, double* %addr2
@@ -40,7 +40,7 @@ loop:
store double %p1, double* %q
store double %p2, double* %addr2
- %x = load double* %p
- %y = load double* %addr
+ %x = load double, double* %p
+ %y = load double, double* %addr
br label %loop
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll index dddcf3c94e7..72c70823e69 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll @@ -9,31 +9,31 @@ target triple = "aarch64--linux-gnu" define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) { entry: - %0 = load i32* %b, align 4 - %1 = load i32* %c, align 4 + %0 = load i32, i32* %b, align 4 + %1 = load i32, i32* %c, align 4 %add = add nsw i32 %1, %0 %div = sdiv i32 %add, 2 store i32 %div, i32* %a, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1 - %2 = load i32* %arrayidx3, align 4 + %2 = load i32, i32* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1 - %3 = load i32* %arrayidx4, align 4 + %3 = load i32, i32* %arrayidx4, align 4 %add5 = add nsw i32 %3, %2 %div6 = sdiv i32 %add5, 2 %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1 store i32 %div6, i32* %arrayidx7, align 4 %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2 - %4 = load i32* %arrayidx8, align 4 + %4 = load i32, i32* %arrayidx8, align 4 %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2 - %5 = load i32* %arrayidx9, align 4 + %5 = load i32, i32* %arrayidx9, align 4 %add10 = add nsw i32 %5, %4 %div11 = sdiv i32 %add10, 2 %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2 store i32 %div11, i32* %arrayidx12, align 4 %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3 - %6 = load i32* %arrayidx13, align 4 + %6 = load i32, i32* %arrayidx13, align 4 %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3 - %7 = load i32* %arrayidx14, align 4 + %7 = load i32, i32* %arrayidx14, align 4 %add15 = add nsw i32 %7, %6 %div16 = sdiv i32 %add15, 2 %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3 diff --git a/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll b/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll index b8e9a038386..57d7cceac6b 100644 --- a/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll +++ b/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll @@ -10,10 +10,10 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64- ; CHECK-NOT: store <2 x double> define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) { entry: - %0 = load double* %src, align 8 + %0 = load double, double* %src, align 8 store double %0, double* %dst, align 8 %arrayidx2 = getelementptr inbounds double, double* %src, i64 1 - %1 = load double* %arrayidx2, align 8 + %1 = load double, double* %arrayidx2, align 8 %arrayidx3 = getelementptr inbounds double, double* %dst, i64 1 store double %1, double* %arrayidx3, align 8 ret void diff --git a/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll index f88b86de539..9ed86f88147 100644 --- a/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll +++ b/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll @@ -6,17 +6,17 @@ target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32- ; Simple 3-pair chain with loads and stores define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) { ; CHECK-LABEL: @test1_as_3_3_3( -; CHECK: load <2 x double> addrspace(3)* -; CHECK: load <2 x double> addrspace(3)* +; CHECK: load <2 x double>, <2 x double> addrspace(3)* +; CHECK: load <2 x double>, <2 x double> addrspace(3)* ; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % ; CHECK: 
ret - %i0 = load double addrspace(3)* %a, align 8 - %i1 = load double addrspace(3)* %b, align 8 + %i0 = load double, double addrspace(3)* %a, align 8 + %i1 = load double, double addrspace(3)* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1 - %i3 = load double addrspace(3)* %arrayidx3, align 8 + %i3 = load double, double addrspace(3)* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1 - %i4 = load double addrspace(3)* %arrayidx4, align 8 + %i4 = load double, double addrspace(3)* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double addrspace(3)* %c, align 8 %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1 @@ -26,17 +26,17 @@ define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, do define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { ; CHECK-LABEL: @test1_as_3_0_0( -; CHECK: load <2 x double> addrspace(3)* -; CHECK: load <2 x double>* +; CHECK: load <2 x double>, <2 x double> addrspace(3)* +; CHECK: load <2 x double>, <2 x double>* ; CHECK: store <2 x double> %{{.*}}, <2 x double>* % ; CHECK: ret - %i0 = load double addrspace(3)* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double addrspace(3)* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1 - %i3 = load double addrspace(3)* %arrayidx3, align 8 + %i3 = load double, double addrspace(3)* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double* %c, align 8 %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 @@ -46,17 +46,17 @@ define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) { ; CHECK-LABEL: @test1_as_0_0_3( -; CHECK: load <2 x double>* -; CHECK: load <2 x double>* +; CHECK: load <2 x double>, <2 x double>* +; CHECK: load <2 x double>, <2 x double>* ; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* % ; CHECK: ret - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double addrspace(3)* %c, align 8 %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll index 8c2777a71f7..bc0beec1bab 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll @@ -21,35 +21,35 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nounwind uwtable define void @addsub() #0 { entry: - %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 - %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %0 = load i32, i32* getelementptr 
inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 %add = add nsw i32 %0, %1 - %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 - %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %2 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 %add1 = add nsw i32 %2, %3 %add2 = add nsw i32 %add, %add1 store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 - %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 - %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %4 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 %add3 = add nsw i32 %4, %5 - %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 - %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %6 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 %add4 = add nsw i32 %6, %7 %sub = sub nsw i32 %add3, %add4 store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 - %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 - %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %8 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 %add5 = add nsw i32 %8, %9 - %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 - %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %10 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 %add6 = add nsw i32 %10, %11 %add7 = add nsw i32 %add5, %add6 store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 - %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 - %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %12 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 + %13 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 %add8 = add nsw i32 %12, %13 - %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 - %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 + %14 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 %add9 = add nsw i32 %14, %15 %sub10 = sub nsw i32 %add8, %add9 store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 @@ -65,35 +65,35 @@ entry: ; Function Attrs: nounwind uwtable define void @subadd() #0 { entry: - %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 - %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 + %0 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4 %add = add nsw i32 
%0, %1 - %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 - %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 + %2 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4 + %3 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4 %add1 = add nsw i32 %2, %3 %sub = sub nsw i32 %add, %add1 store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4 - %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 - %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 + %4 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4 + %5 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4 %add2 = add nsw i32 %4, %5 - %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 - %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 + %6 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4 + %7 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4 %add3 = add nsw i32 %6, %7 %add4 = add nsw i32 %add2, %add3 store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4 - %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 - %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 + %8 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4 + %9 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4 %add5 = add nsw i32 %8, %9 - %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 - %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 + %10 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4 + %11 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4 %add6 = add nsw i32 %10, %11 %sub7 = sub nsw i32 %add5, %add6 store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4 - %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 - %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 + %12 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4 + %13 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4 %add8 = add nsw i32 %12, %13 - %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 - %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 + %14 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4 + %15 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4 %add9 = add nsw i32 %14, %15 %add10 = add nsw i32 %add8, %add9 store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4 @@ -107,20 +107,20 @@ entry: ; Function Attrs: nounwind uwtable define void @faddfsub() #0 { entry: - %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 - %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %0 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 %add = fadd float %0, %1 store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 - %2 = load float* 
getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 - %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 %sub = fsub float %2, %3 store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 - %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 - %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %4 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 %add1 = fadd float %4, %5 store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 - %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 - %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %6 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 %sub2 = fsub float %6, %7 store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 ret void @@ -133,20 +133,20 @@ entry: ; Function Attrs: nounwind uwtable define void @fsubfadd() #0 { entry: - %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 - %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %0 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 %sub = fsub float %0, %1 store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 - %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 - %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 %add = fadd float %2, %3 store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 - %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 - %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %4 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 %sub1 = fsub float %4, %5 store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 - %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 - %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %6 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 %add2 = fadd float %6, %7 store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 ret void @@ -159,20 +159,20 @@ entry: ; Function Attrs: nounwind uwtable define void @No_faddfsub() #0 { entry: - %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 - %1 
= load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 + %0 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %1 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 %add = fadd float %0, %1 store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 - %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 - %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 + %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %3 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 %add1 = fadd float %2, %3 store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 - %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 - %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 + %4 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %5 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 %add2 = fadd float %4, %5 store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 - %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 - %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 + %6 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %7 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 %sub = fsub float %6, %7 store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 ret void @@ -189,20 +189,20 @@ entry: ; CHECK: %4 = fsub <4 x float> %1, %2 ; CHECK: %5 = shufflevector <4 x float> %3, <4 x float> %4, <4 x i32> <i32 0, i32 5, i32 2, i32 7> define void @reorder_alt() #0 { - %1 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 - %2 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %1 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %2 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 %3 = fadd float %1, %2 store float %3, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 - %4 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 - %5 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %4 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %5 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 %6 = fsub float %4, %5 store float %6, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 - %7 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 - %8 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %7 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %8 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 %9 = fadd float %7, %8 store float %9, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 - %10 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 - %11 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %10 = load float, 
float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + %11 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 %12 = fsub float %10, %11 store float %12, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 ret void @@ -222,27 +222,27 @@ define void @reorder_alt() #0 { ; CHECK: %8 = fsub <4 x float> %1, %6 ; CHECK: %9 = shufflevector <4 x float> %7, <4 x float> %8, <4 x i32> <i32 0, i32 5, i32 2, i32 7> define void @reorder_alt_subTree() #0 { - %1 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 - %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 - %3 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 0), align 4 + %1 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %3 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 0), align 4 %4 = fsub float %2, %3 %5 = fadd float %1, %4 store float %5, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 - %6 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 - %7 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 - %8 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 1), align 4 + %6 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %7 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %8 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 1), align 4 %9 = fadd float %7, %8 %10 = fsub float %6, %9 store float %10, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 - %11 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 - %12 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 - %13 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 2), align 4 + %11 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %12 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %13 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 2), align 4 %14 = fsub float %12, %13 %15 = fadd float %11, %14 store float %15, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 - %16 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 - %17 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 3), align 4 - %18 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %16 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + %17 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 3), align 4 + %18 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 %19 = fadd float %17, %18 %20 = fsub float %16, %19 store float %20, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 @@ -258,18 +258,18 @@ define void @reorder_alt_subTree() #0 { ; CHECK: fsub <2 x double> ; CHECK: shufflevector <2 x double> define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) { - %1 = load double* %a - %2 = load double* %b + %1 = load double, 
double* %a + %2 = load double, double* %b %3 = fadd double %1, %2 - %4 = load double* %d + %4 = load double, double* %d %5 = fsub double %3, %4 store double %5, double* %c %6 = getelementptr inbounds double, double* %d, i64 1 - %7 = load double* %6 + %7 = load double, double* %6 %8 = getelementptr inbounds double, double* %a, i64 1 - %9 = load double* %8 + %9 = load double, double* %8 %10 = getelementptr inbounds double, double* %b, i64 1 - %11 = load double* %10 + %11 = load double, double* %10 %12 = fadd double %9, %11 %13 = fadd double %7, %12 %14 = getelementptr inbounds double, double* %c, i64 1 @@ -290,20 +290,20 @@ define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias noca ; CHECK-NOT: fsub <4 x float> ; CHECK-NOT: shufflevector define void @no_vec_shuff_reorder() #0 { - %1 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 - %2 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 + %1 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4 + %2 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4 %3 = fadd float %1, %2 store float %3, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4 - %4 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 - %5 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 + %4 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4 + %5 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4 %6 = fsub float %4, %5 store float %6, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4 - %7 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 - %8 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 + %7 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4 + %8 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4 %9 = fadd float %7, %8 store float %9, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4 - %10 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 - %11 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 + %10 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4 + %11 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4 %12 = fsub float %10, %11 store float %12, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4 ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/align.ll b/llvm/test/Transforms/SLPVectorizer/X86/align.ll index 2d6afaf3c04..b74b70900ee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/align.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/align.ll @@ -8,16 +8,16 @@ target triple = "x86_64-apple-macosx10.8.0" define void @test1(double* %a, double* %b, double* %c) { entry: %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16 -; CHECK: %[[V0:[0-9]+]] = load <2 x double>* %[[V2:[0-9]+]], align 8 - %i0 = load double* %a - %i1 = load double* %b +; CHECK: %[[V0:[0-9]+]] = load <2 x double>, <2 x double>* %[[V2:[0-9]+]], align 8 + %i0 = load double, double* %a + %i1 = load double, double* %b %mul = fmul double %i0, %i1 %store1 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 1 %store2 = getelementptr inbounds [3 x double], [3 x double]* 
%agg.tmp.i.i.sroa.0, i64 0, i64 2 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 ; CHECK: store <2 x double> %[[V1:[0-9]+]], <2 x double>* %[[V2:[0-9]+]], align 8 store double %mul, double* %store1 @@ -37,13 +37,13 @@ entry: define void @test2(float * %a, float * %b) { entry: - %l0 = load float* %a + %l0 = load float, float* %a %a1 = getelementptr inbounds float, float* %a, i64 1 - %l1 = load float* %a1 + %l1 = load float, float* %a1 %a2 = getelementptr inbounds float, float* %a, i64 2 - %l2 = load float* %a2 + %l2 = load float, float* %a2 %a3 = getelementptr inbounds float, float* %a, i64 3 - %l3 = load float* %a3 + %l3 = load float, float* %a3 store float %l0, float* %b %b1 = getelementptr inbounds float, float* %b, i64 1 store float %l1, float* %b1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll index 1bb0382e6b1..b76ac2c15c6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll @@ -15,14 +15,14 @@ declare i64 @round(i64) ; CHECK: ret void define void @sin_libm(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %call = tail call double @sin(double %mul) nounwind readnone %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 %call5 = tail call double @sin(double %mul5) nounwind readnone store double %call, double* %c, align 8 @@ -36,14 +36,14 @@ entry: ; CHECK: ret void define void @cos_libm(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %call = tail call double @cos(double %mul) nounwind readnone %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 %call5 = tail call double @cos(double %mul5) nounwind readnone store double %call, double* %c, align 8 @@ -57,14 +57,14 @@ entry: ; CHECK: ret void define void @pow_libm(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %call = tail call double @pow(double %mul,double %mul) nounwind readnone %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 
%mul5 = fmul double %i3, %i4 %call5 = tail call double @pow(double %mul5,double %mul5) nounwind readnone store double %call, double* %c, align 8 @@ -79,14 +79,14 @@ entry: ; CHECK: ret void define void @exp2_libm(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %call = tail call double @exp2(double %mul) nounwind readnone %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 %call5 = tail call double @exp2(double %mul5) nounwind readnone store double %call, double* %c, align 8 @@ -102,14 +102,14 @@ entry: ; CHECK: ret void define void @round_custom(i64* %a, i64* %b, i64* %c) { entry: - %i0 = load i64* %a, align 8 - %i1 = load i64* %b, align 8 + %i0 = load i64, i64* %a, align 8 + %i1 = load i64, i64* %b, align 8 %mul = mul i64 %i0, %i1 %call = tail call i64 @round(i64 %mul) nounwind readnone %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1 - %i3 = load i64* %arrayidx3, align 8 + %i3 = load i64, i64* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1 - %i4 = load i64* %arrayidx4, align 8 + %i4 = load i64, i64* %arrayidx4, align 8 %mul5 = mul i64 %i3, %i4 %call5 = tail call i64 @round(i64 %mul5) nounwind readnone store i64 %call, i64* %c, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast.ll index 357efc569e6..044db5d694b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cast.ll @@ -15,21 +15,21 @@ target triple = "x86_64-apple-macosx10.9.0" ;CHECK: store <4 x i32> define i32 @foo(i32* noalias nocapture %A, i8* noalias nocapture %B) { entry: - %0 = load i8* %B, align 1 + %0 = load i8, i8* %B, align 1 %conv = sext i8 %0 to i32 store i32 %conv, i32* %A, align 4 %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 1 - %1 = load i8* %arrayidx2, align 1 + %1 = load i8, i8* %arrayidx2, align 1 %conv3 = sext i8 %1 to i32 %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1 store i32 %conv3, i32* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds i8, i8* %B, i64 2 - %2 = load i8* %arrayidx5, align 1 + %2 = load i8, i8* %arrayidx5, align 1 %conv6 = sext i8 %2 to i32 %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 2 store i32 %conv6, i32* %arrayidx7, align 4 %arrayidx8 = getelementptr inbounds i8, i8* %B, i64 3 - %3 = load i8* %arrayidx8, align 1 + %3 = load i8, i8* %arrayidx8, align 1 %conv9 = sext i8 %3 to i32 %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 3 store i32 %conv9, i32* %arrayidx10, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll index 92efaa1eea3..a3e2b21ea6e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll @@ -17,12 +17,12 @@ target triple = "x86_64-apple-macosx10.8.0" define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, double %G) { entry: %arrayidx = getelementptr inbounds double, double* %B, i64 10 - %0 = load double* %arrayidx, align 8 + %0 = load double, double* %arrayidx, align 8 %tobool = fcmp une double 
%0, 0.000000e+00 %cond = select i1 %tobool, double %G, double 1.000000e+00 store double %cond, double* %A, align 8 %arrayidx2 = getelementptr inbounds double, double* %B, i64 11 - %1 = load double* %arrayidx2, align 8 + %1 = load double, double* %arrayidx2, align 8 %tobool3 = fcmp une double %1, 0.000000e+00 %cond7 = select i1 %tobool3, double %G, double 1.000000e+00 %arrayidx8 = getelementptr inbounds double, double* %A, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index 4b78ac3e183..8555fe0df8f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -22,13 +22,13 @@ for.body: ; preds = %for.inc, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] %0 = shl nsw i64 %indvars.iv, 1 %arrayidx = getelementptr inbounds double, double* %A, i64 %0 - %1 = load double* %arrayidx, align 8 + %1 = load double, double* %arrayidx, align 8 %mul1 = fmul double %conv, %1 %mul2 = fmul double %mul1, 7.000000e+00 %add = fadd double %mul2, 5.000000e+00 %2 = or i64 %0, 1 %arrayidx6 = getelementptr inbounds double, double* %A, i64 %2 - %3 = load double* %arrayidx6, align 8 + %3 = load double, double* %arrayidx6, align 8 %mul8 = fmul double %conv, %3 %mul9 = fmul double %mul8, 4.000000e+00 %add10 = fadd double %mul9, 9.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll index ab7380af3b5..1ad4d694d34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll @@ -21,25 +21,25 @@ entry: %mul = mul nsw i32 %u, 3 %idxprom = sext i32 %mul to i64 %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom - %0 = load double* %arrayidx, align 8 + %0 = load double, double* %arrayidx, align 8 %arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom - %1 = load double* %arrayidx4, align 8 + %1 = load double, double* %arrayidx4, align 8 %add5 = fadd double %0, %1 store double %add5, double* %arrayidx, align 8 %add11 = add nsw i32 %mul, 1 %idxprom12 = sext i32 %add11 to i64 %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12 - %2 = load double* %arrayidx13, align 8 + %2 = load double, double* %arrayidx13, align 8 %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12 - %3 = load double* %arrayidx17, align 8 + %3 = load double, double* %arrayidx17, align 8 %add18 = fadd double %2, %3 store double %add18, double* %arrayidx13, align 8 %add24 = add nsw i32 %mul, 2 %idxprom25 = sext i32 %add24 to i64 %arrayidx26 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom25 - %4 = load double* %arrayidx26, align 8 + %4 = load double, double* %arrayidx26, align 8 %arrayidx30 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom25 - %5 = load double* %arrayidx30, align 8 + %5 = load double, double* %arrayidx30, align 8 %add31 = fadd double %4, %5 store double %add31, double* %arrayidx26, align 8 ret void @@ -58,17 +58,17 @@ entry: %mul = mul nsw i32 %u, 2 %idxprom = sext i32 %mul to i64 %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom - %0 = load double* %arrayidx, align 8 + %0 = load double, double* %arrayidx, align 8 
%arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom - %1 = load double* %arrayidx4, align 8 + %1 = load double, double* %arrayidx4, align 8 %add5 = fadd double %0, %1 store double %add5, double* %arrayidx, align 8 %add11 = add nsw i32 %mul, 1 %idxprom12 = sext i32 %add11 to i64 %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12 - %2 = load double* %arrayidx13, align 8 + %2 = load double, double* %arrayidx13, align 8 %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12 - %3 = load double* %arrayidx17, align 8 + %3 = load double, double* %arrayidx17, align 8 %add18 = fadd double %2, %3 store double %add18, double* %arrayidx13, align 8 ret void @@ -85,33 +85,33 @@ entry: %mul = mul nsw i32 %u, 4 %idxprom = sext i32 %mul to i64 %arrayidx = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom - %0 = load float* %arrayidx, align 4 + %0 = load float, float* %arrayidx, align 4 %arrayidx4 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom - %1 = load float* %arrayidx4, align 4 + %1 = load float, float* %arrayidx4, align 4 %add5 = fadd float %0, %1 store float %add5, float* %arrayidx, align 4 %add11 = add nsw i32 %mul, 1 %idxprom12 = sext i32 %add11 to i64 %arrayidx13 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom12 - %2 = load float* %arrayidx13, align 4 + %2 = load float, float* %arrayidx13, align 4 %arrayidx17 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom12 - %3 = load float* %arrayidx17, align 4 + %3 = load float, float* %arrayidx17, align 4 %add18 = fadd float %2, %3 store float %add18, float* %arrayidx13, align 4 %add24 = add nsw i32 %mul, 2 %idxprom25 = sext i32 %add24 to i64 %arrayidx26 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom25 - %4 = load float* %arrayidx26, align 4 + %4 = load float, float* %arrayidx26, align 4 %arrayidx30 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom25 - %5 = load float* %arrayidx30, align 4 + %5 = load float, float* %arrayidx30, align 4 %add31 = fadd float %4, %5 store float %add31, float* %arrayidx26, align 4 %add37 = add nsw i32 %mul, 3 %idxprom38 = sext i32 %add37 to i64 %arrayidx39 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom38 - %6 = load float* %arrayidx39, align 4 + %6 = load float, float* %arrayidx39, align 4 %arrayidx43 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom38 - %7 = load float* %arrayidx43, align 4 + %7 = load float, float* %arrayidx43, align 4 %add44 = fadd float %6, %7 store float %add44, float* %arrayidx39, align 4 ret void @@ -143,12 +143,12 @@ for.body: ; preds = %for.body.lr.ph, %fo %mul = mul nsw i32 %0, 2 %idxprom = sext i32 %mul to i64 %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom - %2 = load double* %arrayidx, align 8 + %2 = load double, double* %arrayidx, align 8 %mul1 = fmul double 7.000000e+00, %2 %add = add nsw i32 %mul, 1 %idxprom3 = sext i32 %add to i64 %arrayidx4 = getelementptr inbounds double, double* %A, i64 %idxprom3 - %3 = load double* %arrayidx4, align 8 + %3 = load double, double* %arrayidx4, align 8 %mul5 = fmul double 7.000000e+00, %3 %add6 = fadd double %mul1, %mul5 %add7 = fadd double %1, %add6 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll index b53169c3d01..ecae70ecc91 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll @@ -9,21 +9,21 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK: ret define void @test1(double* %a, double* %b, double* %c, double* %d) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double* %c, align 8 %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 store double %mul5, double* %arrayidx5, align 8 %0 = bitcast double* %a to <4 x i32>* - %1 = load <4 x i32>* %0, align 8 + %1 = load <4 x i32>, <4 x i32>* %0, align 8 %2 = bitcast double* %b to <4 x i32>* - %3 = load <4 x i32>* %2, align 8 + %3 = load <4 x i32>, <4 x i32>* %2, align 8 %4 = mul <4 x i32> %1, %3 %5 = bitcast double* %d to <4 x i32>* store <4 x i32> %4, <4 x i32>* %5, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll index dc99366e1da..9046c356282 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll @@ -7,7 +7,7 @@ target triple = "x86_64-apple-darwin13.3.0" define i32 @fn1() { entry: - %init = load double* @a, align 8 + %init = load double, double* @a, align 8 br label %loop loop: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll index b5b2f262c47..1bad671fd82 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll @@ -45,7 +45,7 @@ define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector entry: %arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1 %arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2 - %0 = load float* %arrayidx36, align 4 + %0 = load float, float* %arrayidx36, align 4 %add587 = fadd float undef, undef %sub600 = fsub float %add587, undef store float %sub600, float* undef, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 8ca63945929..f10c8626d41 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -13,7 +13,7 @@ for.body: %s1.055 = phi float [ 0.000000e+00, %entry ], [ %cond.i40, %for.body ] %s0.054 = phi float [ 0.000000e+00, %entry ], [ %cond.i44, %for.body ] %arrayidx = getelementptr inbounds float, float* %src, i64 %indvars.iv - %0 = load float* %arrayidx, align 4 + %0 = load float, float* %arrayidx, align 4 %indvars.iv.next 
= add nuw nsw i64 %indvars.iv, 1 %arrayidx2 = getelementptr inbounds float, float* %dest, i64 %indvars.iv store float %acc1.056, float* %arrayidx2, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll index f1ef9572564..28b7aa3c4de 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll @@ -8,12 +8,12 @@ target triple = "x86_64-apple-macosx10.8.0" define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) { entry: %_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0 - %0 = load double** %_M_cur2.i.i, align 8 + %0 = load double*, double** %_M_cur2.i.i, align 8 %_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 1 %_M_cur2.i.i81 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 0 - %1 = load double** %_M_cur2.i.i81, align 8 + %1 = load double*, double** %_M_cur2.i.i81, align 8 %_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 1 - %2 = load double** %_M_first3.i.i83, align 8 + %2 = load double*, double** %_M_first3.i.i83, align 8 br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader while.cond.i.preheader: ; preds = %entry diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll index aa1857233af..bd1e8f7cc19 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nounwind uwtable define i32 @fn1() { entry: - %0 = load i64** @a, align 8 + %0 = load i64*, i64** @a, align 8 %add.ptr = getelementptr inbounds i64, i64* %0, i64 1 %1 = ptrtoint i64* %add.ptr to i64 %arrayidx = getelementptr inbounds i64, i64* %0, i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll index 4ddb27a0be9..70b13fd75f1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -81,10 +81,10 @@ define fastcc void @dct36(double* %inbuf) { entry: %arrayidx41 = getelementptr inbounds double, double* %inbuf, i64 2 %arrayidx44 = getelementptr inbounds double, double* %inbuf, i64 1 - %0 = load double* %arrayidx44, align 8 + %0 = load double, double* %arrayidx44, align 8 %add46 = fadd double %0, undef store double %add46, double* %arrayidx41, align 8 - %1 = load double* %inbuf, align 8 + %1 = load double, double* %inbuf, align 8 %add49 = fadd double %1, %0 store double %add49, double* %arrayidx44, align 8 ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll index 109c3c93d5d..f82343fb433 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll @@ -55,9 +55,9 @@ for.end48: ; preds = %for.end44 define void @zot(%struct.hoge* %arg) { bb: - %tmp = load double* undef, align 8 + %tmp = load double, double* undef, align 8 %tmp1 = fsub double %tmp, undef - %tmp2 = load double* undef, align 8 + %tmp2 = load double, double* undef, align 8 %tmp3 = fsub double %tmp2, undef %tmp4 = fmul double %tmp3, undef %tmp5 = fmul double %tmp3, undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll index 8da3c34a027..9a5eb12c0aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll @@ -13,14 +13,14 @@ target triple = "x86_64-apple-macosx10.8.0" define i32 @fn1() { entry: - %0 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4 - %1 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4 - %2 = load i32* @d, align 4 + %0 = load i32, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4 + %1 = load i32, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4 + %2 = load i32, i32* @d, align 4 %cond = icmp eq i32 %2, 0 br i1 %cond, label %sw.bb, label %save_state_and_return sw.bb: ; preds = %entry - %3 = load i32* @c, align 4 + %3 = load i32, i32* @c, align 4 %and = and i32 %3, 7 store i32 %and, i32* @a, align 4 switch i32 %and, label %if.end [ diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll index 8f023f80a67..45ca99a3ea1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll @@ -30,8 +30,8 @@ define void @bar() { %9 = phi double [ 1.800000e+01, %0 ], [ %10, %18 ], [ %10, %17 ], [ %10, %17 ] store double %9, double* %1, align 8 store double %8, double* %2, align 8 - %10 = load double* %3, align 8 - %11 = load double* %4, align 8 + %10 = load double, double* %3, align 8 + %11 = load double, double* %4, align 8 br i1 undef, label %12, label %13 ; <label>:12 ; preds = %7 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll index 1f78f925f39..ea0064d4682 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll @@ -26,9 +26,9 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: ret define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) { entry: - %0 = load float* %B, align 4 + %0 = load float, float* %B, align 4 %arrayidx1 = getelementptr inbounds float, float* %B, i64 1 - %1 = load float* %arrayidx1, align 4 + %1 = load float, float* %arrayidx1, align 4 %add = fadd float %0, 5.000000e+00 %add2 = fadd float %1, 8.000000e+00 %tobool = icmp eq i32 %g, 0 @@ -40,12 +40,12 @@ if.then: if.end: %conv = fpext float %add to double - %2 = load double* %A, align 8 + %2 = load double, double* %A, align 8 %add4 = fadd double %conv, %2 store double %add4, double* %A, align 8 %conv5 = fpext float %add2 to double %arrayidx6 = getelementptr inbounds double, double* %A, i64 1 - %3 = load double* %arrayidx6, align 8 + %3 = load double, double* %arrayidx6, align 8 %add7 = fadd double %conv5, %3 store double %add7, double* %arrayidx6, align 8 
ret i32 undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll index a0db8865e12..9f56e219599 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -22,12 +22,12 @@ target triple = "i386-apple-macosx10.8.0" define i32 @test(double* nocapture %G) { entry: %arrayidx = getelementptr inbounds double, double* %G, i64 5 - %0 = load double* %arrayidx, align 8 + %0 = load double, double* %arrayidx, align 8 %mul = fmul double %0, 4.000000e+00 %add = fadd double %mul, 1.000000e+00 store double %add, double* %G, align 8 %arrayidx2 = getelementptr inbounds double, double* %G, i64 6 - %1 = load double* %arrayidx2, align 8 + %1 = load double, double* %arrayidx2, align 8 %mul3 = fmul double %1, 3.000000e+00 %add4 = fadd double %mul3, 6.000000e+00 %arrayidx5 = getelementptr inbounds double, double* %G, i64 1 @@ -55,26 +55,26 @@ entry: ;CHECK: ret define i32 @foo(double* nocapture %A, i32 %n) { entry: - %0 = load double* %A, align 8 + %0 = load double, double* %A, align 8 %mul = fmul double %0, 7.900000e+00 %conv = sitofp i32 %n to double %mul1 = fmul double %conv, %mul %add = fadd double %mul1, 6.000000e+00 store double %add, double* %A, align 8 %arrayidx3 = getelementptr inbounds double, double* %A, i64 1 - %1 = load double* %arrayidx3, align 8 + %1 = load double, double* %arrayidx3, align 8 %mul4 = fmul double %1, 7.700000e+00 %mul6 = fmul double %conv, %mul4 %add7 = fadd double %mul6, 2.000000e+00 store double %add7, double* %arrayidx3, align 8 %arrayidx9 = getelementptr inbounds double, double* %A, i64 2 - %2 = load double* %arrayidx9, align 8 + %2 = load double, double* %arrayidx9, align 8 %mul10 = fmul double %2, 7.600000e+00 %mul12 = fmul double %conv, %mul10 %add13 = fadd double %mul12, 3.000000e+00 store double %add13, double* %arrayidx9, align 8 %arrayidx15 = getelementptr inbounds double, double* %A, i64 3 - %3 = load double* %arrayidx15, align 8 + %3 = load double, double* %arrayidx15, align 8 %mul16 = fmul double %3, 7.400000e+00 %mul18 = fmul double %conv, %mul16 %add19 = fadd double %mul18, 4.000000e+00 @@ -102,7 +102,7 @@ entry: define i32 @test2(double* nocapture %G, i32 %k) { %1 = icmp eq i32 %k, 0 %2 = getelementptr inbounds double, double* %G, i64 5 - %3 = load double* %2, align 8 + %3 = load double, double* %2, align 8 %4 = fmul double %3, 4.000000e+00 br i1 %1, label %12, label %5 @@ -110,7 +110,7 @@ define i32 @test2(double* nocapture %G, i32 %k) { %6 = fadd double %4, 1.000000e+00 store double %6, double* %G, align 8 %7 = getelementptr inbounds double, double* %G, i64 6 - %8 = load double* %7, align 8 + %8 = load double, double* %7, align 8 %9 = fmul double %8, 3.000000e+00 %10 = fadd double %9, 6.000000e+00 %11 = getelementptr inbounds double, double* %G, i64 1 @@ -122,7 +122,7 @@ define i32 @test2(double* nocapture %G, i32 %k) { %14 = getelementptr inbounds double, double* %G, i64 2 store double %13, double* %14, align 8 %15 = getelementptr inbounds double, double* %G, i64 6 - %16 = load double* %15, align 8 + %16 = load double, double* %15, align 8 %17 = fmul double %16, 3.000000e+00 %18 = fadd double %17, 8.000000e+00 %19 = getelementptr inbounds double, double* %G, i64 3 @@ -147,26 +147,26 @@ define i32 @test2(double* nocapture %G, i32 %k) { ;CHECK: ret define i32 @foo4(double* nocapture %A, i32 %n) { entry: - %0 = load double* %A, align 8 + %0 = load double, double* %A, align 8 %mul = fmul double %0, 7.900000e+00 %conv = sitofp i32 %n to double %mul1 = fmul 
double %conv, %mul %add = fadd double %mul1, 6.000000e+00 store double %add, double* %A, align 8 %arrayidx3 = getelementptr inbounds double, double* %A, i64 1 - %1 = load double* %arrayidx3, align 8 + %1 = load double, double* %arrayidx3, align 8 %mul4 = fmul double %1, 7.900000e+00 %mul6 = fmul double %conv, %mul4 %add7 = fadd double %mul6, 6.000000e+00 store double %add7, double* %arrayidx3, align 8 %arrayidx9 = getelementptr inbounds double, double* %A, i64 2 - %2 = load double* %arrayidx9, align 8 + %2 = load double, double* %arrayidx9, align 8 %mul10 = fmul double %2, 7.900000e+00 %mul12 = fmul double %conv, %mul10 %add13 = fadd double %mul12, 6.000000e+00 store double %add13, double* %arrayidx9, align 8 %arrayidx15 = getelementptr inbounds double, double* %A, i64 3 - %3 = load double* %arrayidx15, align 8 + %3 = load double, double* %arrayidx15, align 8 %mul16 = fmul double %3, 7.900000e+00 %mul18 = fmul double %conv, %mul16 %add19 = fadd double %mul18, 6.000000e+00 @@ -189,12 +189,12 @@ entry: ;CHECK: ret define i32 @partial_mrg(double* nocapture %A, i32 %n) { entry: - %0 = load double* %A, align 8 + %0 = load double, double* %A, align 8 %conv = sitofp i32 %n to double %mul = fmul double %conv, %0 store double %mul, double* %A, align 8 %arrayidx2 = getelementptr inbounds double, double* %A, i64 1 - %1 = load double* %arrayidx2, align 8 + %1 = load double, double* %arrayidx2, align 8 %mul4 = fmul double %conv, %1 store double %mul4, double* %arrayidx2, align 8 %cmp = icmp slt i32 %n, 4 @@ -202,11 +202,11 @@ entry: if.end: ; preds = %entry %arrayidx7 = getelementptr inbounds double, double* %A, i64 2 - %2 = load double* %arrayidx7, align 8 + %2 = load double, double* %arrayidx7, align 8 %mul9 = fmul double %conv, %2 store double %mul9, double* %arrayidx7, align 8 %arrayidx11 = getelementptr inbounds double, double* %A, i64 3 - %3 = load double* %arrayidx11, align 8 + %3 = load double, double* %arrayidx11, align 8 %add = add nsw i32 %n, 4 %conv12 = sitofp i32 %add to double %mul13 = fmul double %conv12, %3 @@ -228,18 +228,18 @@ entry: sw.epilog7: ; No predecessors! 
%.in = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 1 - %0 = load double* %.in, align 8 + %0 = load double, double* %.in, align 8 %add = fadd double undef, 0.000000e+00 %add6 = fadd double %add, %0 - %1 = load double* @a, align 8 + %1 = load double, double* @a, align 8 %add8 = fadd double %1, 0.000000e+00 %_dy = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 2 - %2 = load double* %_dy, align 8 + %2 = load double, double* %_dy, align 8 %add10 = fadd double %add8, %2 br i1 undef, label %if.then12, label %if.end13 if.then12: ; preds = %sw.epilog7 - %3 = load double* undef, align 8 + %3 = load double, double* undef, align 8 br label %if.end13 if.end13: ; preds = %if.then12, %sw.epilog7, %entry diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll index 59f2923261f..0a4e961c2e8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll @@ -23,15 +23,15 @@ target triple = "x86_64-apple-macosx10.9.0" ;CHECK-NEXT:ret i32 undef define i32 @foo(i32* nocapture %A) #0 { entry: - %0 = load i32* %A, align 4 + %0 = load i32, i32* %A, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 1 - %1 = load i32* %arrayidx1, align 4 + %1 = load i32, i32* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 2 - %2 = load i32* %arrayidx2, align 4 + %2 = load i32, i32* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 3 - %3 = load i32* %arrayidx3, align 4 + %3 = load i32, i32* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 13 - %4 = load i32* %arrayidx4, align 4 + %4 = load i32, i32* %arrayidx4, align 4 %cmp24 = icmp sgt i32 %4, 0 br i1 %cmp24, label %for.body, label %for.end diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll index d145a7d67ab..c28ccc5902d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll @@ -15,7 +15,7 @@ target triple = "x86_64-apple-macosx10.7.0" ;CHECK: @depth ;CHECK: getelementptr inbounds {{.*}}, !dbg ![[LOC:[0-9]+]] ;CHECK: bitcast double* {{.*}}, !dbg ![[LOC]] -;CHECK: load <2 x double>* {{.*}}, !dbg ![[LOC]] +;CHECK: load <2 x double>, <2 x double>* {{.*}}, !dbg ![[LOC]] ;CHECK: store <2 x double> {{.*}}, !dbg ![[LOC2:[0-9]+]] ;CHECK: ret ;CHECK: ![[LOC]] = !MDLocation(line: 4, scope: @@ -33,9 +33,9 @@ entry: for.body.lr.ph: ; preds = %entry %arrayidx = getelementptr inbounds double, double* %A, i64 4, !dbg !24 - %0 = load double* %arrayidx, align 8, !dbg !24 + %0 = load double, double* %arrayidx, align 8, !dbg !24 %arrayidx1 = getelementptr inbounds double, double* %A, i64 5, !dbg !29 - %1 = load double* %arrayidx1, align 8, !dbg !29 + %1 = load double, double* %arrayidx1, align 8, !dbg !29 br label %for.end, !dbg !23 for.end: ; preds = %for.body.lr.ph, %entry diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll index 692c0f633d5..4e2c02f6965 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll @@ -18,22 +18,22 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK: ret define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 { entry: - %0 = load i32* %A, align 4 + %0 = load i32, i32* %A, align 4 %mul238 = add i32 
%m, %n %add = mul i32 %0, %mul238 store i32 %add, i32* %B, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1 - %1 = load i32* %arrayidx4, align 4 + %1 = load i32, i32* %arrayidx4, align 4 %add8 = mul i32 %1, %mul238 %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1 store i32 %add8, i32* %arrayidx9, align 4 %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2 - %2 = load i32* %arrayidx10, align 4 + %2 = load i32, i32* %arrayidx10, align 4 %add14 = mul i32 %2, %mul238 %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2 store i32 %add14, i32* %arrayidx15, align 4 %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3 - %3 = load i32* %arrayidx16, align 4 + %3 = load i32, i32* %arrayidx16, align 4 %add20 = mul i32 %3, %mul238 %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3 store i32 %add20, i32* %arrayidx21, align 4 @@ -56,22 +56,22 @@ entry: ; CHECK-NEXT: ret define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { entry: - %0 = load i32* %A, align 4 + %0 = load i32, i32* %A, align 4 %mul238 = add i32 %m, %n %add = mul i32 %0, %mul238 store i32 %add, i32* %B, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1 - %1 = load i32* %arrayidx4, align 4 + %1 = load i32, i32* %arrayidx4, align 4 %add8 = mul i32 %1, %mul238 %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1 store i32 %add8, i32* %arrayidx9, align 4 %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2 - %2 = load i32* %arrayidx10, align 4 + %2 = load i32, i32* %arrayidx10, align 4 %add14 = mul i32 %2, %mul238 %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2 store i32 %add14, i32* %arrayidx15, align 4 %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3 - %3 = load i32* %arrayidx16, align 4 + %3 = load i32, i32* %arrayidx16, align 4 %add20 = mul i32 %3, %mul238 %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3 store i32 %add20, i32* %arrayidx21, align 4 @@ -86,22 +86,22 @@ entry: ; CHECK-NEXT: ret define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { entry: - %0 = load i32* %A, align 4 + %0 = load i32, i32* %A, align 4 %mul238 = add i32 %m, %n %add = mul i32 %0, %mul238 store i32 %add, i32* %B, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1 - %1 = load i32* %arrayidx4, align 4 + %1 = load i32, i32* %arrayidx4, align 4 %add8 = mul i32 %1, %mul238 %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1 store i32 %add8, i32* %arrayidx9, align 4 %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2 - %2 = load i32* %arrayidx10, align 4 + %2 = load i32, i32* %arrayidx10, align 4 %add14 = mul i32 %2, %mul238 %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2 store i32 %add14, i32* %arrayidx15, align 4 %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3 - %3 = load i32* %arrayidx16, align 4 + %3 = load i32, i32* %arrayidx16, align 4 %add20 = mul i32 %3, %mul238 %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3 store i32 %add20, i32* %arrayidx21, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll index 68cef94ba74..bf2febda86b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll @@ -34,8 +34,8 @@ target triple = "x86_64-apple-macosx10.8.0" define double @ext_user(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) { entry: %arrayidx = getelementptr inbounds double, 
double* %A, i64 1 - %0 = load double* %arrayidx, align 8 - %1 = load double* %A, align 8 + %0 = load double, double* %arrayidx, align 8 + %1 = load double, double* %A, align 8 br label %for.body for.body: ; preds = %for.body, %entry @@ -69,9 +69,9 @@ for.end: ; preds = %for.body define i32 @needtogather(double *noalias %a, i32 *noalias %b, float * noalias %c, i32 * noalias %d) { entry: - %0 = load i32* %d, align 4 + %0 = load i32, i32* %d, align 4 %conv = sitofp i32 %0 to float - %1 = load float* %c + %1 = load float, float* %c %sub = fsub float 0.000000e+00, %1 %mul = fmul float %sub, 0.000000e+00 %add = fadd float %conv, %mul diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll index 5ac07a7683b..9a6ee2afc8e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll @@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: ret void define void @fextr(double* %ptr) { entry: - %LD = load <2 x double>* undef + %LD = load <2 x double>, <2 x double>* undef %V0 = extractelement <2 x double> %LD, i32 0 %V1 = extractelement <2 x double> %LD, i32 1 %P0 = getelementptr inbounds double, double* %ptr, i64 0 @@ -27,7 +27,7 @@ entry: ;CHECK: ret void define void @fextr1(double* %ptr) { entry: - %LD = load <2 x double>* undef + %LD = load <2 x double>, <2 x double>* undef %V0 = extractelement <2 x double> %LD, i32 0 %V1 = extractelement <2 x double> %LD, i32 1 %P0 = getelementptr inbounds double, double* %ptr, i64 1 ; <--- incorrect order @@ -45,7 +45,7 @@ entry: ;CHECK: ret void define void @fextr2(double* %ptr) { entry: - %LD = load <4 x double>* undef + %LD = load <4 x double>, <4 x double>* undef %V0 = extractelement <4 x double> %LD, i32 0 ; <--- invalid size. 
%V1 = extractelement <4 x double> %LD, i32 1 %P0 = getelementptr inbounds double, double* %ptr, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll index a68ac7d2197..6e5415b3c70 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; Function Attrs: nounwind ssp uwtable define i32 @fn1() { entry: - %0 = load i64** @a, align 8 + %0 = load i64*, i64** @a, align 8 %add.ptr = getelementptr inbounds i64, i64* %0, i64 11 %1 = ptrtoint i64* %add.ptr to i64 store i64 %1, i64* %add.ptr, align 8 @@ -25,32 +25,32 @@ entry: declare float @llvm.powi.f32(float, i32) define void @fn2(i32* %a, i32* %b, float* %c) { entry: - %i0 = load i32* %a, align 4 - %i1 = load i32* %b, align 4 + %i0 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %b, align 4 %add1 = add i32 %i0, %i1 %fp1 = sitofp i32 %add1 to float %call1 = tail call float @llvm.powi.f32(float %fp1,i32 %add1) nounwind readnone %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1 - %i2 = load i32* %arrayidx2, align 4 + %i2 = load i32, i32* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1 - %i3 = load i32* %arrayidx3, align 4 + %i3 = load i32, i32* %arrayidx3, align 4 %add2 = add i32 %i2, %i3 %fp2 = sitofp i32 %add2 to float %call2 = tail call float @llvm.powi.f32(float %fp2,i32 %add1) nounwind readnone %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2 - %i4 = load i32* %arrayidx4, align 4 + %i4 = load i32, i32* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2 - %i5 = load i32* %arrayidx5, align 4 + %i5 = load i32, i32* %arrayidx5, align 4 %add3 = add i32 %i4, %i5 %fp3 = sitofp i32 %add3 to float %call3 = tail call float @llvm.powi.f32(float %fp3,i32 %add1) nounwind readnone %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3 - %i6 = load i32* %arrayidx6, align 4 + %i6 = load i32, i32* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3 - %i7 = load i32* %arrayidx7, align 4 + %i7 = load i32, i32* %arrayidx7, align 4 %add4 = add i32 %i6, %i7 %fp4 = sitofp i32 %add4 to float %call4 = tail call float @llvm.powi.f32(float %fp4,i32 %add1) nounwind readnone diff --git a/llvm/test/Transforms/SLPVectorizer/X86/flag.ll b/llvm/test/Transforms/SLPVectorizer/X86/flag.ll index 2890c9f41aa..7db8d75c20a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/flag.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/flag.ll @@ -16,16 +16,16 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6 %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ] %2 = shl i64 %i.019, 2 %3 = getelementptr inbounds i32, i32* %in, i64 %2 - %4 = load i32* %3, align 4 + %4 = load i32, i32* %3, align 4 %5 = or i64 %2, 1 %6 = getelementptr inbounds i32, i32* %in, i64 %5 - %7 = load i32* %6, align 4 + %7 = load i32, i32* %6, align 4 %8 = or i64 %2, 2 %9 = getelementptr inbounds i32, i32* %in, i64 %8 - %10 = load i32* %9, align 4 + %10 = load i32, i32* %9, align 4 %11 = or i64 %2, 3 %12 = getelementptr inbounds i32, i32* %in, i64 %11 - %13 = load i32* %12, align 4 + %13 = load i32, i32* %12, align 4 %14 = mul i32 %4, 7 %15 = add i32 %14, 7 %16 = mul i32 %7, 7 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll index 6c6a7bd3319..3f952d7b242 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll @@ -10,12 +10,12 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: <2 x i32*> define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) { %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0 - %2 = load i32** %1, align 8 + %2 = load i32*, i32** %1, align 8 %3 = getelementptr inbounds i32, i32* %2, i64 16 %4 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 0 store i32* %3, i32** %4, align 8 %5 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 1 - %6 = load i32** %5, align 8 + %6 = load i32*, i32** %5, align 8 %7 = getelementptr inbounds i32, i32* %6, i64 16 %8 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 1 store i32* %7, i32** %8, align 8 @@ -28,12 +28,12 @@ define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) { ; CHECK-NOT: <2 x i32*> define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) { %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0 - %2 = load i32** %1, align 8 + %2 = load i32*, i32** %1, align 8 %3 = getelementptr inbounds i32, i32* %2, i32 %i %4 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 0 store i32* %3, i32** %4, align 8 %5 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 1 - %6 = load i32** %5, align 8 + %6 = load i32*, i32** %5, align 8 %7 = getelementptr inbounds i32, i32* %6, i32 %i %8 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 1 store i32* %7, i32** %8, align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll index c5e5b25b902..36c939b597e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll @@ -31,22 +31,22 @@ entry: for.body: ; preds = %entry, %for.body %i.024 = phi i32 [ 0, %entry ], [ %add10, %for.body ] %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.024 - %0 = load i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %n store i32 %add, i32* %arrayidx, align 4 %add121 = or i32 %i.024, 1 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add121 - %1 = load i32* %arrayidx2, align 4 + %1 = load i32, i32* %arrayidx2, align 4 %add3 = add nsw i32 %1, %k store i32 %add3, i32* %arrayidx2, align 4 %add422 = or i32 %i.024, 2 %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add422 - %2 = load i32* %arrayidx5, align 4 + %2 = load i32, i32* %arrayidx5, align 4 %add6 = add nsw i32 %2, %n store i32 %add6, i32* %arrayidx5, align 4 %add723 = or i32 %i.024, 3 %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add723 - %3 = load i32* %arrayidx8, align 4 + %3 = load i32, i32* %arrayidx8, align 4 %add9 = add nsw i32 %3, %k store i32 %add9, i32* %arrayidx8, align 4 %add10 = add nsw i32 %i.024, 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index 21d38c4de40..83b2e01f04e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -33,21 +33,21 @@ for.body: %sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ] %mul = shl nsw i64 %i.033, 2 %arrayidx = getelementptr inbounds float, float* %A, i64 %mul - %1 = load float* %arrayidx, align 4 + %1 = load float, float* 
%arrayidx, align 4 %mul2 = fmul float %1, 7.000000e+00 %add28 = or i64 %mul, 1 %arrayidx4 = getelementptr inbounds float, float* %A, i64 %add28 - %2 = load float* %arrayidx4, align 4 + %2 = load float, float* %arrayidx4, align 4 %mul5 = fmul float %2, 7.000000e+00 %add6 = fadd fast float %mul2, %mul5 %add829 = or i64 %mul, 2 %arrayidx9 = getelementptr inbounds float, float* %A, i64 %add829 - %3 = load float* %arrayidx9, align 4 + %3 = load float, float* %arrayidx9, align 4 %mul10 = fmul float %3, 7.000000e+00 %add11 = fadd fast float %add6, %mul10 %add1330 = or i64 %mul, 3 %arrayidx14 = getelementptr inbounds float, float* %A, i64 %add1330 - %4 = load float* %arrayidx14, align 4 + %4 = load float, float* %arrayidx14, align 4 %mul15 = fmul float %4, 7.000000e+00 %add16 = fadd fast float %add11, %mul15 %add17 = fadd fast float %sum.032, %add16 @@ -85,13 +85,13 @@ entry: br i1 %cmp38, label %for.body.lr.ph, label %for.end for.body.lr.ph: - %0 = load float* %B, align 4 + %0 = load float, float* %B, align 4 %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 - %1 = load float* %arrayidx4, align 4 + %1 = load float, float* %arrayidx4, align 4 %arrayidx9 = getelementptr inbounds float, float* %B, i64 2 - %2 = load float* %arrayidx9, align 4 + %2 = load float, float* %arrayidx9, align 4 %arrayidx15 = getelementptr inbounds float, float* %B, i64 3 - %3 = load float* %arrayidx15, align 4 + %3 = load float, float* %arrayidx15, align 4 %4 = sext i32 %n to i64 br label %for.body @@ -100,21 +100,21 @@ for.body: %sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ] %mul = shl nsw i64 %i.040, 2 %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul - %5 = load float* %arrayidx2, align 4 + %5 = load float, float* %arrayidx2, align 4 %mul3 = fmul float %0, %5 %add35 = or i64 %mul, 1 %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add35 - %6 = load float* %arrayidx6, align 4 + %6 = load float, float* %arrayidx6, align 4 %mul7 = fmul float %1, %6 %add8 = fadd fast float %mul3, %mul7 %add1136 = or i64 %mul, 2 %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1136 - %7 = load float* %arrayidx12, align 4 + %7 = load float, float* %arrayidx12, align 4 %mul13 = fmul float %2, %7 %add14 = fadd fast float %add8, %mul13 %add1737 = or i64 %mul, 3 %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1737 - %8 = load float* %arrayidx18, align 4 + %8 = load float, float* %arrayidx18, align 4 %mul19 = fmul float %3, %8 %add20 = fadd fast float %add14, %mul19 %mul21 = fmul float %sum.039, %add20 @@ -157,23 +157,23 @@ entry: br i1 %cmp81, label %for.body.lr.ph, label %for.end for.body.lr.ph: - %0 = load float* %B, align 4 + %0 = load float, float* %B, align 4 %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 - %1 = load float* %arrayidx4, align 4 + %1 = load float, float* %arrayidx4, align 4 %arrayidx9 = getelementptr inbounds float, float* %B, i64 2 - %2 = load float* %arrayidx9, align 4 + %2 = load float, float* %arrayidx9, align 4 %arrayidx15 = getelementptr inbounds float, float* %B, i64 3 - %3 = load float* %arrayidx15, align 4 + %3 = load float, float* %arrayidx15, align 4 %arrayidx21 = getelementptr inbounds float, float* %B, i64 4 - %4 = load float* %arrayidx21, align 4 + %4 = load float, float* %arrayidx21, align 4 %arrayidx27 = getelementptr inbounds float, float* %B, i64 5 - %5 = load float* %arrayidx27, align 4 + %5 = load float, float* %arrayidx27, align 4 %arrayidx33 = getelementptr inbounds float, float* %B, i64 6 - %6 = load 
float* %arrayidx33, align 4 + %6 = load float, float* %arrayidx33, align 4 %arrayidx39 = getelementptr inbounds float, float* %B, i64 7 - %7 = load float* %arrayidx39, align 4 + %7 = load float, float* %arrayidx39, align 4 %arrayidx45 = getelementptr inbounds float, float* %B, i64 8 - %8 = load float* %arrayidx45, align 4 + %8 = load float, float* %arrayidx45, align 4 %9 = sext i32 %n to i64 br label %for.body @@ -182,46 +182,46 @@ for.body: %sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ] %mul = mul nsw i64 %i.083, 6 %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul - %10 = load float* %arrayidx2, align 4 + %10 = load float, float* %arrayidx2, align 4 %mul3 = fmul fast float %0, %10 %add80 = or i64 %mul, 1 %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add80 - %11 = load float* %arrayidx6, align 4 + %11 = load float, float* %arrayidx6, align 4 %mul7 = fmul fast float %1, %11 %add8 = fadd fast float %mul3, %mul7 %add11 = add nsw i64 %mul, 2 %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add11 - %12 = load float* %arrayidx12, align 4 + %12 = load float, float* %arrayidx12, align 4 %mul13 = fmul fast float %2, %12 %add14 = fadd fast float %add8, %mul13 %add17 = add nsw i64 %mul, 3 %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add17 - %13 = load float* %arrayidx18, align 4 + %13 = load float, float* %arrayidx18, align 4 %mul19 = fmul fast float %3, %13 %add20 = fadd fast float %add14, %mul19 %add23 = add nsw i64 %mul, 4 %arrayidx24 = getelementptr inbounds float, float* %A, i64 %add23 - %14 = load float* %arrayidx24, align 4 + %14 = load float, float* %arrayidx24, align 4 %mul25 = fmul fast float %4, %14 %add26 = fadd fast float %add20, %mul25 %add29 = add nsw i64 %mul, 5 %arrayidx30 = getelementptr inbounds float, float* %A, i64 %add29 - %15 = load float* %arrayidx30, align 4 + %15 = load float, float* %arrayidx30, align 4 %mul31 = fmul fast float %5, %15 %add32 = fadd fast float %add26, %mul31 %add35 = add nsw i64 %mul, 6 %arrayidx36 = getelementptr inbounds float, float* %A, i64 %add35 - %16 = load float* %arrayidx36, align 4 + %16 = load float, float* %arrayidx36, align 4 %mul37 = fmul fast float %6, %16 %add38 = fadd fast float %add32, %mul37 %add41 = add nsw i64 %mul, 7 %arrayidx42 = getelementptr inbounds float, float* %A, i64 %add41 - %17 = load float* %arrayidx42, align 4 + %17 = load float, float* %arrayidx42, align 4 %mul43 = fmul fast float %7, %17 %add44 = fadd fast float %add38, %mul43 %add47 = add nsw i64 %mul, 8 %arrayidx48 = getelementptr inbounds float, float* %A, i64 %add47 - %18 = load float* %arrayidx48, align 4 + %18 = load float, float* %arrayidx48, align 4 %mul49 = fmul fast float %8, %18 %add50 = fadd fast float %add44, %mul49 %add51 = fadd fast float %sum.082, %add50 @@ -259,13 +259,13 @@ entry: br i1 %cmp41, label %for.body.lr.ph, label %for.end for.body.lr.ph: - %0 = load float* %B, align 4 + %0 = load float, float* %B, align 4 %arrayidx4 = getelementptr inbounds float, float* %B, i64 1 - %1 = load float* %arrayidx4, align 4 + %1 = load float, float* %arrayidx4, align 4 %arrayidx10 = getelementptr inbounds float, float* %B, i64 2 - %2 = load float* %arrayidx10, align 4 + %2 = load float, float* %arrayidx10, align 4 %arrayidx16 = getelementptr inbounds float, float* %B, i64 3 - %3 = load float* %arrayidx16, align 4 + %3 = load float, float* %arrayidx16, align 4 %4 = sext i32 %n to i64 br label %for.body @@ -274,22 +274,22 @@ for.body: %sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph 
], [ %add21, %for.body ] %mul = shl nsw i64 %i.043, 2 %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul - %5 = load float* %arrayidx2, align 4 + %5 = load float, float* %arrayidx2, align 4 %mul3 = fmul fast float %0, %5 %add = fadd fast float %sum.042, %mul3 %add638 = or i64 %mul, 1 %arrayidx7 = getelementptr inbounds float, float* %A, i64 %add638 - %6 = load float* %arrayidx7, align 4 + %6 = load float, float* %arrayidx7, align 4 %mul8 = fmul fast float %1, %6 %add9 = fadd fast float %add, %mul8 %add1239 = or i64 %mul, 2 %arrayidx13 = getelementptr inbounds float, float* %A, i64 %add1239 - %7 = load float* %arrayidx13, align 4 + %7 = load float, float* %arrayidx13, align 4 %mul14 = fmul fast float %2, %7 %add15 = fadd fast float %add9, %mul14 %add1840 = or i64 %mul, 3 %arrayidx19 = getelementptr inbounds float, float* %A, i64 %add1840 - %8 = load float* %arrayidx19, align 4 + %8 = load float, float* %arrayidx19, align 4 %mul20 = fmul fast float %3, %8 %add21 = fadd fast float %add15, %mul20 %inc = add nsw i64 %i.043, 1 @@ -335,27 +335,27 @@ for.body.lr.ph: for.body: %i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] %C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] - %1 = load float* %B, align 4 + %1 = load float, float* %B, align 4 %mul = shl nsw i64 %i.039, 2 %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul - %2 = load float* %arrayidx2, align 4 + %2 = load float, float* %arrayidx2, align 4 %mul3 = fmul fast float %1, %2 - %3 = load float* %arrayidx4, align 4 + %3 = load float, float* %arrayidx4, align 4 %add34 = or i64 %mul, 1 %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add34 - %4 = load float* %arrayidx6, align 4 + %4 = load float, float* %arrayidx6, align 4 %mul7 = fmul fast float %3, %4 %add8 = fadd fast float %mul3, %mul7 - %5 = load float* %arrayidx9, align 4 + %5 = load float, float* %arrayidx9, align 4 %add1135 = or i64 %mul, 2 %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1135 - %6 = load float* %arrayidx12, align 4 + %6 = load float, float* %arrayidx12, align 4 %mul13 = fmul fast float %5, %6 %add14 = fadd fast float %add8, %mul13 - %7 = load float* %arrayidx15, align 4 + %7 = load float, float* %arrayidx15, align 4 %add1736 = or i64 %mul, 3 %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1736 - %8 = load float* %arrayidx18, align 4 + %8 = load float, float* %arrayidx18, align 4 %mul19 = fmul fast float %7, %8 %add20 = fadd fast float %add14, %mul19 store float %add20, float* %C.addr.038, align 4 @@ -389,9 +389,9 @@ entry: br i1 %cmp17, label %for.body.lr.ph, label %for.end for.body.lr.ph: - %0 = load double* %B, align 8 + %0 = load double, double* %B, align 8 %arrayidx4 = getelementptr inbounds double, double* %B, i64 1 - %1 = load double* %arrayidx4, align 8 + %1 = load double, double* %arrayidx4, align 8 %2 = sext i32 %n to i64 br label %for.body @@ -399,11 +399,11 @@ for.body: %i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] %mul = shl nsw i64 %i.018, 2 %arrayidx2 = getelementptr inbounds double, double* %A, i64 %mul - %3 = load double* %arrayidx2, align 8 + %3 = load double, double* %arrayidx2, align 8 %mul3 = fmul fast double %0, %3 %add16 = or i64 %mul, 1 %arrayidx6 = getelementptr inbounds double, double* %A, i64 %add16 - %4 = load double* %arrayidx6, align 8 + %4 = load double, double* %arrayidx6, align 8 %mul7 = fmul fast double %1, %4 %add8 = fadd fast double %mul3, %mul7 %arrayidx9 = getelementptr inbounds double, double* %C, i64 %i.018 diff 
--git a/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll b/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll index 3b80472ce3c..f7283f0d027 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll @@ -9,13 +9,13 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK: ret define void @test1(double* %a, double* %b, double* %c) noimplicitfloat { ; <------ noimplicitfloat attribute here! entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double* %c, align 8 %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll index 9bc44f21097..26f0b9bfabe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll @@ -20,14 +20,14 @@ for.body: ; preds = %for.inc, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] %0 = shl nsw i64 %indvars.iv, 1 %arrayidx = getelementptr inbounds double, double* %A, i64 %0 - %1 = load double* %arrayidx, align 8 + %1 = load double, double* %arrayidx, align 8 %mul1 = fmul double %conv, %1 %mul2 = fmul double %mul1, 7.000000e+00 %add = fadd double %mul2, 5.000000e+00 %InTreeUser = fadd double %add, %add ; <------------------ In tree user. 
%2 = or i64 %0, 1 %arrayidx6 = getelementptr inbounds double, double* %A, i64 %2 - %3 = load double* %arrayidx6, align 8 + %3 = load double, double* %arrayidx6, align 8 %mul8 = fmul double %conv, %3 %mul9 = fmul double %mul8, 4.000000e+00 %add10 = fadd double %mul9, 9.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll index 974d7e64b7b..cc5a4afe43d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll @@ -13,14 +13,14 @@ declare double @llvm.fabs.f64(double) nounwind readnone ;CHECK: ret define void @vec_fabs_f64(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone store double %call, double* %c, align 8 @@ -39,31 +39,31 @@ declare float @llvm.copysign.f32(float, float) nounwind readnone ;CHECK: ret define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) { entry: - %0 = load float* %a, align 4 - %1 = load float* %b, align 4 + %0 = load float, float* %a, align 4 + %1 = load float, float* %b, align 4 %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone store float %call0, float* %c, align 4 %ix2 = getelementptr inbounds float, float* %a, i64 1 - %2 = load float* %ix2, align 4 + %2 = load float, float* %ix2, align 4 %ix3 = getelementptr inbounds float, float* %b, i64 1 - %3 = load float* %ix3, align 4 + %3 = load float, float* %ix3, align 4 %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone %c1 = getelementptr inbounds float, float* %c, i64 1 store float %call1, float* %c1, align 4 %ix4 = getelementptr inbounds float, float* %a, i64 2 - %4 = load float* %ix4, align 4 + %4 = load float, float* %ix4, align 4 %ix5 = getelementptr inbounds float, float* %b, i64 2 - %5 = load float* %ix5, align 4 + %5 = load float, float* %ix5, align 4 %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone %c2 = getelementptr inbounds float, float* %c, i64 2 store float %call2, float* %c2, align 4 %ix6 = getelementptr inbounds float, float* %a, i64 3 - %6 = load float* %ix6, align 4 + %6 = load float, float* %ix6, align 4 %ix7 = getelementptr inbounds float, float* %b, i64 3 - %7 = load float* %ix7, align 4 + %7 = load float, float* %ix7, align 4 %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone %c3 = getelementptr inbounds float, float* %c, i64 3 store float %call3, float* %c3, align 4 @@ -75,29 +75,29 @@ declare i32 @llvm.bswap.i32(i32) nounwind readnone define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) { entry: - %i0 = load i32* %a, align 4 - %i1 = load i32* %b, align 4 + %i0 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %b, align 4 %add1 = add i32 %i0, %i1 %call1 = tail call i32 @llvm.bswap.i32(i32 %add1) nounwind readnone %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1 - %i2 = load 
i32* %arrayidx2, align 4 + %i2 = load i32, i32* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1 - %i3 = load i32* %arrayidx3, align 4 + %i3 = load i32, i32* %arrayidx3, align 4 %add2 = add i32 %i2, %i3 %call2 = tail call i32 @llvm.bswap.i32(i32 %add2) nounwind readnone %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2 - %i4 = load i32* %arrayidx4, align 4 + %i4 = load i32, i32* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2 - %i5 = load i32* %arrayidx5, align 4 + %i5 = load i32, i32* %arrayidx5, align 4 %add3 = add i32 %i4, %i5 %call3 = tail call i32 @llvm.bswap.i32(i32 %add3) nounwind readnone %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3 - %i6 = load i32* %arrayidx6, align 4 + %i6 = load i32, i32* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3 - %i7 = load i32* %arrayidx7, align 4 + %i7 = load i32, i32* %arrayidx7, align 4 %add4 = add i32 %i6, %i7 %call4 = tail call i32 @llvm.bswap.i32(i32 %add4) nounwind readnone @@ -122,29 +122,29 @@ declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) { entry: - %i0 = load i32* %a, align 4 - %i1 = load i32* %b, align 4 + %i0 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %b, align 4 %add1 = add i32 %i0, %i1 %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1 - %i2 = load i32* %arrayidx2, align 4 + %i2 = load i32, i32* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1 - %i3 = load i32* %arrayidx3, align 4 + %i3 = load i32, i32* %arrayidx3, align 4 %add2 = add i32 %i2, %i3 %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2 - %i4 = load i32* %arrayidx4, align 4 + %i4 = load i32, i32* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2 - %i5 = load i32* %arrayidx5, align 4 + %i5 = load i32, i32* %arrayidx5, align 4 %add3 = add i32 %i4, %i5 %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3 - %i6 = load i32* %arrayidx6, align 4 + %i6 = load i32, i32* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3 - %i7 = load i32* %arrayidx7, align 4 + %i7 = load i32, i32* %arrayidx7, align 4 %add4 = add i32 %i6, %i7 %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone @@ -167,29 +167,29 @@ entry: define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) { entry: - %i0 = load i32* %a, align 4 - %i1 = load i32* %b, align 4 + %i0 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %b, align 4 %add1 = add i32 %i0, %i1 %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1 - %i2 = load i32* %arrayidx2, align 4 + %i2 = load i32, i32* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1 - %i3 = load i32* %arrayidx3, align 4 + %i3 = load i32, i32* %arrayidx3, align 4 %add2 = add i32 %i2, %i3 %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2 - %i4 = load i32* %arrayidx4, align 4 + %i4 = load i32, i32* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2 - %i5 = load i32* %arrayidx5, align 4 + %i5 = load i32, i32* %arrayidx5, align 4 %add3 = add i32 %i4, %i5 %call3 = tail call i32 
@llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3 - %i6 = load i32* %arrayidx6, align 4 + %i6 = load i32, i32* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3 - %i7 = load i32* %arrayidx7, align 4 + %i7 = load i32, i32* %arrayidx7, align 4 %add4 = add i32 %i6, %i7 %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone @@ -212,29 +212,29 @@ declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) { entry: - %i0 = load i32* %a, align 4 - %i1 = load i32* %b, align 4 + %i0 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %b, align 4 %add1 = add i32 %i0, %i1 %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1 - %i2 = load i32* %arrayidx2, align 4 + %i2 = load i32, i32* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1 - %i3 = load i32* %arrayidx3, align 4 + %i3 = load i32, i32* %arrayidx3, align 4 %add2 = add i32 %i2, %i3 %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2 - %i4 = load i32* %arrayidx4, align 4 + %i4 = load i32, i32* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2 - %i5 = load i32* %arrayidx5, align 4 + %i5 = load i32, i32* %arrayidx5, align 4 %add3 = add i32 %i4, %i5 %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3 - %i6 = load i32* %arrayidx6, align 4 + %i6 = load i32, i32* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3 - %i7 = load i32* %arrayidx7, align 4 + %i7 = load i32, i32* %arrayidx7, align 4 %add4 = add i32 %i6, %i7 %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone @@ -257,29 +257,29 @@ entry: define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) { entry: - %i0 = load i32* %a, align 4 - %i1 = load i32* %b, align 4 + %i0 = load i32, i32* %a, align 4 + %i1 = load i32, i32* %b, align 4 %add1 = add i32 %i0, %i1 %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1 - %i2 = load i32* %arrayidx2, align 4 + %i2 = load i32, i32* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1 - %i3 = load i32* %arrayidx3, align 4 + %i3 = load i32, i32* %arrayidx3, align 4 %add2 = add i32 %i2, %i3 %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2 - %i4 = load i32* %arrayidx4, align 4 + %i4 = load i32, i32* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2 - %i5 = load i32* %arrayidx5, align 4 + %i5 = load i32, i32* %arrayidx5, align 4 %add3 = add i32 %i4, %i5 %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3 - %i6 = load i32* %arrayidx6, align 4 + %i6 = load i32, i32* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3 - %i7 = load i32* %arrayidx7, align 4 + %i7 = load i32, i32* %arrayidx7, align 4 %add4 = add i32 %i6, %i7 %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone @@ -300,29 +300,29 @@ entry: declare float @llvm.powi.f32(float, i32) define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) { entry: - %i0 = load float* %a, align 4 - 
%i1 = load float* %b, align 4 + %i0 = load float, float* %a, align 4 + %i1 = load float, float* %b, align 4 %add1 = fadd float %i0, %i1 %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone %arrayidx2 = getelementptr inbounds float, float* %a, i32 1 - %i2 = load float* %arrayidx2, align 4 + %i2 = load float, float* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds float, float* %b, i32 1 - %i3 = load float* %arrayidx3, align 4 + %i3 = load float, float* %arrayidx3, align 4 %add2 = fadd float %i2, %i3 %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone %arrayidx4 = getelementptr inbounds float, float* %a, i32 2 - %i4 = load float* %arrayidx4, align 4 + %i4 = load float, float* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds float, float* %b, i32 2 - %i5 = load float* %arrayidx5, align 4 + %i5 = load float, float* %arrayidx5, align 4 %add3 = fadd float %i4, %i5 %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone %arrayidx6 = getelementptr inbounds float, float* %a, i32 3 - %i6 = load float* %arrayidx6, align 4 + %i6 = load float, float* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds float, float* %b, i32 3 - %i7 = load float* %arrayidx7, align 4 + %i7 = load float, float* %arrayidx7, align 4 %add4 = fadd float %i6, %i7 %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone @@ -346,29 +346,29 @@ entry: define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) { entry: - %i0 = load float* %a, align 4 - %i1 = load float* %b, align 4 + %i0 = load float, float* %a, align 4 + %i1 = load float, float* %b, align 4 %add1 = fadd float %i0, %i1 %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone %arrayidx2 = getelementptr inbounds float, float* %a, i32 1 - %i2 = load float* %arrayidx2, align 4 + %i2 = load float, float* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds float, float* %b, i32 1 - %i3 = load float* %arrayidx3, align 4 + %i3 = load float, float* %arrayidx3, align 4 %add2 = fadd float %i2, %i3 %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone %arrayidx4 = getelementptr inbounds float, float* %a, i32 2 - %i4 = load float* %arrayidx4, align 4 + %i4 = load float, float* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds float, float* %b, i32 2 - %i5 = load float* %arrayidx5, align 4 + %i5 = load float, float* %arrayidx5, align 4 %add3 = fadd float %i4, %i5 %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone %arrayidx6 = getelementptr inbounds float, float* %a, i32 3 - %i6 = load float* %arrayidx6, align 4 + %i6 = load float, float* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds float, float* %b, i32 3 - %i7 = load float* %arrayidx7, align 4 + %i7 = load float, float* %arrayidx7, align 4 %add4 = fadd float %i6, %i7 %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll index cd9d59f5c34..f87dabf4c9f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll @@ -13,9 +13,9 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK: ret define i32 @test(double* nocapture %A, i8* nocapture %B) { entry: - %0 = load i8* %B, align 1 + %0 = load i8, i8* %B, align 1 %arrayidx1 = getelementptr inbounds i8, i8* %B, i64 1 - %1 = load i8* %arrayidx1, align 1 + 
%1 = load i8, i8* %arrayidx1, align 1 %add = add i8 %0, 3 %add4 = add i8 %1, 3 %conv6 = sitofp i8 %add to double diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll index c113d89140e..0c16c34a188 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -19,42 +19,42 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx, align 4 %add1 = add nsw i32 %0, %n store i32 %add1, i32* %arrayidx, align 4 %1 = or i64 %indvars.iv, 1 %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1 - %2 = load i32* %arrayidx4, align 4 + %2 = load i32, i32* %arrayidx4, align 4 %add5 = add nsw i32 %2, %n store i32 %add5, i32* %arrayidx4, align 4 %3 = or i64 %indvars.iv, 2 %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %3 - %4 = load i32* %arrayidx8, align 4 + %4 = load i32, i32* %arrayidx8, align 4 %add9 = add nsw i32 %4, %n store i32 %add9, i32* %arrayidx8, align 4 %5 = or i64 %indvars.iv, 3 %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 %5 - %6 = load i32* %arrayidx12, align 4 + %6 = load i32, i32* %arrayidx12, align 4 %add13 = add nsw i32 %6, %n store i32 %add13, i32* %arrayidx12, align 4 %7 = or i64 %indvars.iv, 4 %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 %7 - %8 = load i32* %arrayidx16, align 4 + %8 = load i32, i32* %arrayidx16, align 4 %add17 = add nsw i32 %8, %n store i32 %add17, i32* %arrayidx16, align 4 %9 = or i64 %indvars.iv, 5 %arrayidx20 = getelementptr inbounds i32, i32* %A, i64 %9 - %10 = load i32* %arrayidx20, align 4 + %10 = load i32, i32* %arrayidx20, align 4 %add21 = add nsw i32 %10, %n store i32 %add21, i32* %arrayidx20, align 4 %11 = or i64 %indvars.iv, 6 %arrayidx24 = getelementptr inbounds i32, i32* %A, i64 %11 - %12 = load i32* %arrayidx24, align 4 + %12 = load i32, i32* %arrayidx24, align 4 %add25 = add nsw i32 %12, %n store i32 %add25, i32* %arrayidx24, align 4 %13 = or i64 %indvars.iv, 7 %arrayidx28 = getelementptr inbounds i32, i32* %A, i64 %13 - %14 = load i32* %arrayidx28, align 4 + %14 = load i32, i32* %arrayidx28, align 4 %add29 = add nsw i32 %14, %n store i32 %add29, i32* %arrayidx28, align 4 %indvars.iv.next = add i64 %indvars.iv, 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll index 0aa1d12c674..ebef6b53c6a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll @@ -12,13 +12,13 @@ target triple = "x86_64-apple-macosx10.8.0" define void @test1(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8, !tbaa !4 - %i1 = load double* %b, align 8, !tbaa !4 + %i0 = load double, double* %a, align 8, !tbaa !4 + %i1 = load double, double* %b, align 8, !tbaa !4 %mul = fmul double %i0, %i1, !fpmath !0 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8, !tbaa !4 + %i3 = load double, double* %arrayidx3, align 8, !tbaa !4 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8, !tbaa !4 + %i4 = load double, double* %arrayidx4, align 8, !tbaa !4 %mul5 = fmul double %i3, %i4, !fpmath !0 store double %mul, double* %c, align 8, !tbaa !4 %arrayidx5 = getelementptr inbounds 
double, double* %c, i64 1 @@ -35,13 +35,13 @@ entry: define void @test2(double* %a, double* %b, i8* %e) { entry: - %i0 = load double* %a, align 8, !tbaa !4 - %i1 = load double* %b, align 8, !tbaa !4 + %i0 = load double, double* %a, align 8, !tbaa !4 + %i1 = load double, double* %b, align 8, !tbaa !4 %mul = fmul double %i0, %i1, !fpmath !1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8, !tbaa !4 + %i3 = load double, double* %arrayidx3, align 8, !tbaa !4 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8, !tbaa !4 + %i4 = load double, double* %arrayidx4, align 8, !tbaa !4 %mul5 = fmul double %i3, %i4, !fpmath !1 %c = bitcast i8* %e to double* store double %mul, double* %c, align 8, !tbaa !4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll index 91f9ad53d33..993054a090c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll @@ -25,9 +25,9 @@ target triple = "x86_64-apple-macosx10.7.0" ;CHECK: store <2 x double> ;CHECK: ret define i32 @bar(double* nocapture %A, i32 %d) { - %1 = load double* %A, align 8 + %1 = load double, double* %A, align 8 %2 = getelementptr inbounds double, double* %A, i64 1 - %3 = load double* %2, align 8 + %3 = load double, double* %2, align 8 %4 = fptrunc double %1 to float %5 = fptrunc double %3 to float %6 = icmp eq i32 %d, 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll index 93204e92982..3197f6db266 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll @@ -20,27 +20,27 @@ target triple = "x86_64-apple-macosx10.7.0" define i32 @foo(i32* nocapture %A, i32 %n) { %1 = mul nsw i32 %n, 5 %2 = add nsw i32 %1, 7 - %3 = load i32* %A, align 4 + %3 = load i32, i32* %A, align 4 %4 = add nsw i32 %2, %3 store i32 %4, i32* %A, align 4 %5 = add nsw i32 %1, 8 %6 = getelementptr inbounds i32, i32* %A, i64 1 - %7 = load i32* %6, align 4 + %7 = load i32, i32* %6, align 4 %8 = add nsw i32 %5, %7 store i32 %8, i32* %6, align 4 %9 = add nsw i32 %1, 9 %10 = getelementptr inbounds i32, i32* %A, i64 2 - %11 = load i32* %10, align 4 + %11 = load i32, i32* %10, align 4 %12 = add nsw i32 %9, %11 store i32 %12, i32* %10, align 4 %13 = add nsw i32 %1, 10 %14 = getelementptr inbounds i32, i32* %A, i64 3 - %15 = load i32* %14, align 4 + %15 = load i32, i32* %14, align 4 %16 = add nsw i32 %13, %15 store i32 %16, i32* %14, align 4 %17 = add nsw i32 %1, 11 %18 = getelementptr inbounds i32, i32* %A, i64 4 - %19 = load i32* %18, align 4 + %19 = load i32, i32* %18, align 4 %20 = add nsw i32 %17, %19 store i32 %20, i32* %18, align 4 ret i32 undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll index 6c8beb8d45c..25f049adf47 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll @@ -19,14 +19,14 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: ret define i32 @foo(i8* noalias nocapture %A, float* noalias nocapture %B, float %T) { %1 = getelementptr inbounds float, float* %B, i64 10 - %2 = load float* %1, align 4 + %2 = load float, float* %1, align 4 %3 = fmul float %2, %T %4 = fpext float %3 to double %5 = fadd double %4, 4.000000e+00 %6 = fptosi double %5 to i8 store i8 
%6, i8* %A, align 1 %7 = getelementptr inbounds float, float* %B, i64 11 - %8 = load float* %7, align 4 + %8 = load float, float* %7, align 4 %9 = fmul float %8, %T %10 = fpext float %9 to double %11 = fadd double %10, 5.000000e+00 @@ -34,7 +34,7 @@ define i32 @foo(i8* noalias nocapture %A, float* noalias nocapture %B, float %T) %13 = getelementptr inbounds i8, i8* %A, i64 1 store i8 %12, i8* %13, align 1 %14 = getelementptr inbounds float, float* %B, i64 12 - %15 = load float* %14, align 4 + %15 = load float, float* %14, align 4 %16 = fmul float %15, %T %17 = fpext float %16 to double %18 = fadd double %17, 6.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll index 929c3c21c25..4a88dbf0de4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -14,8 +14,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 define void @shuffle_operands1(double * noalias %from, double * noalias %to, double %v1, double %v2) { %from_1 = getelementptr double, double *%from, i64 1 - %v0_1 = load double * %from - %v0_2 = load double * %from_1 + %v0_1 = load double , double * %from + %v0_2 = load double , double * %from_1 %v1_1 = fadd double %v0_1, %v1 %v1_2 = fadd double %v2, %v0_2 %to_2 = getelementptr double, double * %to, i64 1 @@ -36,8 +36,8 @@ br label %lp lp: %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] %from_1 = getelementptr double, double *%from, i64 1 - %v0_1 = load double * %from - %v0_2 = load double * %from_1 + %v0_1 = load double , double * %from + %v0_2 = load double , double * %from_1 %v1_1 = fadd double %v0_1, %p %v1_2 = fadd double %v0_1, %v0_2 %to_2 = getelementptr double, double * %to, i64 1 @@ -61,8 +61,8 @@ br label %lp lp: %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] %from_1 = getelementptr double, double *%from, i64 1 - %v0_1 = load double * %from - %v0_2 = load double * %from_1 + %v0_1 = load double , double * %from + %v0_2 = load double , double * %from_1 %v1_1 = fadd double %p, %v0_1 %v1_2 = fadd double %v0_2, %v0_1 %to_2 = getelementptr double, double * %to, i64 1 @@ -86,8 +86,8 @@ br label %lp lp: %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] %from_1 = getelementptr double, double *%from, i64 1 - %v0_1 = load double * %from - %v0_2 = load double * %from_1 + %v0_1 = load double , double * %from + %v0_2 = load double , double * %from_1 %v1_1 = fadd double %p, %v0_1 %v1_2 = fadd double %v0_1, %v0_2 %to_2 = getelementptr double, double * %to, i64 1 @@ -112,8 +112,8 @@ br label %lp lp: %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] %from_1 = getelementptr double, double *%from, i64 1 - %v0_1 = load double * %from - %v0_2 = load double * %from_1 + %v0_1 = load double , double * %from + %v0_2 = load double , double * %from_1 %v1_1 = fadd double %v0_2, %v0_1 %v1_2 = fadd double %p, %v0_1 %to_2 = getelementptr double, double * %to, i64 1 @@ -137,8 +137,8 @@ br label %lp lp: %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ] %from_1 = getelementptr double, double *%from, i64 1 - %v0_1 = load double * %from - %v0_2 = load double * %from_1 + %v0_1 = load double , double * %from + %v0_2 = load double , double * %from_1 %v1_1 = fadd double %v0_1, %v0_2 %v1_2 = fadd double %p, %v0_1 %to_2 = getelementptr double, double * %to, i64 1 @@ -163,8 +163,8 @@ br label %lp lp: %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, 
%entry ] %from_1 = getelementptr double, double *%from, i64 1 - %v0_1 = load double * %from - %v0_2 = load double * %from_1 + %v0_1 = load double , double * %from + %v0_2 = load double , double * %from_1 %v1_1 = fadd double %v0_1, %v0_2 %v1_2 = fadd double %v0_1, %p %to_2 = getelementptr double, double * %to, i64 1 @@ -181,7 +181,7 @@ ext: ; CHECK-LABEL: good_load_order -; CHECK: %[[V1:[0-9]+]] = load <4 x float>* +; CHECK: %[[V1:[0-9]+]] = load <4 x float>, <4 x float>* ; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0 ; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6> ; CHECK: = fmul <4 x float> %[[V1]], %[[V3]] @@ -193,7 +193,7 @@ entry: br label %for.cond1.preheader for.cond1.preheader: - %0 = load float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16 + %0 = load float, float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16 br label %for.body3 for.body3: @@ -201,28 +201,28 @@ for.body3: %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] %2 = add nsw i64 %indvars.iv, 1 %arrayidx = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %2 - %3 = load float* %arrayidx, align 4 + %3 = load float, float* %arrayidx, align 4 %arrayidx5 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv %mul6 = fmul float %3, %1 store float %mul6, float* %arrayidx5, align 4 %4 = add nsw i64 %indvars.iv, 2 %arrayidx11 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %4 - %5 = load float* %arrayidx11, align 4 + %5 = load float, float* %arrayidx11, align 4 %mul15 = fmul float %5, %3 store float %mul15, float* %arrayidx, align 4 %6 = add nsw i64 %indvars.iv, 3 %arrayidx21 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %6 - %7 = load float* %arrayidx21, align 4 + %7 = load float, float* %arrayidx21, align 4 %mul25 = fmul float %7, %5 store float %mul25, float* %arrayidx11, align 4 %8 = add nsw i64 %indvars.iv, 4 %arrayidx31 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %8 - %9 = load float* %arrayidx31, align 4 + %9 = load float, float* %arrayidx31, align 4 %mul35 = fmul float %9, %7 store float %mul35, float* %arrayidx21, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5 %arrayidx41 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.next - %10 = load float* %arrayidx41, align 4 + %10 = load float, float* %arrayidx41, align 4 %mul45 = fmul float %10, %9 store float %mul45, float* %arrayidx31, align 4 %11 = trunc i64 %indvars.iv.next to i32 @@ -238,17 +238,17 @@ for.end: ; c[1] = b[1]+a[1]; // swapped b[1] and a[1] ; CHECK-LABEL: load_reorder_double -; CHECK: load <2 x double>* +; CHECK: load <2 x double>, <2 x double>* ; CHECK: fadd <2 x double> define void @load_reorder_double(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b){ - %1 = load double* %a - %2 = load double* %b + %1 = load double, double* %a + %2 = load double, double* %b %3 = fadd double %1, %2 store double %3, double* %c %4 = getelementptr inbounds double, double* %b, i64 1 - %5 = load double* %4 + %5 = load double, double* %4 %6 = getelementptr inbounds double, double* %a, i64 1 - %7 = load double* %6 + %7 = load double, double* %6 %8 = fadd double %5, %7 %9 = getelementptr inbounds double, double* %c, i64 1 store double %8, double* %9 @@ -262,31 +262,31 @@ 
define void @load_reorder_double(double* nocapture %c, double* noalias nocapture ; c[3] = a[3]+b[3]; ; CHECK-LABEL: load_reorder_float -; CHECK: load <4 x float>* +; CHECK: load <4 x float>, <4 x float>* ; CHECK: fadd <4 x float> define void @load_reorder_float(float* nocapture %c, float* noalias nocapture readonly %a, float* noalias nocapture readonly %b){ - %1 = load float* %a - %2 = load float* %b + %1 = load float, float* %a + %2 = load float, float* %b %3 = fadd float %1, %2 store float %3, float* %c %4 = getelementptr inbounds float, float* %b, i64 1 - %5 = load float* %4 + %5 = load float, float* %4 %6 = getelementptr inbounds float, float* %a, i64 1 - %7 = load float* %6 + %7 = load float, float* %6 %8 = fadd float %5, %7 %9 = getelementptr inbounds float, float* %c, i64 1 store float %8, float* %9 %10 = getelementptr inbounds float, float* %a, i64 2 - %11 = load float* %10 + %11 = load float, float* %10 %12 = getelementptr inbounds float, float* %b, i64 2 - %13 = load float* %12 + %13 = load float, float* %12 %14 = fadd float %11, %13 %15 = getelementptr inbounds float, float* %c, i64 2 store float %14, float* %15 %16 = getelementptr inbounds float, float* %a, i64 3 - %17 = load float* %16 + %17 = load float, float* %16 %18 = getelementptr inbounds float, float* %b, i64 3 - %19 = load float* %18 + %19 = load float, float* %18 %20 = fadd float %17, %19 %21 = getelementptr inbounds float, float* %c, i64 3 store float %20, float* %21 @@ -300,43 +300,43 @@ define void @load_reorder_float(float* nocapture %c, float* noalias nocapture re ; a[3] = (b[3]+c[3])+d[3]; ; CHECK-LABEL: opcode_reorder -; CHECK: load <4 x float>* +; CHECK: load <4 x float>, <4 x float>* ; CHECK: fadd <4 x float> define void @opcode_reorder(float* noalias nocapture %a, float* noalias nocapture readonly %b, float* noalias nocapture readonly %c,float* noalias nocapture readonly %d){ - %1 = load float* %b - %2 = load float* %c + %1 = load float, float* %b + %2 = load float, float* %c %3 = fadd float %1, %2 - %4 = load float* %d + %4 = load float, float* %d %5 = fadd float %3, %4 store float %5, float* %a %6 = getelementptr inbounds float, float* %d, i64 1 - %7 = load float* %6 + %7 = load float, float* %6 %8 = getelementptr inbounds float, float* %b, i64 1 - %9 = load float* %8 + %9 = load float, float* %8 %10 = getelementptr inbounds float, float* %c, i64 1 - %11 = load float* %10 + %11 = load float, float* %10 %12 = fadd float %9, %11 %13 = fadd float %7, %12 %14 = getelementptr inbounds float, float* %a, i64 1 store float %13, float* %14 %15 = getelementptr inbounds float, float* %b, i64 2 - %16 = load float* %15 + %16 = load float, float* %15 %17 = getelementptr inbounds float, float* %c, i64 2 - %18 = load float* %17 + %18 = load float, float* %17 %19 = fadd float %16, %18 %20 = getelementptr inbounds float, float* %d, i64 2 - %21 = load float* %20 + %21 = load float, float* %20 %22 = fadd float %19, %21 %23 = getelementptr inbounds float, float* %a, i64 2 store float %22, float* %23 %24 = getelementptr inbounds float, float* %b, i64 3 - %25 = load float* %24 + %25 = load float, float* %24 %26 = getelementptr inbounds float, float* %c, i64 3 - %27 = load float* %26 + %27 = load float, float* %26 %28 = fadd float %25, %27 %29 = getelementptr inbounds float, float* %d, i64 3 - %30 = load float* %29 + %30 = load float, float* %29 %31 = fadd float %28, %30 %32 = getelementptr inbounds float, float* %a, i64 3 store float %31, float* %32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll index d6954caadc5..824e9992af0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll @@ -15,13 +15,13 @@ target triple = "x86_64-apple-macosx10.8.0" define void @test1(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double* %c, align 8 %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll index d2ecd4546dd..0fa72c94c27 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll @@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.8.0" define void @updateModelQPFrame(i32 %m_Bits) { entry: - %0 = load double* undef, align 8 + %0 = load double, double* undef, align 8 %mul = fmul double undef, %0 %mul2 = fmul double undef, %mul %mul4 = fmul double %0, %mul2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index 7654577c365..ef94467f509 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -30,9 +30,9 @@ entry: if.else: ; preds = %entry %arrayidx = getelementptr inbounds double, double* %A, i64 10 - %0 = load double* %arrayidx, align 8 + %0 = load double, double* %arrayidx, align 8 %arrayidx1 = getelementptr inbounds double, double* %A, i64 11 - %1 = load double* %arrayidx1, align 8 + %1 = load double, double* %arrayidx1, align 8 br label %if.end if.end: ; preds = %entry, %if.else @@ -70,8 +70,8 @@ if.end: ; preds = %entry, %if.else define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) #0 { entry: %arrayidx = getelementptr inbounds double, double* %A, i64 1 - %0 = load double* %arrayidx, align 8 - %1 = load double* %A, align 8 + %0 = load double, double* %arrayidx, align 8 + %1 = load double, double* %A, align 8 br label %for.body for.body: ; preds = %for.body, %entry @@ -123,15 +123,15 @@ for.end: ; preds = %for.body define float @foo3(float* nocapture readonly %A) #0 { entry: - %0 = load float* %A, align 4 + %0 = load float, float* %A, align 4 %arrayidx1 = getelementptr inbounds float, float* %A, i64 1 - %1 = load float* %arrayidx1, align 4 + %1 = load float, float* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds float, float* %A, i64 2 - %2 = load float* %arrayidx2, align 4 + %2 = load float, float* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds float, float* %A, i64 3 - %3 = load float* %arrayidx3, align 4 + %3 = load float, float* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds float, float* %A, i64 4 - %4 = load float* %arrayidx4, align 4 + %4 = load float, float* %arrayidx4, align 4 br label %for.body for.body: ; preds = %for.body, %entry @@ -149,17 +149,17 @@ for.body: ; preds = %for.body, %entry %add11 = fadd float %G.053, %mul10 %7 = add nsw i64 %indvars.iv, 2 %arrayidx14 = getelementptr inbounds float, 
float* %A, i64 %7 - %8 = load float* %arrayidx14, align 4 + %8 = load float, float* %arrayidx14, align 4 %mul15 = fmul float %8, 9.000000e+00 %add16 = fadd float %B.054, %mul15 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 %arrayidx19 = getelementptr inbounds float, float* %A, i64 %indvars.iv.next - %9 = load float* %arrayidx19, align 4 + %9 = load float, float* %arrayidx19, align 4 %mul20 = fmul float %9, 1.000000e+01 %add21 = fadd float %Y.055, %mul20 %10 = add nsw i64 %indvars.iv, 4 %arrayidx24 = getelementptr inbounds float, float* %A, i64 %10 - %11 = load float* %arrayidx24, align 4 + %11 = load float, float* %arrayidx24, align 4 %mul25 = fmul float %11, 1.100000e+01 %add26 = fadd float %P.056, %mul25 %12 = trunc i64 %indvars.iv.next to i32 @@ -215,22 +215,22 @@ define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) { ; We disable the vectorization of x86_fp80 for now. entry: - %i1.0 = load x86_fp80* %i1, align 16 + %i1.0 = load x86_fp80, x86_fp80* %i1, align 16 %i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1 - %i1.1 = load x86_fp80* %i1.gep1, align 16 -; CHECK: load x86_fp80* -; CHECK: load x86_fp80* + %i1.1 = load x86_fp80, x86_fp80* %i1.gep1, align 16 +; CHECK: load x86_fp80, x86_fp80* +; CHECK: load x86_fp80, x86_fp80* ; CHECK-NOT: insertelement <2 x x86_fp80> ; CHECK-NOT: insertelement <2 x x86_fp80> br i1 undef, label %then, label %end then: %i2.gep0 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 0 - %i2.0 = load x86_fp80* %i2.gep0, align 16 + %i2.0 = load x86_fp80, x86_fp80* %i2.gep0, align 16 %i2.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 1 - %i2.1 = load x86_fp80* %i2.gep1, align 16 -; CHECK: load x86_fp80* -; CHECK: load x86_fp80* + %i2.1 = load x86_fp80, x86_fp80* %i2.gep1, align 16 +; CHECK: load x86_fp80, x86_fp80* +; CHECK: load x86_fp80, x86_fp80* ; CHECK-NOT: insertelement <2 x x86_fp80> ; CHECK-NOT: insertelement <2 x x86_fp80> br label %end diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll index fd8d3613720..61628301aec 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll @@ -12,7 +12,7 @@ declare %struct.GPar.0.16.26* @Rf_gpptr(...) define void @Rf_GReset() { entry: %sub = fsub double -0.000000e+00, undef - %0 = load double* @d, align 8 + %0 = load double, double* @d, align 8 %sub1 = fsub double -0.000000e+00, %0 br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll index 3da83f9502f..fa08effcd64 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll @@ -12,22 +12,22 @@ define void @test(double* %i1, double* %i2, double* %o) { ; size is less than the alignment, and through various different GEP formations. 
entry: - %i1.0 = load double* %i1, align 16 + %i1.0 = load double, double* %i1, align 16 %i1.gep1 = getelementptr double, double* %i1, i64 1 - %i1.1 = load double* %i1.gep1, align 16 -; CHECK: load double* -; CHECK: load double* + %i1.1 = load double, double* %i1.gep1, align 16 +; CHECK: load double, double* +; CHECK: load double, double* ; CHECK: insertelement <2 x double> ; CHECK: insertelement <2 x double> br i1 undef, label %then, label %end then: %i2.gep0 = getelementptr inbounds double, double* %i2, i64 0 - %i2.0 = load double* %i2.gep0, align 16 + %i2.0 = load double, double* %i2.gep0, align 16 %i2.gep1 = getelementptr inbounds double, double* %i2, i64 1 - %i2.1 = load double* %i2.gep1, align 16 -; CHECK: load double* -; CHECK: load double* + %i2.1 = load double, double* %i2.gep1, align 16 +; CHECK: load double, double* +; CHECK: load double, double* ; CHECK: insertelement <2 x double> ; CHECK: insertelement <2 x double> br label %end diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll index 02512b33795..a97b870f3f4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll @@ -4,36 +4,36 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ;CHECK-LABEL: @powof2div( -;CHECK: load <4 x i32>* +;CHECK: load <4 x i32>, <4 x i32>* ;CHECK: add nsw <4 x i32> ;CHECK: sdiv <4 x i32> define void @powof2div(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){ entry: - %0 = load i32* %b, align 4 - %1 = load i32* %c, align 4 + %0 = load i32, i32* %b, align 4 + %1 = load i32, i32* %c, align 4 %add = add nsw i32 %1, %0 %div = sdiv i32 %add, 2 store i32 %div, i32* %a, align 4 %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1 - %2 = load i32* %arrayidx3, align 4 + %2 = load i32, i32* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1 - %3 = load i32* %arrayidx4, align 4 + %3 = load i32, i32* %arrayidx4, align 4 %add5 = add nsw i32 %3, %2 %div6 = sdiv i32 %add5, 2 %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1 store i32 %div6, i32* %arrayidx7, align 4 %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2 - %4 = load i32* %arrayidx8, align 4 + %4 = load i32, i32* %arrayidx8, align 4 %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2 - %5 = load i32* %arrayidx9, align 4 + %5 = load i32, i32* %arrayidx9, align 4 %add10 = add nsw i32 %5, %4 %div11 = sdiv i32 %add10, 2 %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2 store i32 %div11, i32* %arrayidx12, align 4 %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3 - %6 = load i32* %arrayidx13, align 4 + %6 = load i32, i32* %arrayidx13, align 4 %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3 - %7 = load i32* %arrayidx14, align 4 + %7 = load i32, i32* %arrayidx14, align 4 %add15 = add nsw i32 %7, %6 %div16 = sdiv i32 %add15, 2 %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll index 3f9d775eeeb..c22ed34d33d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll @@ -11,10 +11,10 @@ target triple = "x86_64-apple-macosx10.9.0" define void @f() { entry: %call = tail call i32 (...)* @g() - %0 = load i32* @c, align 4 + %0 = load i32, i32* @c, align 4 %lnot = icmp eq i32 %0, 0 %lnot.ext = 
zext i1 %lnot to i32 - %1 = load i16* @a, align 2 + %1 = load i16, i16* @a, align 2 %lnot2 = icmp eq i16 %1, 0 %lnot.ext3 = zext i1 %lnot2 to i32 %or = or i32 %lnot.ext3, %lnot.ext diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll index 5fe038eb814..0de14ec3585 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll @@ -7,10 +7,10 @@ target triple = "i386--netbsd" ; Function Attrs: noreturn nounwind readonly define i32 @fn1() #0 { entry: - %0 = load i32** @a, align 4, !tbaa !4 - %1 = load i32* %0, align 4, !tbaa !5 + %0 = load i32*, i32** @a, align 4, !tbaa !4 + %1 = load i32, i32* %0, align 4, !tbaa !5 %arrayidx1 = getelementptr inbounds i32, i32* %0, i32 1 - %2 = load i32* %arrayidx1, align 4, !tbaa !5 + %2 = load i32, i32* %arrayidx1, align 4, !tbaa !5 br label %do.body do.body: ; preds = %do.body, %entry diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll index f5e24674e8f..a687aec7610 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll @@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -;CHECK: load <2 x double>* +;CHECK: load <2 x double>, <2 x double>* ;CHECK: fadd <2 x double> ;CHECK: store <2 x double> @@ -11,60 +11,60 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo(double* %x) #0 { %1 = alloca double*, align 8 store double* %x, double** %1, align 8 - %2 = load double** %1, align 8 + %2 = load double*, double** %1, align 8 %3 = getelementptr inbounds double, double* %2, i64 0 - %4 = load double* %3, align 8 - %5 = load double** %1, align 8 + %4 = load double, double* %3, align 8 + %5 = load double*, double** %1, align 8 %6 = getelementptr inbounds double, double* %5, i64 0 - %7 = load double* %6, align 8 + %7 = load double, double* %6, align 8 %8 = fadd double %4, %7 - %9 = load double** %1, align 8 + %9 = load double*, double** %1, align 8 %10 = getelementptr inbounds double, double* %9, i64 0 - %11 = load double* %10, align 8 + %11 = load double, double* %10, align 8 %12 = fadd double %8, %11 - %13 = load double** %1, align 8 + %13 = load double*, double** %1, align 8 %14 = getelementptr inbounds double, double* %13, i64 0 store double %12, double* %14, align 8 - %15 = load double** %1, align 8 + %15 = load double*, double** %1, align 8 %16 = getelementptr inbounds double, double* %15, i64 1 - %17 = load double* %16, align 8 - %18 = load double** %1, align 8 + %17 = load double, double* %16, align 8 + %18 = load double*, double** %1, align 8 %19 = getelementptr inbounds double, double* %18, i64 1 - %20 = load double* %19, align 8 + %20 = load double, double* %19, align 8 %21 = fadd double %17, %20 - %22 = load double** %1, align 8 + %22 = load double*, double** %1, align 8 %23 = getelementptr inbounds double, double* %22, i64 1 - %24 = load double* %23, align 8 + %24 = load double, double* %23, align 8 %25 = fadd double %21, %24 - %26 = load double** %1, align 8 + %26 = load double*, double** %1, align 8 %27 = getelementptr inbounds double, double* %26, i64 1 store double %25, double* %27, align 8 - %28 = load double** %1, align 8 + %28 = load double*, double** %1, align 8 %29 = getelementptr inbounds double, double* %28, i64 2 - %30 = load double* %29, align 8 - %31 = load double** %1, align 8 + %30 = load double, double* %29, align 8 + %31 = 
load double*, double** %1, align 8 %32 = getelementptr inbounds double, double* %31, i64 2 - %33 = load double* %32, align 8 + %33 = load double, double* %32, align 8 %34 = fadd double %30, %33 - %35 = load double** %1, align 8 + %35 = load double*, double** %1, align 8 %36 = getelementptr inbounds double, double* %35, i64 2 - %37 = load double* %36, align 8 + %37 = load double, double* %36, align 8 %38 = fadd double %34, %37 - %39 = load double** %1, align 8 + %39 = load double*, double** %1, align 8 %40 = getelementptr inbounds double, double* %39, i64 2 store double %38, double* %40, align 8 - %41 = load double** %1, align 8 + %41 = load double*, double** %1, align 8 %42 = getelementptr inbounds double, double* %41, i64 3 - %43 = load double* %42, align 8 - %44 = load double** %1, align 8 + %43 = load double, double* %42, align 8 + %44 = load double*, double** %1, align 8 %45 = getelementptr inbounds double, double* %44, i64 3 - %46 = load double* %45, align 8 + %46 = load double, double* %45, align 8 %47 = fadd double %43, %46 - %48 = load double** %1, align 8 + %48 = load double*, double** %1, align 8 %49 = getelementptr inbounds double, double* %48, i64 3 - %50 = load double* %49, align 8 + %50 = load double, double* %49, align 8 %51 = fadd double %47, %50 - %52 = load double** %1, align 8 + %52 = load double*, double** %1, align 8 %53 = getelementptr inbounds double, double* %52, i64 3 store double %51, double* %53, align 8 ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll index 49ddd9bf8d6..a3b0c8ff027 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -15,10 +15,10 @@ define void @exact(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = lshr exact i32 %load1, 1 %op2 = lshr exact i32 %load2, 1 @@ -41,10 +41,10 @@ define void @not_exact(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = lshr exact i32 %load1, 1 %op2 = lshr i32 %load2, 1 @@ -67,10 +67,10 @@ define void @nsw(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = add nsw i32 %load1, 1 %op2 = add nsw i32 %load2, 1 @@ -93,10 +93,10 @@ define void @not_nsw(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, 
align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = add nsw i32 %load1, 1 %op2 = add nsw i32 %load2, 1 @@ -119,10 +119,10 @@ define void @nuw(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = add nuw i32 %load1, 1 %op2 = add nuw i32 %load2, 1 @@ -145,10 +145,10 @@ define void @not_nuw(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = add nuw i32 %load1, 1 %op2 = add i32 %load2, 1 @@ -171,10 +171,10 @@ define void @nnan(float* %x) { %idx3 = getelementptr inbounds float, float* %x, i64 2 %idx4 = getelementptr inbounds float, float* %x, i64 3 - %load1 = load float* %idx1, align 4 - %load2 = load float* %idx2, align 4 - %load3 = load float* %idx3, align 4 - %load4 = load float* %idx4, align 4 + %load1 = load float, float* %idx1, align 4 + %load2 = load float, float* %idx2, align 4 + %load3 = load float, float* %idx3, align 4 + %load4 = load float, float* %idx4, align 4 %op1 = fadd fast nnan float %load1, 1.0 %op2 = fadd nnan ninf float %load2, 1.0 @@ -197,10 +197,10 @@ define void @not_nnan(float* %x) { %idx3 = getelementptr inbounds float, float* %x, i64 2 %idx4 = getelementptr inbounds float, float* %x, i64 3 - %load1 = load float* %idx1, align 4 - %load2 = load float* %idx2, align 4 - %load3 = load float* %idx3, align 4 - %load4 = load float* %idx4, align 4 + %load1 = load float, float* %idx1, align 4 + %load2 = load float, float* %idx2, align 4 + %load3 = load float, float* %idx3, align 4 + %load4 = load float, float* %idx4, align 4 %op1 = fadd nnan float %load1, 1.0 %op2 = fadd ninf float %load2, 1.0 @@ -223,10 +223,10 @@ define void @only_fast(float* %x) { %idx3 = getelementptr inbounds float, float* %x, i64 2 %idx4 = getelementptr inbounds float, float* %x, i64 3 - %load1 = load float* %idx1, align 4 - %load2 = load float* %idx2, align 4 - %load3 = load float* %idx3, align 4 - %load4 = load float* %idx4, align 4 + %load1 = load float, float* %idx1, align 4 + %load2 = load float, float* %idx2, align 4 + %load3 = load float, float* %idx3, align 4 + %load4 = load float, float* %idx4, align 4 %op1 = fadd fast nnan float %load1, 1.0 %op2 = fadd fast nnan ninf float %load2, 1.0 @@ -249,10 +249,10 @@ define void @only_arcp(float* %x) { %idx3 = getelementptr inbounds float, float* %x, i64 2 %idx4 = getelementptr inbounds float, float* %x, i64 3 - %load1 = load float* %idx1, align 4 - %load2 = load float* %idx2, align 4 - %load3 = load float* %idx3, align 4 - %load4 = load float* %idx4, align 4 + %load1 = load float, float* %idx1, align 4 + %load2 = load float, float* %idx2, align 4 + %load3 = load float, float* %idx3, align 4 + %load4 = load float, float* 
%idx4, align 4 %op1 = fadd fast float %load1, 1.0 %op2 = fadd fast float %load2, 1.0 @@ -276,10 +276,10 @@ define void @addsub_all_nsw(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = add nsw i32 %load1, 1 %op2 = sub nsw i32 %load2, 1 @@ -303,10 +303,10 @@ define void @addsub_some_nsw(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = add nsw i32 %load1, 1 %op2 = sub nsw i32 %load2, 1 @@ -330,10 +330,10 @@ define void @addsub_no_nsw(i32* %x) { %idx3 = getelementptr inbounds i32, i32* %x, i64 2 %idx4 = getelementptr inbounds i32, i32* %x, i64 3 - %load1 = load i32* %idx1, align 4 - %load2 = load i32* %idx2, align 4 - %load3 = load i32* %idx3, align 4 - %load4 = load i32* %idx4, align 4 + %load1 = load i32, i32* %idx1, align 4 + %load2 = load i32, i32* %idx2, align 4 + %load3 = load i32, i32* %idx3, align 4 + %load4 = load i32, i32* %idx4, align 4 %op1 = add i32 %load1, 1 %op2 = sub nsw i32 %load2, 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll index 1dc63563c02..4c5f1266336 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -24,11 +24,11 @@ for.body: ; preds = %entry, %for.body %sum.014 = phi double [ %add6, %for.body ], [ 0.000000e+00, %entry ] %mul = shl nsw i32 %i.015, 1 %arrayidx = getelementptr inbounds double, double* %A, i32 %mul - %0 = load double* %arrayidx, align 4 + %0 = load double, double* %arrayidx, align 4 %mul1 = fmul double %0, 7.000000e+00 %add12 = or i32 %mul, 1 %arrayidx3 = getelementptr inbounds double, double* %A, i32 %add12 - %1 = load double* %arrayidx3, align 4 + %1 = load double, double* %arrayidx3, align 4 %mul4 = fmul double %1, 7.000000e+00 %add5 = fadd double %mul1, %mul4 %add6 = fadd double %sum.014, %add5 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll index cd3175c41d5..507a61aa16f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -14,12 +14,12 @@ define double @foo(double* nocapture %D) { %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ] %2 = shl nsw i32 %i.02, 1 %3 = getelementptr inbounds double, double* %D, i32 %2 - %4 = load double* %3, align 4 + %4 = load double, double* %3, align 4 %A4 = fmul double %4, %4 %A42 = fmul double %A4, %A4 %5 = or i32 %2, 1 %6 = getelementptr inbounds double, double* %D, i32 %5 - %7 = load double* %6, align 4 + %7 = load double, double* %6, align 4 %A7 = fmul double %7, %7 %A72 = fmul double %A7, %A7 %8 = fadd double %A42, %A72 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/return.ll b/llvm/test/Transforms/SLPVectorizer/X86/return.ll index 13a6cf4df66..242edf5885f 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/return.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/return.ll @@ -13,17 +13,17 @@ target triple = "x86_64--linux-gnu" ; } ; CHECK-LABEL: @return1 -; CHECK: %0 = load <2 x double>* -; CHECK: %1 = load <2 x double>* +; CHECK: %0 = load <2 x double>, <2 x double>* +; CHECK: %1 = load <2 x double>, <2 x double>* ; CHECK: %2 = fadd <2 x double> define double @return1() { entry: - %a0 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8 - %b0 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8 + %a0 = load double, double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8 + %b0 = load double, double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8 %add0 = fadd double %a0, %b0 - %a1 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8 - %b1 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8 + %a1 = load double, double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8 + %b1 = load double, double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8 %add1 = fadd double %a1, %b1 %add2 = fadd double %add0, %add1 ret double %add2 @@ -34,20 +34,20 @@ entry: ; } ; CHECK-LABEL: @return2 -; CHECK: %1 = load <2 x double>* -; CHECK: %3 = load <2 x double>* %2 +; CHECK: %1 = load <2 x double>, <2 x double>* +; CHECK: %3 = load <2 x double>, <2 x double>* %2 ; CHECK: %4 = fadd <2 x double> %1, %3 define double @return2(double* nocapture readonly %x) { entry: - %x0 = load double* %x, align 4 + %x0 = load double, double* %x, align 4 %arrayidx1 = getelementptr inbounds double, double* %x, i32 2 - %x2 = load double* %arrayidx1, align 4 + %x2 = load double, double* %arrayidx1, align 4 %add3 = fadd double %x0, %x2 %arrayidx2 = getelementptr inbounds double, double* %x, i32 1 - %x1 = load double* %arrayidx2, align 4 + %x1 = load double, double* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds double, double* %x, i32 3 - %x3 = load double* %arrayidx3, align 4 + %x3 = load double, double* %arrayidx3, align 4 %add4 = fadd double %x1, %x3 %add5 = fadd double %add3, %add4 ret double %add5 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll index 2a3cc6dc40f..0bdb7dab172 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll @@ -33,11 +33,11 @@ target triple = "i386-apple-macosx10.9.0" define float @foo(float* nocapture readonly %A) { entry: - %0 = load float* %A, align 4 + %0 = load float, float* %A, align 4 %arrayidx1 = getelementptr inbounds float, float* %A, i64 1 - %1 = load float* %arrayidx1, align 4 + %1 = load float, float* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds float, float* %A, i64 2 - %2 = load float* %arrayidx2, align 4 + %2 = load float, float* %arrayidx2, align 4 br label %for.body for.body: ; preds = %for.body.for.body_crit_edge, %entry @@ -50,12 +50,12 @@ for.body: ; preds = %for.body.for.body_c %add4 = fadd float %R.030, %mul %4 = add nsw i64 %indvars.iv, 1 %arrayidx7 = getelementptr inbounds float, float* %A, i64 %4 - %5 = load float* %arrayidx7, align 4 + %5 = load float, float* %arrayidx7, align 4 %mul8 = fmul float %5, 8.000000e+00 %add9 = fadd float %G.031, %mul8 %6 = add nsw i64 %indvars.iv, 2 %arrayidx12 = getelementptr inbounds float, float* %A, i64 %6 - %7 = load float* %arrayidx12, align 4 + %7 = load float, float* %arrayidx12, align 4 
%mul13 = fmul float %7, 9.000000e+00 %add14 = fadd float %B.032, %mul13 %indvars.iv.next = add i64 %indvars.iv, 3 @@ -65,7 +65,7 @@ for.body: ; preds = %for.body.for.body_c for.body.for.body_crit_edge: ; preds = %for.body %arrayidx3.phi.trans.insert = getelementptr inbounds float, float* %A, i64 %indvars.iv.next - %.pre = load float* %arrayidx3.phi.trans.insert, align 4 + %.pre = load float, float* %arrayidx3.phi.trans.insert, align 4 br label %for.body for.end: ; preds = %for.body diff --git a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll index da2654ad4bd..a9ca093c0cd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll @@ -10,34 +10,34 @@ target triple = "x86_64-apple-macosx10.8.0" define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) { %1 = getelementptr inbounds i32, i32* %x, i64 %i - %2 = load i32* %1, align 4 + %2 = load i32, i32* %1, align 4 %3 = mul nsw i32 %2, %a %4 = getelementptr inbounds i32, i32* %y, i64 %i - %5 = load i32* %4, align 4 + %5 = load i32, i32* %4, align 4 %6 = add nsw i32 %3, %5 store i32 %6, i32* %1, align 4 %7 = add i64 %i, 1 %8 = getelementptr inbounds i32, i32* %x, i64 %7 - %9 = load i32* %8, align 4 + %9 = load i32, i32* %8, align 4 %10 = mul nsw i32 %9, %a %11 = getelementptr inbounds i32, i32* %y, i64 %7 - %12 = load i32* %11, align 4 + %12 = load i32, i32* %11, align 4 %13 = add nsw i32 %10, %12 store i32 %13, i32* %8, align 4 %14 = add i64 %i, 2 %15 = getelementptr inbounds i32, i32* %x, i64 %14 - %16 = load i32* %15, align 4 + %16 = load i32, i32* %15, align 4 %17 = mul nsw i32 %16, %a %18 = getelementptr inbounds i32, i32* %y, i64 %14 - %19 = load i32* %18, align 4 + %19 = load i32, i32* %18, align 4 %20 = add nsw i32 %17, %19 store i32 %20, i32* %15, align 4 %21 = add i64 %i, 3 %22 = getelementptr inbounds i32, i32* %x, i64 %21 - %23 = load i32* %22, align 4 + %23 = load i32, i32* %22, align 4 %24 = mul nsw i32 %23, %a %25 = getelementptr inbounds i32, i32* %y, i64 %21 - %26 = load i32* %25, align 4 + %26 = load i32, i32* %25, align 4 %27 = add nsw i32 %24, %26 store i32 %27, i32* %22, align 4 ret void @@ -48,13 +48,13 @@ define void @SAXPY_crash(i32* noalias nocapture %x, i32* noalias nocapture %y, i %1 = add i64 %i, 1 %2 = getelementptr inbounds i32, i32* %x, i64 %1 %3 = getelementptr inbounds i32, i32* %y, i64 %1 - %4 = load i32* %3, align 4 + %4 = load i32, i32* %3, align 4 %5 = add nsw i32 undef, %4 store i32 %5, i32* %2, align 4 %6 = add i64 %i, 2 %7 = getelementptr inbounds i32, i32* %x, i64 %6 %8 = getelementptr inbounds i32, i32* %y, i64 %6 - %9 = load i32* %8, align 4 + %9 = load i32, i32* %8, align 4 %10 = add nsw i32 undef, %9 store i32 %10, i32* %7, align 4 ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll index 9e23a6a42bf..33bdc6a6b04 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll @@ -25,40 +25,40 @@ for.body: ; preds = %for.body, %entry %a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ] %1 = shl i64 %indvars.iv, 3 %arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1 - %2 = load i32* %arrayidx, align 4 + %2 = load i32, i32* %arrayidx, align 4 %3 = or i64 %1, 4 %arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3 - %4 = load i32* %arrayidx2, align 4 + %4 = load i32, i32* %arrayidx2, align 4 %add3 = add nsw i32 %4, %2 
%arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0 store i32 %add3, i32* %arrayidx6, align 16 %add10 = add nsw i32 %add3, %a.088 %5 = or i64 %1, 1 %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5 - %6 = load i32* %arrayidx13, align 4 + %6 = load i32, i32* %arrayidx13, align 4 %7 = or i64 %1, 5 %arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7 - %8 = load i32* %arrayidx16, align 4 + %8 = load i32, i32* %arrayidx16, align 4 %add17 = add nsw i32 %8, %6 %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1 store i32 %add17, i32* %arrayidx20, align 4 %add24 = add nsw i32 %add10, %add17 %9 = or i64 %1, 2 %arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9 - %10 = load i32* %arrayidx27, align 4 + %10 = load i32, i32* %arrayidx27, align 4 %11 = or i64 %1, 6 %arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11 - %12 = load i32* %arrayidx30, align 4 + %12 = load i32, i32* %arrayidx30, align 4 %add31 = add nsw i32 %12, %10 %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2 store i32 %add31, i32* %arrayidx34, align 8 %add38 = add nsw i32 %add24, %add31 %13 = or i64 %1, 3 %arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13 - %14 = load i32* %arrayidx41, align 4 + %14 = load i32, i32* %arrayidx41, align 4 %15 = or i64 %1, 7 %arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15 - %16 = load i32* %arrayidx44, align 4 + %16 = load i32, i32* %arrayidx44, align 4 %add45 = add nsw i32 %16, %14 %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3 store i32 %add45, i32* %arrayidx48, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll index ccb165fec66..c9bb88497ac 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll @@ -13,16 +13,16 @@ define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i6 %2 = shl i64 %i.019, 2 %3 = getelementptr inbounds i32, i32* %in, i64 %2 ;CHECK:load <4 x i32> - %4 = load i32* %3, align 4 + %4 = load i32, i32* %3, align 4 %5 = or i64 %2, 1 %6 = getelementptr inbounds i32, i32* %in, i64 %5 - %7 = load i32* %6, align 4 + %7 = load i32, i32* %6, align 4 %8 = or i64 %2, 2 %9 = getelementptr inbounds i32, i32* %in, i64 %8 - %10 = load i32* %9, align 4 + %10 = load i32, i32* %9, align 4 %11 = or i64 %2, 3 %12 = getelementptr inbounds i32, i32* %in, i64 %11 - %13 = load i32* %12, align 4 + %13 = load i32, i32* %12, align 4 ;CHECK:mul <4 x i32> %14 = mul i32 %4, 7 ;CHECK:add <4 x i32> @@ -62,16 +62,16 @@ define i32 @unrollable(i32* %in, i32* %out, i64 %n) nounwind ssp uwtable { %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ] %2 = shl i64 %i.019, 2 %3 = getelementptr inbounds i32, i32* %in, i64 %2 - %4 = load i32* %3, align 4 + %4 = load i32, i32* %3, align 4 %5 = or i64 %2, 1 %6 = getelementptr inbounds i32, i32* %in, i64 %5 - %7 = load i32* %6, align 4 + %7 = load i32, i32* %6, align 4 %8 = or i64 %2, 2 %9 = getelementptr inbounds i32, i32* %in, i64 %8 - %10 = load i32* %9, align 4 + %10 = load i32, i32* %9, align 4 %11 = or i64 %2, 3 %12 = getelementptr inbounds i32, i32* %in, i64 %11 - %13 = load i32* %12, align 4 + %13 = load i32, i32* %12, align 4 %14 = mul i32 %4, 7 %15 = add i32 %14, 7 %16 = mul i32 %7, 7 diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll index 83c0e82e990..a5d9ad9685c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll @@ -9,13 +9,13 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK: ret define void @test1(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double* %c, align 8 %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 @@ -29,13 +29,13 @@ entry: ; CHECK: ret define void @test2(double* %a, double* %b, i8* %e) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 %c = bitcast i8* %e to double* store double %mul, double* %c, align 8 @@ -52,13 +52,13 @@ entry: ; CHECK: ret define void @test_volatile_load(double* %a, double* %b, double* %c) { entry: - %i0 = load volatile double* %a, align 8 - %i1 = load volatile double* %b, align 8 + %i0 = load volatile double, double* %a, align 8 + %i1 = load volatile double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store double %mul, double* %c, align 8 %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 @@ -72,13 +72,13 @@ entry: ; CHECK: ret define void @test_volatile_store(double* %a, double* %b, double* %c) { entry: - %i0 = load double* %a, align 8 - %i1 = load double* %b, align 8 + %i0 = load double, double* %a, align 8 + %i1 = load double, double* %b, align 8 %mul = fmul double %i0, %i1 %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double* %arrayidx3, align 8 + %i3 = load double, double* %arrayidx3, align 8 %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double* %arrayidx4, align 8 + %i4 = load double, double* %arrayidx4, align 8 %mul5 = fmul double %i3, %i4 store volatile double %mul, double* %c, align 8 %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll index cbce6877a86..6c93222ef93 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -17,10 +17,10 @@ for.body: ; preds = %entry, %for.body %i.015 = phi i64 [ %inc, %for.body ], 
[ 0, %entry ] %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ] %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ] - %0 = load double* %src.addr.013, align 8 + %0 = load double, double* %src.addr.013, align 8 store double %0, double* %dst.addr.014, align 8 %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 1 - %1 = load double* %arrayidx2, align 8 + %1 = load double, double* %arrayidx2, align 8 %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1 store double %1, double* %arrayidx3, align 8 %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015 @@ -47,18 +47,18 @@ for.body: ; preds = %entry, %for.body %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ] %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ] - %0 = load float* %src.addr.021, align 4 + %0 = load float, float* %src.addr.021, align 4 store float %0, float* %dst.addr.022, align 4 %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 1 - %1 = load float* %arrayidx2, align 4 + %1 = load float, float* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1 store float %1, float* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2 - %2 = load float* %arrayidx4, align 4 + %2 = load float, float* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2 store float %2, float* %arrayidx5, align 4 %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3 - %3 = load float* %arrayidx6, align 4 + %3 = load float, float* %arrayidx6, align 4 %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3 store float %3, float* %arrayidx7, align 4 %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023 @@ -85,10 +85,10 @@ for.body: ; preds = %entry, %for.body %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ] %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ] - %0 = load double* %src.addr.013, align 8 + %0 = load double, double* %src.addr.013, align 8 store double %0, double* %dst.addr.014, align 8 %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 2 - %1 = load double* %arrayidx2, align 8 + %1 = load double, double* %arrayidx2, align 8 %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1 store double %1, double* %arrayidx3, align 8 %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015 @@ -115,18 +115,18 @@ for.body: ; preds = %entry, %for.body %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ] %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ] - %0 = load float* %src.addr.021, align 4 + %0 = load float, float* %src.addr.021, align 4 store float %0, float* %dst.addr.022, align 4 %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4 - %1 = load float* %arrayidx2, align 4 + %1 = load float, float* %arrayidx2, align 4 %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1 store float %1, float* %arrayidx3, align 4 %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2 - %2 = load float* %arrayidx4, align 4 + %2 = load float, float* %arrayidx4, align 4 %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2 
   store float %2, float* %arrayidx5, align 4
   %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
-  %3 = load float* %arrayidx6, align 4
+  %3 = load float, float* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
   store float %3, float* %arrayidx7, align 4
   %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
index b1d23e9cc0e..f29f69d7e82 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
@@ -12,15 +12,15 @@ entry:

 bb1: ; an unreachable block
   %t3 = getelementptr inbounds i32, i32* %x, i64 4
-  %t4 = load i32* %t3, align 4
+  %t4 = load i32, i32* %t3, align 4
   %t5 = getelementptr inbounds i32, i32* %x, i64 5
-  %t6 = load i32* %t5, align 4
+  %t6 = load i32, i32* %t5, align 4
   %bad = fadd float %bad, 0.000000e+00 ; <- an instruction with self dependency,
                                        ; but legal in unreachable code
   %t7 = getelementptr inbounds i32, i32* %x, i64 6
-  %t8 = load i32* %t7, align 4
+  %t8 = load i32, i32* %t7, align 4
   %t9 = getelementptr inbounds i32, i32* %x, i64 7
-  %t10 = load i32* %t9, align 4
+  %t10 = load i32, i32* %t9, align 4
   br label %bb2

 bb2:
diff --git a/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll b/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
index cca309bd87f..efd5386f520 100644
--- a/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
+++ b/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
@@ -8,13 +8,13 @@ target triple = "xcore"
 ; CHECK-NOT: <2 x double>
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1