7 files changed, 142 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/PowerPC/unal4-std.ll b/llvm/test/CodeGen/PowerPC/unal4-std.ll
index 9f29e31cb90..e9110991116 100644
--- a/llvm/test/CodeGen/PowerPC/unal4-std.ll
+++ b/llvm/test/CodeGen/PowerPC/unal4-std.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -22,6 +23,9 @@ if.end210:                                        ; preds = %entry
 ; a multiple of 4).
 ; CHECK: @copy_to_conceal
 ; CHECK: stdx {{[0-9]+}}, 0,
+
+; CHECK-VSX: @copy_to_conceal
+; CHECK-VSX: stxvw4x {{[0-9]+}}, 0,
 }
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/PowerPC/unaligned.ll b/llvm/test/CodeGen/PowerPC/unaligned.ll
index 0c59516f118..64c03cdda35 100644
--- a/llvm/test/CodeGen/PowerPC/unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/unaligned.ll
@@ -92,10 +92,14 @@ entry:
 ; CHECK-DAG: stdx
 ; CHECK: stdx
 
+; For VSX on P7, unaligned loads and stores are preferable to aligned
+; stack slots, but for loads lvsl/vperm is better still.  (On P8, lxvw4x is
+; preferable.)  For stores, unaligned stxvw4x is preferable on both machines.
 ; CHECK-VSX: @foo6
-; CHECK-VSX-DAG: ld
-; CHECK-VSX-DAG: ld
-; CHECK-VSX-DAG: stdx
-; CHECK-VSX: stdx
+; CHECK-VSX-DAG: lvsl
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX: vperm
+; CHECK-VSX: stxvw4x
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/vec-abi-align.ll b/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
index 3239cf6c06a..5075ff2b8c0 100644
--- a/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -16,6 +17,10 @@ entry:
 ; CHECK-LABEL: @test1
 ; CHECK: stvx 2,
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test1
+; CHECK-VSX: stxvw4x 34,
+; CHECK-VSX: blr
 }
 
 ; Function Attrs: nounwind
@@ -35,6 +40,13 @@ entry:
 ; CHECK: addi [[REGB:[0-9]+]], 1, 112
 ; CHECK: lvx 2, [[REGB]], [[REG16]]
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test2
+; CHECK-VSX: ld {{[0-9]+}}, 112(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
 }
 
 ; Function Attrs: nounwind
@@ -54,6 +66,13 @@ entry:
 ; CHECK: addi [[REGB:[0-9]+]], 1, 128
 ; CHECK: lvx 2, [[REGB]], [[REG16]]
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test3
+; CHECK-VSX: ld {{[0-9]+}}, 128(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
 }
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/vec_misaligned.ll b/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
index 304a84d49a9..73a4a4d395d 100644
--- a/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -mattr=-power8-vector | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
diff --git a/llvm/test/CodeGen/PowerPC/vrspill.ll b/llvm/test/CodeGen/PowerPC/vrspill.ll
index c3d1bf8f1ea..b990442aed8 100644
--- a/llvm/test/CodeGen/PowerPC/vrspill.ll
+++ b/llvm/test/CodeGen/PowerPC/vrspill.ll
@@ -1,5 +1,7 @@
-; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -mattr=+vsx -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=+vsx -verify-machineinstrs -fast-isel=false < %s | FileCheck -check-prefix=CHECK-VSX %s
 
 ; This verifies that we generate correct spill/reload code for vector regs.
 
@@ -15,4 +17,9 @@ entry:
 
 ; CHECK: stvx 2,
 
+; We would prefer to test for "stxvw4x 34," but current -O0 code
+; needlessly generates "vor 3,2,2 / stxvw4x 35,0,3", so we'll settle for
+; the opcode.
+; CHECK-VSX: stxvw4x
+
 declare void @foo(i32*)
diff --git a/llvm/test/CodeGen/PowerPC/vsx-p8.ll b/llvm/test/CodeGen/PowerPC/vsx-p8.ll
new file mode 100644
index 00000000000..81406b6f079
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vsx-p8.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mcpu=pwr8 -mattr=+power8-vector < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Unaligned loads/stores on P8 and later should use VSX where possible.
+
+define <2 x double> @test28u(<2 x double>* %a) {
+  %v = load <2 x double>* %a, align 8  ; under-aligned: 8 < 16-byte vector size
+  ret <2 x double> %v
+
+; CHECK-LABEL: @test28u
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test29u(<2 x double>* %a, <2 x double> %b) {
+  store <2 x double> %b, <2 x double>* %a, align 8  ; under-aligned: 8 < 16-byte vector size
+  ret void
+
+; CHECK-LABEL: @test29u
+; CHECK: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8  ; under-aligned: 8 < 16-byte vector size
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8  ; under-aligned: 8 < 16-byte vector size
+  ret void
+
+; CHECK-LABEL: @test33u
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 2f226e1f614..65343f4a9ba 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -356,6 +356,63 @@ define void @test31(<2 x i64>* %a, <2 x i64> %b) {
 ; CHECK: blr
 }
 
+define <4 x float> @test32(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 16  ; align 16 matches the 16-byte vector size
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test33(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 16  ; align 16 matches the 16-byte vector size
+  ret void
+
+; CHECK-LABEL: @test33
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8  ; under-aligned: 8 < 16-byte vector size
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK-DAG: lvsl
+; CHECK-DAG: lvx
+; CHECK-DAG: lvx
+; CHECK: vperm 2,
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8  ; under-aligned: 8 < 16-byte vector size
+  ret void
+
+; CHECK-LABEL: @test33u
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x i32> @test34(<4 x i32>* %a) {
+  %v = load <4 x i32>* %a, align 16  ; align 16 matches the 16-byte vector size
+  ret <4 x i32> %v
+
+; CHECK-LABEL: @test34
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test35(<4 x i32>* %a, <4 x i32> %b) {
+  store <4 x i32> %b, <4 x i32>* %a, align 16  ; align 16 matches the 16-byte vector size
+  ret void
+
+; CHECK-LABEL: @test35
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
 define <2 x double> @test40(<2 x i64> %a) {
   %v = uitofp <2 x i64> %a to <2 x double>
   ret <2 x double> %v