summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/PowerPC
diff options
context:
space:
mode:
author Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-09-23 13:25:31 +0000
committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-09-23 13:25:31 +0000
commit d2c3c51a70a3c0f6234b40db82442a6183992191 (patch)
tree 297e5f15c74a272cbebdb84e8787b686bcb13045 /llvm/test/CodeGen/PowerPC
parent f34f45fd534140f082d41dbb6a7c60dc675870c4 (diff)
download bcm5719-llvm-d2c3c51a70a3c0f6234b40db82442a6183992191.tar.gz
bcm5719-llvm-d2c3c51a70a3c0f6234b40db82442a6183992191.zip
[Power9] Exploit move and splat instructions for build_vector improvement
This patch corresponds to review https://reviews.llvm.org/D21135. It exploits the following instructions: mtvsrws, lxvwsx, mtvsrdd, and mfvsrld, in order to improve some build_vector and extractelement patterns. llvm-svn: 282246
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
-rw-r--r-- llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll 167
-rw-r--r-- llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll 18
2 files changed, 177 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll b/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll
new file mode 100644
index 00000000000..89e09625a14
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll
@@ -0,0 +1,167 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
+; RUN: --check-prefix=CHECK-BE
+
+@Globi = external global i32, align 4
+@Globf = external global float, align 4
+
+define <2 x i64> @test1(i64 %a, i64 %b) { ; build the vector {%a, %b} from two i64 arguments
+entry:
+; CHECK-LABEL: test1
+; CHECK: mtvsrdd 34, 4, 3
+; CHECK-BE-LABEL: test1
+; CHECK-BE: mtvsrdd 34, 3, 4
+ %lane0 = insertelement <2 x i64> undef, i64 %a, i32 0 ; element 0 <- %a
+ %lanes = insertelement <2 x i64> %lane0, i64 %b, i32 1 ; element 1 <- %b
+ ret <2 x i64> %lanes ; both runs expect one mtvsrdd; only the source operand order differs
+}
+
+define i64 @test2(<2 x i64> %a) { ; extract element 0 of a <2 x i64>
+entry:
+; CHECK-LABEL: test2
+; CHECK: mfvsrld 3, 34
+ %elt = extractelement <2 x i64> %a, i32 0 ; only the little-endian run states an expectation (mfvsrld) for this function
+ ret i64 %elt
+}
+
+define i64 @test3(<2 x i64> %a) { ; extract element 1 of a <2 x i64>
+entry:
+; CHECK-BE-LABEL: test3
+; CHECK-BE: mfvsrld 3, 34
+ %elt = extractelement <2 x i64> %a, i32 1 ; only the big-endian run states an expectation (mfvsrld) for this function
+ ret i64 %elt
+}
+
+define <4 x i32> @test4(i32* nocapture readonly %in) { ; splat an i32 loaded through the pointer argument
+entry:
+; CHECK-LABEL: test4
+; CHECK: lxvwsx 34, 0, 3
+; CHECK-NOT: xxspltw
+; CHECK-BE-LABEL: test4
+; CHECK-BE: lxvwsx 34, 0, 3
+; CHECK-BE-NOT: xxspltw
+ %word = load i32, i32* %in, align 4
+ %seed = insertelement <4 x i32> undef, i32 %word, i32 0 ; loaded word goes into element 0
+ %bcast = shufflevector <4 x i32> %seed, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask repeats element 0 in every lane
+ ret <4 x i32> %bcast ; expected as one lxvwsx with no separate xxspltw
+}
+
+define <4 x float> @test5(float* nocapture readonly %in) { ; splat a float loaded through the pointer argument
+entry:
+; CHECK-LABEL: test5
+; CHECK: lxvwsx 34, 0, 3
+; CHECK-NOT: xxspltw
+; CHECK-BE-LABEL: test5
+; CHECK-BE: lxvwsx 34, 0, 3
+; CHECK-BE-NOT: xxspltw
+ %val = load float, float* %in, align 4
+ %seed = insertelement <4 x float> undef, float %val, i32 0 ; loaded float goes into element 0
+ %bcast = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask repeats element 0 in every lane
+ ret <4 x float> %bcast ; expected as one lxvwsx with no separate xxspltw
+}
+
+define <4 x i32> @test6() { ; splat an i32 loaded from the external global @Globi
+entry:
+; CHECK-LABEL: test6
+; CHECK: addis
+; CHECK: ld [[TOC:[0-9]+]], .LC0
+; CHECK: lxvwsx 34, 0, [[TOC]]
+; CHECK-NOT: xxspltw
+; CHECK-BE-LABEL: test6
+; CHECK-BE: addis
+; CHECK-BE: ld [[TOC:[0-9]+]], .LC0
+; CHECK-BE: lxvwsx 34, 0, [[TOC]]
+; CHECK-BE-NOT: xxspltw
+ %0 = load i32, i32* @Globi, align 4 ; address is expected to come from the .LC0 entry loaded by the ld above
+ %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 ; loaded word goes into element 0
+ %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask repeats element 0 in every lane
+ ret <4 x i32> %splat.splat ; the splat load must use the register captured from the ld, with no separate xxspltw
+}
+
+define <4 x float> @test7() { ; splat a float loaded from the external global @Globf
+entry:
+; CHECK-LABEL: test7
+; CHECK: addis
+; CHECK: ld [[TOC:[0-9]+]], .LC1
+; CHECK: lxvwsx 34, 0, [[TOC]]
+; CHECK-NOT: xxspltw
+; CHECK-BE-LABEL: test7
+; CHECK-BE: addis
+; CHECK-BE: ld [[TOC:[0-9]+]], .LC1
+; CHECK-BE: lxvwsx 34, 0, [[TOC]]
+; CHECK-BE-NOT: xxspltw
+ %0 = load float, float* @Globf, align 4 ; address is expected to come from the .LC1 entry loaded by the ld above
+ %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 ; loaded float goes into element 0
+ %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask repeats element 0 in every lane
+ ret <4 x float> %splat.splat ; the splat load must use the register captured from the ld, with no separate xxspltw
+}
+
+define <16 x i8> @test8() { ; return an all-zero <16 x i8>
+entry:
+; CHECK-LABEL: test8
+; CHECK: xxlxor 34, 34, 34
+; CHECK-BE-LABEL: test8
+; CHECK-BE: xxlxor 34, 34, 34
+ ret <16 x i8> zeroinitializer ; both runs expect the zero vector to be made by xor-ing register 34 with itself
+}
+
+define <16 x i8> @test9() { ; return a <16 x i8> with every byte equal to 1
+entry:
+; CHECK-LABEL: test9
+; CHECK: xxspltib 34, 1
+; CHECK-BE-LABEL: test9
+; CHECK-BE: xxspltib 34, 1
+ ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; both runs expect one xxspltib with immediate 1
+}
+
+define <16 x i8> @test10() { ; return a <16 x i8> with every byte equal to 127
+entry:
+; CHECK-LABEL: test10
+; CHECK: xxspltib 34, 127
+; CHECK-BE-LABEL: test10
+; CHECK-BE: xxspltib 34, 127
+ ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127> ; both runs expect one xxspltib with immediate 127
+}
+
+define <16 x i8> @test11() { ; return a <16 x i8> with every byte equal to -128
+entry:
+; CHECK-LABEL: test11
+; CHECK: xxspltib 34, 128
+; CHECK-BE-LABEL: test11
+; CHECK-BE: xxspltib 34, 128
+ ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> ; the expected immediate is printed as 128, the same byte read as unsigned
+}
+
+define <16 x i8> @test12() { ; return a <16 x i8> with every byte equal to -1
+entry:
+; CHECK-LABEL: test12
+; CHECK: xxspltib 34, 255
+; CHECK-BE-LABEL: test12
+; CHECK-BE: xxspltib 34, 255
+ ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; the expected immediate is printed as 255, the same byte read as unsigned
+}
+
+define <16 x i8> @test13() { ; return a <16 x i8> with every byte equal to -127
+entry:
+; CHECK-LABEL: test13
+; CHECK: xxspltib 34, 129
+; CHECK-BE-LABEL: test13
+; CHECK-BE: xxspltib 34, 129
+ ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127> ; the expected immediate is printed as 129, the same byte read as unsigned
+}
+
+define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) { ; splat a loaded i32 that is also reused as a scalar; %a is unused
+entry:
+; CHECK-LABEL: test14
+; CHECK: lwz [[LD:[0-9]+]],
+; CHECK: mtvsrws 34, [[LD]]
+; CHECK-BE-LABEL: test14
+; CHECK-BE: lwz [[LD:[0-9]+]],
+; CHECK-BE: mtvsrws 34, [[LD]]
+ %word = load i32, i32* %b, align 4
+ %seed = insertelement <4 x i32> undef, i32 %word, i32 0 ; loaded word goes into element 0
+ %bcast = shufflevector <4 x i32> %seed, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask repeats element 0 in every lane
+ %bumped = add i32 %word, 5 ; second, scalar use of the loaded word
+ store i32 %bumped, i32* %b, align 4
+ ret <4 x i32> %bcast ; expected as a scalar lwz followed by mtvsrws from that same register
+}
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll
index 8d5a8cdf3a3..d81a1104c53 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll
@@ -17,16 +17,16 @@
; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN: -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-P9 \
-; RUN: --implicit-check-not xxswapd
+; RUN: -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \
+; RUN: -check-prefix=CHECK-P9 --implicit-check-not xxswapd
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
; RUN: --implicit-check-not xxswapd
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN: -mcpu=pwr9 -mattr=-power9-vector < %s | FileCheck %s \
-; RUN: -check-prefix=CHECK-LE
+; RUN: -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-LE
@x = common global <1 x i128> zeroinitializer, align 16
@y = common global <1 x i128> zeroinitializer, align 16
@@ -55,8 +55,10 @@ define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
; CHECK-LE: blr
; CHECK-P9-LABEL: @v1i128_increment_by_one
-; CHECK-P9: lxvx
-; CHECK-P9: vadduqm 2, 2, 3
+; CHECK-P9-DAG: li [[R1:r[0-9]+]], 1
+; CHECK-P9-DAG: li [[R2:r[0-9]+]], 0
+; CHECK-P9: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
+; CHECK-P9: vadduqm v2, v2, [[V1]]
; CHECK-P9: blr
; CHECK-BE-LABEL: @v1i128_increment_by_one
@@ -232,8 +234,8 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind {
; CHECK-LE: blr
; CHECK-P9-LABEL: @call_v1i128_increment_by_val
-; CHECK-P9-DAG: lxvx 34
-; CHECK-P9-DAG: lxvx 35
+; CHECK-P9-DAG: lxvx v2
+; CHECK-P9-DAG: lxvx v3
; CHECK-P9: bl v1i128_increment_by_val
; CHECK-P9: blr
OpenPOWER on IntegriCloud