diff options
| author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-23 13:25:31 +0000 |
|---|---|---|
| committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-23 13:25:31 +0000 |
| commit | d2c3c51a70a3c0f6234b40db82442a6183992191 (patch) | |
| tree | 297e5f15c74a272cbebdb84e8787b686bcb13045 /llvm/test/CodeGen/PowerPC | |
| parent | f34f45fd534140f082d41dbb6a7c60dc675870c4 (diff) | |
| download | bcm5719-llvm-d2c3c51a70a3c0f6234b40db82442a6183992191.tar.gz bcm5719-llvm-d2c3c51a70a3c0f6234b40db82442a6183992191.zip | |
[Power9] Exploit move and splat instructions for build_vector improvement
This patch corresponds to review:
https://reviews.llvm.org/D21135
This patch exploits the following instructions:
mtvsrws
lxvwsx
mtvsrdd
mfvsrld
These instructions are used to improve some build_vector and extractelement patterns.
llvm-svn: 282246
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll | 167 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll | 18 |
2 files changed, 177 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll b/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll new file mode 100644 index 00000000000..89e09625a14 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll @@ -0,0 +1,167 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-BE + +@Globi = external global i32, align 4 +@Globf = external global float, align 4 + +define <2 x i64> @test1(i64 %a, i64 %b) { +entry: +; CHECK-LABEL: test1 +; CHECK: mtvsrdd 34, 4, 3 +; CHECK-BE-LABEL: test1 +; CHECK-BE: mtvsrdd 34, 3, 4 + %vecins = insertelement <2 x i64> undef, i64 %a, i32 0 + %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1 + ret <2 x i64> %vecins1 +} + +define i64 @test2(<2 x i64> %a) { +entry: +; CHECK-LABEL: test2 +; CHECK: mfvsrld 3, 34 + %0 = extractelement <2 x i64> %a, i32 0 + ret i64 %0 +} + +define i64 @test3(<2 x i64> %a) { +entry: +; CHECK-BE-LABEL: test3 +; CHECK-BE: mfvsrld 3, 34 + %0 = extractelement <2 x i64> %a, i32 1 + ret i64 %0 +} + +define <4 x i32> @test4(i32* nocapture readonly %in) { +entry: +; CHECK-LABEL: test4 +; CHECK: lxvwsx 34, 0, 3 +; CHECK-NOT: xxspltw +; CHECK-BE-LABEL: test4 +; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE-NOT: xxspltw + %0 = load i32, i32* %in, align 4 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +} + +define <4 x float> @test5(float* nocapture readonly %in) { +entry: +; CHECK-LABEL: test5 +; CHECK: lxvwsx 34, 0, 3 +; CHECK-NOT: xxspltw +; CHECK-BE-LABEL: test5 +; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE-NOT: xxspltw + %0 = load float, float* %in, align 4 + %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x 
float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat.splat +} + +define <4 x i32> @test6() { +entry: +; CHECK-LABEL: test6 +; CHECK: addis +; CHECK: ld [[TOC:[0-9]+]], .LC0 +; CHECK: lxvwsx 34, 0, 3 +; CHECK-NOT: xxspltw +; CHECK-BE-LABEL: test6 +; CHECK-BE: addis +; CHECK-BE: ld [[TOC:[0-9]+]], .LC0 +; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE-NOT: xxspltw + %0 = load i32, i32* @Globi, align 4 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +} + +define <4 x float> @test7() { +entry: +; CHECK-LABEL: test7 +; CHECK: addis +; CHECK: ld [[TOC:[0-9]+]], .LC1 +; CHECK: lxvwsx 34, 0, 3 +; CHECK-NOT: xxspltw +; CHECK-BE-LABEL: test7 +; CHECK-BE: addis +; CHECK-BE: ld [[TOC:[0-9]+]], .LC1 +; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE-NOT: xxspltw + %0 = load float, float* @Globf, align 4 + %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat.splat +} + +define <16 x i8> @test8() { +entry: +; CHECK-LABEL: test8 +; CHECK: xxlxor 34, 34, 34 +; CHECK-BE-LABEL: test8 +; CHECK-BE: xxlxor 34, 34, 34 + ret <16 x i8> zeroinitializer +} + +define <16 x i8> @test9() { +entry: +; CHECK-LABEL: test9 +; CHECK: xxspltib 34, 1 +; CHECK-BE-LABEL: test9 +; CHECK-BE: xxspltib 34, 1 + ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +} + +define <16 x i8> @test10() { +entry: +; CHECK-LABEL: test10 +; CHECK: xxspltib 34, 127 +; CHECK-BE-LABEL: test10 +; CHECK-BE: xxspltib 34, 127 + ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127> +} + +define <16 x i8> @test11() { +entry: +; CHECK-LABEL: test11 +; CHECK: xxspltib 34, 128 +; CHECK-BE-LABEL: 
test11 +; CHECK-BE: xxspltib 34, 128 + ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> +} + +define <16 x i8> @test12() { +entry: +; CHECK-LABEL: test12 +; CHECK: xxspltib 34, 255 +; CHECK-BE-LABEL: test12 +; CHECK-BE: xxspltib 34, 255 + ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +} + +define <16 x i8> @test13() { +entry: +; CHECK-LABEL: test13 +; CHECK: xxspltib 34, 129 +; CHECK-BE-LABEL: test13 +; CHECK-BE: xxspltib 34, 129 + ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127> +} + +define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) { +entry: +; CHECK-LABEL: test14 +; CHECK: lwz [[LD:[0-9]+]], +; CHECK: mtvsrws 34, [[LD]] +; CHECK-BE-LABEL: test14 +; CHECK-BE: lwz [[LD:[0-9]+]], +; CHECK-BE: mtvsrws 34, [[LD]] + %0 = load i32, i32* %b, align 4 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + %1 = add i32 %0, 5 + store i32 %1, i32* %b, align 4 + ret <4 x i32> %splat.splat +} diff --git a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll index 8d5a8cdf3a3..d81a1104c53 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll @@ -17,16 +17,16 @@ ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-P9 \ -; RUN: --implicit-check-not xxswapd +; RUN: -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \ +; RUN: -check-prefix=CHECK-P9 --implicit-check-not xxswapd ; RUN: llc 
-verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \ ; RUN: --implicit-check-not xxswapd ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr9 -mattr=-power9-vector < %s | FileCheck %s \ -; RUN: -check-prefix=CHECK-LE +; RUN: -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-LE @x = common global <1 x i128> zeroinitializer, align 16 @y = common global <1 x i128> zeroinitializer, align 16 @@ -55,8 +55,10 @@ define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind { ; CHECK-LE: blr ; CHECK-P9-LABEL: @v1i128_increment_by_one -; CHECK-P9: lxvx -; CHECK-P9: vadduqm 2, 2, 3 +; CHECK-P9-DAG: li [[R1:r[0-9]+]], 1 +; CHECK-P9-DAG: li [[R2:r[0-9]+]], 0 +; CHECK-P9: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]] +; CHECK-P9: vadduqm v2, v2, [[V1]] ; CHECK-P9: blr ; CHECK-BE-LABEL: @v1i128_increment_by_one @@ -232,8 +234,8 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind { ; CHECK-LE: blr ; CHECK-P9-LABEL: @call_v1i128_increment_by_val -; CHECK-P9-DAG: lxvx 34 -; CHECK-P9-DAG: lxvx 35 +; CHECK-P9-DAG: lxvx v2 +; CHECK-P9-DAG: lxvx v3 ; CHECK-P9: bl v1i128_increment_by_val ; CHECK-P9: blr |

