summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
diff options
context:
space:
mode:
author Tom Stellard <tstellar@redhat.com> 2018-11-30 04:51:41 +0000
committer Tom Stellard <tstellar@redhat.com> 2018-11-30 04:51:41 +0000
commit 4a6ae60f26152979c80137df145e834a889a64fc (patch)
tree 6b95e448dd40f893c395665a6ee816ed00ed1ab9 /llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
parent d6ffc0c6ead2c0e94f7268b7270f30cc3f478f19 (diff)
download bcm5719-llvm-4a6ae60f26152979c80137df145e834a889a64fc.tar.gz
bcm5719-llvm-4a6ae60f26152979c80137df145e834a889a64fc.zip
Merging r339260:
------------------------------------------------------------------------
r339260 | syzaara | 2018-08-08 08:20:43 -0700 (Wed, 08 Aug 2018) | 13 lines

[PowerPC] Improve codegen for vector loads using scalar_to_vector

This patch aims to improve the codegen for vector loads involving the
scalar_to_vector (load X) sequence. Initially, ld->mv instructions were used
for scalar_to_vector (load X), so this patch allows scalar_to_vector (load X)
to utilize:

LXSD and LXSDX for i64 and f64
LXSIWAX for i32 (sign extension to i64)
LXSIWZX for i32 and f64

Committing on behalf of Amy Kwan.
Differential Revision: https://reviews.llvm.org/D48950
------------------------------------------------------------------------

llvm-svn: 347957
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll')
-rw-r--r-- llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll 265
1 files changed, 265 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
new file mode 100644
index 00000000000..c63044a79a5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
@@ -0,0 +1,265 @@
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+; s2v_test1: load an i32 from %int32, sign-extend it to i64 and insert it into element 0 of %vec; every check prefix expects the load to be emitted as lfiwax. Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
+; P9LE-LABEL: s2v_test1:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test1:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test1:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test1:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %int32, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test2: load the i32 at constant element offset 1 from %int32, sign-extend it to i64 and insert it into element 0 of %vec; every check prefix expects addi r3, r3, 4 followed by lfiwax. Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
+; P9LE-LABEL: s2v_test2:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: addi r3, r3, 4
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test2:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: addi r3, r3, 4
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test2:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: addi r3, r3, 4
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test2:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: addi r3, r3, 4
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+ %0 = load i32, i32* %arrayidx, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test3: load the i32 at runtime index %Idx (sign-extended to i64) from %int32, sign-extend the loaded value to i64 and insert it into element 0 of %vec; every check prefix expects sldi r4, r7, 2 feeding an indexed lfiwax. Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) {
+; P9LE-LABEL: s2v_test3:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: sldi r4, r7, 2
+; P9LE-NEXT: lfiwax f0, r3, r4
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test3:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: sldi r4, r7, 2
+; P9BE-NEXT: lfiwax f0, r3, r4
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test3:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: sldi r4, r7, 2
+; P8LE-NEXT: lfiwax f0, r3, r4
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test3:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: sldi r4, r7, 2
+; P8BE-NEXT: lfiwax f0, r3, r4
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %idxprom = sext i32 %Idx to i64
+ %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test4: load the i32 at constant element offset 1 from %int32, sign-extend it to i64 and insert it into element 0 of %vec. NOTE(review): the IR body and expected output are identical to s2v_test2 - confirm whether a different addressing form was intended. Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
+; P9LE-LABEL: s2v_test4:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: addi r3, r3, 4
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test4:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: addi r3, r3, 4
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test4:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: addi r3, r3, 4
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test4:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: addi r3, r3, 4
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+ %0 = load i32, i32* %arrayidx, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test5: same load / sign-extend / insert-into-element-0 sequence as s2v_test1, but the vector is the first parameter and the pointer %ptr1 the second; every check prefix expects the load address in r5. Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
+; P9LE-LABEL: s2v_test5:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r5
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test5:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r5
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test5:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r5
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test5:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r5
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %ptr1, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test6: load an i32 from %ptr, sign-extend it to i64 and splat it across both lanes of a <2 x i64> (insertelement into undef, then shufflevector with an all-zero mask); every check prefix expects lfiwax followed by xxspltd. Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
+; P9LE-LABEL: s2v_test6:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v2, f0, f0, 2
+; P9LE-NEXT: xxspltd v2, v2, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test6:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxspltd v2, vs0, 0
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test6:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v2, f0, f0, 2
+; P8LE-NEXT: xxspltd v2, v2, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test6:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxspltd v2, vs0, 0
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %ptr, align 4
+ %conv = sext i32 %0 to i64
+ %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %splat.splat
+}
+
+; s2v_test7: load an i32 from %ptr, sign-extend it to i64 and splat it across both lanes of a <2 x i64>. NOTE(review): the IR body and expected output are identical to s2v_test6 - confirm whether a distinct splat form was intended. Function Attrs: norecurse nounwind readonly
+define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
+; P9LE-LABEL: s2v_test7:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v2, f0, f0, 2
+; P9LE-NEXT: xxspltd v2, v2, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test7:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxspltd v2, vs0, 0
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test7:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v2, f0, f0, 2
+; P8LE-NEXT: xxspltd v2, v2, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test7:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxspltd v2, vs0, 0
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %ptr, align 4
+ %conv = sext i32 %0 to i64
+ %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %splat.splat
+}
+
OpenPOWER on IntegriCloud