summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
diff options
context:
space:
mode:
author Zaara Syeda <syzaara@ca.ibm.com> 2018-08-08 15:20:43 +0000
committer Zaara Syeda <syzaara@ca.ibm.com> 2018-08-08 15:20:43 +0000
commit b2595b988b324fd33fe0b81733b90b4807da0719 (patch)
tree e5d9703702fc61c29ab0c1de06d382cce9374561 /llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
parent b006dafbef716d4392d52fd35879112ce2fb4d91 (diff)
download bcm5719-llvm-b2595b988b324fd33fe0b81733b90b4807da0719.tar.gz
bcm5719-llvm-b2595b988b324fd33fe0b81733b90b4807da0719.zip
[PowerPC] Improve codegen for vector loads using scalar_to_vector
This patch aims to improve the codegen for vector loads involving the scalar_to_vector (load X) sequence. Initially, ld->mv instructions were used for scalar_to_vector (load X), so this patch allows scalar_to_vector (load X) to utilize:
- LXSD and LXSDX for i64 and f64
- LXSIWAX for i32 (sign extension to i64)
- LXSIWZX for i32 and f64

Committing on behalf of Amy Kwan.

Differential Revision: https://reviews.llvm.org/D48950

llvm-svn: 339260
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll')
-rw-r--r-- llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll 265
1 files changed, 265 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
new file mode 100644
index 00000000000..c63044a79a5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
@@ -0,0 +1,265 @@
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
+; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
+
+; s2v_test1 (Function Attrs: norecurse nounwind readonly): load an i32 from %int32, sign-extend it to i64 and insert it into element 0 of %vec; every check prefix below expects the load itself to be a single lfiwax.
+define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
+; P9LE-LABEL: s2v_test1:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test1:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test1:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test1:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %int32, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test2 (Function Attrs: norecurse nounwind readonly): same sext-load-and-insert pattern as a plain pointer load, but the i32 is read from %int32[1] (constant GEP index 1); every check prefix below expects the 4-byte offset to be added with addi before the lfiwax.
+define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
+; P9LE-LABEL: s2v_test2:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: addi r3, r3, 4
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test2:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: addi r3, r3, 4
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test2:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: addi r3, r3, 4
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test2:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: addi r3, r3, 4
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+ %0 = load i32, i32* %arrayidx, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test3 (Function Attrs: norecurse nounwind readonly): sext-load-and-insert into element 0 of %vec where the i32 is read from %int32[%Idx] (variable, sign-extended index); every check prefix below expects the index to be scaled with sldi ..., 2 and used as the second address operand of lfiwax.
+define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) {
+; P9LE-LABEL: s2v_test3:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: sldi r4, r7, 2
+; P9LE-NEXT: lfiwax f0, r3, r4
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test3:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: sldi r4, r7, 2
+; P9BE-NEXT: lfiwax f0, r3, r4
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test3:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: sldi r4, r7, 2
+; P8LE-NEXT: lfiwax f0, r3, r4
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test3:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: sldi r4, r7, 2
+; P8BE-NEXT: lfiwax f0, r3, r4
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %idxprom = sext i32 %Idx to i64
+ %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test4 (Function Attrs: norecurse nounwind readonly): sext-load of %int32[1] inserted into element 0 of %vec, expecting addi + lfiwax under every check prefix. NOTE(review): the IR body and expected output are identical to s2v_test2 -- confirm whether a different addressing pattern was intended here.
+define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
+; P9LE-LABEL: s2v_test4:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: addi r3, r3, 4
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test4:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: addi r3, r3, 4
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test4:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: addi r3, r3, 4
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test4:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: addi r3, r3, 4
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
+ %0 = load i32, i32* %arrayidx, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test5 (Function Attrs: norecurse nounwind readonly): sext-load-and-insert into element 0 of %vec with the parameters swapped (vector first, pointer %ptr1 second); every check prefix below expects the lfiwax to take its address from r5 instead of r3.
+define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
+; P9LE-LABEL: s2v_test5:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r5
+; P9LE-NEXT: xxpermdi v3, f0, f0, 2
+; P9LE-NEXT: xxpermdi v2, v2, v3, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test5:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r5
+; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test5:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r5
+; P8LE-NEXT: xxpermdi v3, f0, f0, 2
+; P8LE-NEXT: xxpermdi v2, v2, v3, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test5:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r5
+; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %ptr1, align 4
+ %conv = sext i32 %0 to i64
+ %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
+ ret <2 x i64> %vecins
+}
+
+; s2v_test6 (Function Attrs: norecurse nounwind readonly): load an i32 from %ptr, sign-extend it to i64 and splat it into both lanes (insertelement into undef + shufflevector with a zeroinitializer mask); every check prefix below expects lfiwax followed by xxspltd.
+define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
+; P9LE-LABEL: s2v_test6:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v2, f0, f0, 2
+; P9LE-NEXT: xxspltd v2, v2, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test6:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxspltd v2, vs0, 0
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test6:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v2, f0, f0, 2
+; P8LE-NEXT: xxspltd v2, v2, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test6:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxspltd v2, vs0, 0
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %ptr, align 4
+ %conv = sext i32 %0 to i64
+ %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %splat.splat
+}
+
+; s2v_test7 (Function Attrs: norecurse nounwind readonly): sext-load of an i32 from %ptr splatted into both lanes, expecting lfiwax followed by xxspltd under every check prefix. NOTE(review): the IR body and expected output are identical to s2v_test6 -- confirm whether a different splat pattern was intended here.
+define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
+; P9LE-LABEL: s2v_test7:
+; P9LE: # %bb.0: # %entry
+; P9LE-NEXT: lfiwax f0, 0, r3
+; P9LE-NEXT: xxpermdi v2, f0, f0, 2
+; P9LE-NEXT: xxspltd v2, v2, 1
+; P9LE-NEXT: blr
+
+; P9BE-LABEL: s2v_test7:
+; P9BE: # %bb.0: # %entry
+; P9BE-NEXT: lfiwax f0, 0, r3
+; P9BE-NEXT: xxspltd v2, vs0, 0
+; P9BE-NEXT: blr
+
+; P8LE-LABEL: s2v_test7:
+; P8LE: # %bb.0: # %entry
+; P8LE-NEXT: lfiwax f0, 0, r3
+; P8LE-NEXT: xxpermdi v2, f0, f0, 2
+; P8LE-NEXT: xxspltd v2, v2, 1
+; P8LE-NEXT: blr
+
+; P8BE-LABEL: s2v_test7:
+; P8BE: # %bb.0: # %entry
+; P8BE-NEXT: lfiwax f0, 0, r3
+; P8BE-NEXT: xxspltd v2, vs0, 0
+; P8BE-NEXT: blr
+entry:
+ %0 = load i32, i32* %ptr, align 4
+ %conv = sext i32 %0 to i64
+ %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %splat.splat
+}
+
OpenPOWER on IntegriCloud