summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
diff options
context:
space:
mode:
author: Tony Jiang <jtony@ca.ibm.com> 2017-07-05 16:00:38 +0000
committer: Tony Jiang <jtony@ca.ibm.com> 2017-07-05 16:00:38 +0000
commit: 9a91a1811001e976aa90a02ed341023ab9d1841b (patch)
tree: 29a47878a46edf05e8e0d4a15e32242591b0f577 /llvm/test/CodeGen/PowerPC/vec_int_ext.ll
parent: d560a64e426540ee894f7f861b1ec7380a1a92fd (diff)
download: bcm5719-llvm-9a91a1811001e976aa90a02ed341023ab9d1841b.tar.gz
bcm5719-llvm-9a91a1811001e976aa90a02ed341023ab9d1841b.zip
[Power9] Exploit vector integer extend instructions when indices aren't correct.
This patch builds on the exploitation added by https://reviews.llvm.org/D33510. It now also catches build vector nodes whose inputs come from sign-extended vector extract elements where the indices used by the vector extracts are not correct. We can still use the new hardware instructions by adding a shuffle to move the elements to the correct indices. I introduced a new PPCISD node here because adding a vector_shuffle and changing the elements of the vector_extracts was getting undone by another DAG combine.

Commit on behalf of Zaara Syeda (syzaara@ca.ibm.com)

Differential Revision: https://reviews.llvm.org/D34009

llvm-svn: 307169
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/vec_int_ext.ll')
-rw-r--r-- llvm/test/CodeGen/PowerPC/vec_int_ext.ll 253
1 files changed, 225 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
index 9e1218c423b..d7bed503318 100644
--- a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll
@@ -1,12 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
-target triple = "powerpc64le-unknown-linux-gnu"
-
-define <4 x i32> @vextsb2w(<16 x i8> %a) {
-; PWR9-LABEL: vextsb2w:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsb2w 2, 2
-; PWR9-NEXT: blr
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE
+
+define <4 x i32> @vextsb2wLE(<16 x i8> %a) {
+; CHECK-LE-LABEL: vextsb2wLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsb2w 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsb2wLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i32
@@ -23,11 +29,17 @@ entry:
ret <4 x i32> %vecinit9
}
-define <2 x i64> @vextsb2d(<16 x i8> %a) {
-; PWR9-LABEL: vextsb2d:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsb2d 2, 2
-; PWR9-NEXT: blr
+define <2 x i64> @vextsb2dLE(<16 x i8> %a) {
+; CHECK-LE-LABEL: vextsb2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsb2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsb2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <16 x i8> %a, i32 0
%conv = sext i8 %vecext to i64
@@ -38,11 +50,17 @@ entry:
ret <2 x i64> %vecinit3
}
-define <4 x i32> @vextsh2w(<8 x i16> %a) {
-; PWR9-LABEL: vextsh2w:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsh2w 2, 2
-; PWR9-NEXT: blr
+define <4 x i32> @vextsh2wLE(<8 x i16> %a) {
+; CHECK-LE-LABEL: vextsh2wLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsh2w 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsh2wLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <8 x i16> %a, i32 0
%conv = sext i16 %vecext to i32
@@ -59,11 +77,17 @@ entry:
ret <4 x i32> %vecinit9
}
-define <2 x i64> @vextsh2d(<8 x i16> %a) {
-; PWR9-LABEL: vextsh2d:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsh2d 2, 2
-; PWR9-NEXT: blr
+define <2 x i64> @vextsh2dLE(<8 x i16> %a) {
+; CHECK-LE-LABEL: vextsh2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsh2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsh2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <8 x i16> %a, i32 0
%conv = sext i16 %vecext to i64
@@ -74,11 +98,17 @@ entry:
ret <2 x i64> %vecinit3
}
-define <2 x i64> @vextsw2d(<4 x i32> %a) {
-; PWR9-LABEL: vextsw2d:
-; PWR9: # BB#0: # %entry
-; PWR9-NEXT: vextsw2d 2, 2
-; PWR9-NEXT: blr
+define <2 x i64> @vextsw2dLE(<4 x i32> %a) {
+; CHECK-LE-LABEL: vextsw2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsw2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsw2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vmrgew
+; CHECK-BE-NEXT: vextsw2d 2, 2
+; CHECK-BE-NEXT: blr
+
entry:
%vecext = extractelement <4 x i32> %a, i32 0
%conv = sext i32 %vecext to i64
@@ -88,3 +118,170 @@ entry:
%vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
ret <2 x i64> %vecinit3
}
+
+define <4 x i32> @vextsb2wBE(<16 x i8> %a) { ; builds a <4 x i32> from sign-extended bytes 3, 7, 11 and 15 of %a
+; CHECK-BE-LABEL: vextsb2wBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsb2wBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 13
+; CHECK-LE-NEXT: vextsb2w 2, 2
+; CHECK-LE-NEXT: blr
entry: ; big-endian run expects vextsb2w alone; little-endian run expects a vsldoi before it
+ %vecext = extractelement <16 x i8> %a, i32 3 ; byte 3 -> result lane 0
+ %conv = sext i8 %vecext to i32
+ %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 7 ; byte 7 -> result lane 1
+ %conv2 = sext i8 %vecext1 to i32
+ %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+ %vecext4 = extractelement <16 x i8> %a, i32 11 ; byte 11 -> result lane 2
+ %conv5 = sext i8 %vecext4 to i32
+ %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+ %vecext7 = extractelement <16 x i8> %a, i32 15 ; byte 15 -> result lane 3
+ %conv8 = sext i8 %vecext7 to i32
+ %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+ ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsb2dBE(<16 x i8> %a) { ; builds a <2 x i64> from sign-extended bytes 7 and 15 of %a
+; CHECK-BE-LABEL: vextsb2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsb2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 9
+; CHECK-LE-NEXT: vextsb2d 2, 2
+; CHECK-LE-NEXT: blr
entry: ; big-endian run expects vextsb2d alone; little-endian run expects a vsldoi before it
+ %vecext = extractelement <16 x i8> %a, i32 7 ; byte 7 -> result lane 0
+ %conv = sext i8 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 15 ; byte 15 -> result lane 1
+ %conv2 = sext i8 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define <4 x i32> @vextsh2wBE(<8 x i16> %a) { ; builds a <4 x i32> from sign-extended i16 elements 1, 3, 5 and 7 of %a
+; CHECK-BE-LABEL: vextsh2wBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsh2wBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 14
+; CHECK-LE-NEXT: vextsh2w 2, 2
+; CHECK-LE-NEXT: blr
entry: ; big-endian run expects vextsh2w alone; little-endian run expects a vsldoi before it
+ %vecext = extractelement <8 x i16> %a, i32 1 ; element 1 -> result lane 0
+ %conv = sext i16 %vecext to i32
+ %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+ %vecext1 = extractelement <8 x i16> %a, i32 3 ; element 3 -> result lane 1
+ %conv2 = sext i16 %vecext1 to i32
+ %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+ %vecext4 = extractelement <8 x i16> %a, i32 5 ; element 5 -> result lane 2
+ %conv5 = sext i16 %vecext4 to i32
+ %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+ %vecext7 = extractelement <8 x i16> %a, i32 7 ; element 7 -> result lane 3
+ %conv8 = sext i16 %vecext7 to i32
+ %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+ ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsh2dBE(<8 x i16> %a) { ; builds a <2 x i64> from sign-extended i16 elements 3 and 7 of %a
+; CHECK-BE-LABEL: vextsh2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsh2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 10
+; CHECK-LE-NEXT: vextsh2d 2, 2
+; CHECK-LE-NEXT: blr
entry: ; big-endian run expects vextsh2d alone; little-endian run expects a vsldoi before it
+ %vecext = extractelement <8 x i16> %a, i32 3 ; element 3 -> result lane 0
+ %conv = sext i16 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <8 x i16> %a, i32 7 ; element 7 -> result lane 1
+ %conv2 = sext i16 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define <2 x i64> @vextsw2dBE(<4 x i32> %a) { ; builds a <2 x i64> from sign-extended i32 elements 1 and 3 of %a
+; CHECK-BE-LABEL: vextsw2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsw2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsw2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 12
+; CHECK-LE-NEXT: vextsw2d 2, 2
+; CHECK-LE-NEXT: blr
entry: ; big-endian run expects vextsw2d alone; little-endian run expects a vsldoi before it
+ %vecext = extractelement <4 x i32> %a, i32 1 ; element 1 -> result lane 0
+ %conv = sext i32 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %a, i32 3 ; element 3 -> result lane 1
+ %conv2 = sext i32 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define <2 x i64> @vextDiffVectors(<4 x i32> %a, <4 x i32> %b) { ; negative test: the two sign-extended lanes come from different source vectors
+; CHECK-LE-LABEL: vextDiffVectors:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NOT: vextsw2d
+
+; CHECK-BE-LABEL: vextDiffVectors:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NOT: vextsw2d
entry: ; both runs check that no vextsw2d is emitted for this function
+ %vecext = extractelement <4 x i32> %a, i32 0 ; lane 0 is taken from %a
+ %conv = sext i32 %vecext to i64
+ %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %vecext1 = extractelement <4 x i32> %b, i32 2 ; lane 1 is taken from %b, not %a
+ %conv2 = sext i32 %vecext1 to i64
+ %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+ ret <2 x i64> %vecinit3
+}
+
+define <8 x i16> @testInvalidExtend(<16 x i8> %a) { ; negative test: sign-extends the even-indexed bytes of %a from i8 to i16
+entry: ; both runs check that nothing matching "vexts" is emitted for this function
+; CHECK-LE-LABEL: testInvalidExtend:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NOT: vexts
+
+; CHECK-BE-LABEL: testInvalidExtend:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NOT: vexts
+
+ %vecext = extractelement <16 x i8> %a, i32 0 ; byte 0 -> result lane 0
+ %conv = sext i8 %vecext to i16
+ %vecinit = insertelement <8 x i16> undef, i16 %conv, i32 0
+ %vecext1 = extractelement <16 x i8> %a, i32 2 ; byte 2 -> result lane 1
+ %conv2 = sext i8 %vecext1 to i16
+ %vecinit3 = insertelement <8 x i16> %vecinit, i16 %conv2, i32 1
+ %vecext4 = extractelement <16 x i8> %a, i32 4 ; byte 4 -> result lane 2
+ %conv5 = sext i8 %vecext4 to i16
+ %vecinit6 = insertelement <8 x i16> %vecinit3, i16 %conv5, i32 2
+ %vecext7 = extractelement <16 x i8> %a, i32 6 ; byte 6 -> result lane 3
+ %conv8 = sext i8 %vecext7 to i16
+ %vecinit9 = insertelement <8 x i16> %vecinit6, i16 %conv8, i32 3
+ %vecext10 = extractelement <16 x i8> %a, i32 8 ; byte 8 -> result lane 4
+ %conv11 = sext i8 %vecext10 to i16
+ %vecinit12 = insertelement <8 x i16> %vecinit9, i16 %conv11, i32 4
+ %vecext13 = extractelement <16 x i8> %a, i32 10 ; byte 10 -> result lane 5
+ %conv14 = sext i8 %vecext13 to i16
+ %vecinit15 = insertelement <8 x i16> %vecinit12, i16 %conv14, i32 5
+ %vecext16 = extractelement <16 x i8> %a, i32 12 ; byte 12 -> result lane 6
+ %conv17 = sext i8 %vecext16 to i16
+ %vecinit18 = insertelement <8 x i16> %vecinit15, i16 %conv17, i32 6
+ %vecext19 = extractelement <16 x i8> %a, i32 14 ; byte 14 -> result lane 7
+ %conv20 = sext i8 %vecext19 to i16
+ %vecinit21 = insertelement <8 x i16> %vecinit18, i16 %conv20, i32 7
+ ret <8 x i16> %vecinit21
+}
OpenPOWER on IntegriCloud