author    Simon Pilgrim <llvm-dev@redking.me.uk>    2016-06-28 08:08:15 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>    2016-06-28 08:08:15 +0000
commit    c15d2178313a3b904158a472e4aa1f118e040395 (patch)
tree      c4106e39019e725374e21f045e25343eb5a59c90 /llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
parent    36b2c0a683ccfafbaad948ced64b6912aa5bf826 (diff)
[X86][SSE] Added support for combining target shuffles to (V)PSHUFD/VPERMILPD/VPERMILPS immediate permutes
This patch allows target shuffles to be combined to single-input immediate permute instructions - (V)PSHUFD/VPERMILPD/VPERMILPS - allowing more general pattern matching than what we currently do, and it improves the likelihood of memory folding compared to existing patterns, which tend to reuse the input in multiple arguments.

Further permute instructions (V)PSHUFLW/(V)PSHUFHW/(V)PERMQ/(V)PERMPD may be added in the future, but it has proven tricky to create test cases for them so far. (V)PSHUFLW/(V)PSHUFHW is already handled quite well in combineTargetShuffle, so removing some of that code may allow us to perform more of the combining in one place without duplication.

Differential Revision: http://reviews.llvm.org/D21148

llvm-svn: 273999
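As a minimal sketch (assumed, not taken from this commit) of the memory-folding benefit mentioned above: a single-input shuffle whose source comes from memory previously had to be materialised in a register so a two-operand instruction like VSHUFPS could consume it twice, whereas a one-operand immediate permute such as VPERMILPS can fold the load directly. A hypothetical test in the style of the ones changed below:

define <4 x float> @fold_single_input_permute(<4 x float>* %p) {
; Hypothetical expected codegen once the shuffle combines to an immediate permute,
; folding the load into the instruction:
;   vpermilps {{.*#+}} xmm0 = mem[2,2,3,3]
  %v = load <4 x float>, <4 x float>* %p
  %s = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  ret <4 x float> %s
}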
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll')
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll | 42
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index ceda13c22ee..f9448d9cf96 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -152,8 +152,8 @@ define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00112233:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
-; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -196,7 +196,7 @@ define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08080808:
; AVX1: # BB#0:
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -213,7 +213,7 @@ define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_08084c4c:
; ALL: # BB#0:
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
-; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
ret <8 x float> %shuffle
@@ -907,8 +907,8 @@ define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000010:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -924,8 +924,8 @@ define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000200:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -941,8 +941,8 @@ define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00003000:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1042,8 +1042,8 @@ define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00112233:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1059,8 +1059,8 @@ define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00001111:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1091,7 +1091,7 @@ define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08080808:
; AVX1: # BB#0:
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -1108,7 +1108,7 @@ define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08084c4c:
; AVX1: # BB#0:
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_08084c4c:
@@ -1239,8 +1239,8 @@ define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_091b2d3f:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
@@ -1257,7 +1257,7 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_09ab1def:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT: retq
@@ -2050,7 +2050,7 @@ define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
@@ -2066,7 +2066,7 @@ define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_1U3z5zUU: