[x86] add restriction for lowering to vpermps

This transform was added with rL351346, and we had an escape for shufps, but we also want one for unpckps vs. vpermps because vpermps doesn't take an immediate shuffle index operand.

llvm-svn: 352333
author: Sanjay Patel <spatel@rotateright.com> 2019-01-27 21:53:33 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-01-27 21:53:33 +0000
commit: ebe6b43aecc7d784a66afc63d746b106052ba7e3 (patch)
tree: 7995493ee9be9daa1ccdb7ca79c00c1d559f6005 /llvm/test
parent: 816c9b3e25463d26e4bb3e0cc8889987c7af2704 (diff)
download: bcm5719-llvm-ebe6b43aecc7d784a66afc63d746b106052ba7e3.tar.gz bcm5719-llvm-ebe6b43aecc7d784a66afc63d746b106052ba7e3.zip
1 file changed, 26 insertions, 58 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll
index 4bd4a481069..47d9c41e019 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-unpck.ll
@@ -45,23 +45,15 @@ define <2 x double> @unpckh_unary_extracted_v8f64(<4 x double> %x) {
   ret <2 x double> %r
 }
 
-; FIXME: vpermps requires a constant load for the index op. It's unlikely to be profitable.
+; vpermps requires a constant load for the index op. It's unlikely to be profitable.
 
 define <4 x i32> @unpckh_unary_extracted_v8i32(<8 x i32> %x) {
-; AVX1-LABEL: unpckh_unary_extracted_v8i32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2OR512VL-LABEL: unpckh_unary_extracted_v8i32:
-; AVX2OR512VL:       # %bb.0:
-; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <2,6,3,7,u,u,u,u>
-; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
-; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2OR512VL-NEXT:    vzeroupper
-; AVX2OR512VL-NEXT:    retq
+; ALL-LABEL: unpckh_unary_extracted_v8i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; ALL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %extrl = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %extrh = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %r = shufflevector <4 x i32> %extrl, <4 x i32> %extrh, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -69,20 +61,12 @@ define <4 x i32> @unpckh_unary_extracted_v8i32(<8 x i32> %x) {
 }
 
 define <4 x float> @unpckh_unary_extracted_v8f32(<8 x float> %x) {
-; AVX1-LABEL: unpckh_unary_extracted_v8f32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2OR512VL-LABEL: unpckh_unary_extracted_v8f32:
-; AVX2OR512VL:       # %bb.0:
-; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <2,6,3,7,u,u,u,u>
-; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
-; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2OR512VL-NEXT:    vzeroupper
-; AVX2OR512VL-NEXT:    retq
+; ALL-LABEL: unpckh_unary_extracted_v8f32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; ALL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %extrl = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %extrh = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %r = shufflevector <4 x float> %extrl, <4 x float> %extrh, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -169,23 +153,15 @@ define <2 x double> @unpckl_unary_extracted_v8f64(<4 x double> %x) {
   ret <2 x double> %r
 }
 
-; FIXME: vpermps requires a constant load for the index op. It's unlikely to be profitable.
+; vpermps requires a constant load for the index op. It's unlikely to be profitable.
 
 define <4 x i32> @unpckl_unary_extracted_v8i32(<8 x i32> %x) {
-; AVX1-LABEL: unpckl_unary_extracted_v8i32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2OR512VL-LABEL: unpckl_unary_extracted_v8i32:
-; AVX2OR512VL:       # %bb.0:
-; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,4,1,5,u,u,u,u>
-; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
-; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2OR512VL-NEXT:    vzeroupper
-; AVX2OR512VL-NEXT:    retq
+; ALL-LABEL: unpckl_unary_extracted_v8i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; ALL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %extrl = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %extrh = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %r = shufflevector <4 x i32> %extrl, <4 x i32> %extrh, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -193,20 +169,12 @@ define <4 x i32> @unpckl_unary_extracted_v8i32(<8 x i32> %x) {
 }
 
 define <4 x float> @unpckl_unary_extracted_v8f32(<8 x float> %x) {
-; AVX1-LABEL: unpckl_unary_extracted_v8f32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2OR512VL-LABEL: unpckl_unary_extracted_v8f32:
-; AVX2OR512VL:       # %bb.0:
-; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,4,1,5,u,u,u,u>
-; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
-; AVX2OR512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2OR512VL-NEXT:    vzeroupper
-; AVX2OR512VL-NEXT:    retq
+; ALL-LABEL: unpckl_unary_extracted_v8f32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; ALL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %extrl = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %extrh = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %r = shufflevector <4 x float> %extrl, <4 x float> %extrh, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
author	Sanjay Patel <spatel@rotateright.com>	2019-01-27 21:53:33 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-01-27 21:53:33 +0000
commit	ebe6b43aecc7d784a66afc63d746b106052ba7e3 (patch)
tree	7995493ee9be9daa1ccdb7ca79c00c1d559f6005 /llvm/test
parent	816c9b3e25463d26e4bb3e0cc8889987c7af2704 (diff)
download	bcm5719-llvm-ebe6b43aecc7d784a66afc63d746b106052ba7e3.tar.gz bcm5719-llvm-ebe6b43aecc7d784a66afc63d746b106052ba7e3.zip