summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-09-04 13:51:57 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-09-04 13:51:57 +0000
commit: 91751b42f639f6419acbf493895078e0b3b13d7f (patch)
tree: f6f7c7d488f1c6dace70244f3a2ba6bc43d51824
parent: adffa8b2e96c0e59551f486b6bfe0d70aaaeda45 (diff)
download: bcm5719-llvm-91751b42f639f6419acbf493895078e0b3b13d7f.tar.gz
          bcm5719-llvm-91751b42f639f6419acbf493895078e0b3b13d7f.zip
[X86][AVX512] Add support for VPERMILPS v16f32 shuffle lowering (PR34382)
Avoid using VPERMPS where we don't need it by using the variable-mask version of VPERMILPS for unary shuffles instead.

llvm-svn: 312486
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 9
-rw-r--r-- llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll 70
-rw-r--r-- llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll 3
3 files changed, 40 insertions, 42 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 80f1afe310f..023e539250c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13452,6 +13452,15 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Otherwise, fall back to a SHUFPS sequence.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
+
+ // If we have a single input shuffle with different shuffle patterns in the
+ // 128-bit lanes and don't lane cross, use variable mask VPERMILPS.
+ if (V2.isUndef() &&
+ !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
+ SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true);
+ return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask);
+ }
+
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll
index d58c12d10d8..33126ed64c0 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll
@@ -701,8 +701,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp) {
define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) {
; CHECK-LABEL: test_16xfloat_perm_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
-; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: retq
%res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15>
ret <16 x float> %res
@@ -710,10 +709,9 @@ define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) {
define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: movw $16429, %ax # imm = 0x402D
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15>
@@ -724,10 +722,9 @@ define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x fl
define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: movw $16429, %ax # imm = 0x402D
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15>
%res = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer
@@ -760,10 +757,9 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec) {
define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 0, i32 5, i32 4, i32 6, i32 5, i32 11, i32 10, i32 9, i32 9, i32 14, i32 13, i32 14, i32 12>
@@ -774,10 +770,9 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x fl
define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: movw $28987, %ax # imm = 0x713B
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 0, i32 5, i32 4, i32 6, i32 5, i32 11, i32 10, i32 9, i32 9, i32 14, i32 13, i32 14, i32 12>
%res = select <16 x i1> <i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer
@@ -818,10 +813,9 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec) {
define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask4:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 3, i32 3, i32 5, i32 5, i32 5, i32 7, i32 11, i32 11, i32 8, i32 11, i32 14, i32 12, i32 14, i32 15>
@@ -832,10 +826,9 @@ define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x fl
define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask4:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 3, i32 3, i32 5, i32 5, i32 5, i32 7, i32 11, i32 11, i32 8, i32 11, i32 14, i32 12, i32 14, i32 15>
%res = select <16 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer
@@ -868,8 +861,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask5(<16 x float> %vec) {
define <16 x float> @test_16xfloat_perm_mask6(<16 x float> %vec) {
; CHECK-LABEL: test_16xfloat_perm_mask6:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
-; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: retq
%res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13>
ret <16 x float> %res
@@ -877,10 +869,9 @@ define <16 x float> @test_16xfloat_perm_mask6(<16 x float> %vec) {
define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mask6:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: movw $-28239, %ax # imm = 0x91B1
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13>
@@ -891,10 +882,9 @@ define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x fl
define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mask6:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: movw $-28239, %ax # imm = 0x91B1
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
; CHECK-NEXT: retq
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13>
%res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer
@@ -927,8 +917,8 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask7(<16 x float> %vec) {
define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
; CHECK-LABEL: test_16xfloat_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
-; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 6, i32 6, i32 6, i32 6, i32 11, i32 10, i32 9, i32 10, i32 12, i32 14, i32 12, i32 12>
@@ -937,10 +927,10 @@ define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: movw $-22887, %ax # imm = 0xA699
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 6, i32 6, i32 6, i32 6, i32 11, i32 10, i32 9, i32 10, i32 12, i32 14, i32 12, i32 12>
@@ -951,10 +941,10 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16
define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: movw $-22887, %ax # imm = 0xA699
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 6, i32 6, i32 6, i32 6, i32 11, i32 10, i32 9, i32 10, i32 12, i32 14, i32 12, i32 12>
@@ -991,10 +981,10 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp
define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: movw $-8399, %ax # imm = 0xDF31
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 3, i32 5, i32 5, i32 6, i32 5, i32 9, i32 8, i32 8, i32 8, i32 14, i32 12, i32 13, i32 13>
@@ -1005,10 +995,10 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16
define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: movw $-8399, %ax # imm = 0xDF31
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 3, i32 5, i32 5, i32 6, i32 5, i32 9, i32 8, i32 8, i32 8, i32 14, i32 12, i32 13, i32 13>
@@ -1054,10 +1044,10 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp
define <16 x float> @test_masked_16xfloat_perm_mem_mask4(<16 x float>* %vp, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask4:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: movw $1218, %ax # imm = 0x4C2
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 1, i32 1, i32 6, i32 5, i32 5, i32 6, i32 11, i32 11, i32 10, i32 9, i32 15, i32 14, i32 12, i32 12>
@@ -1068,10 +1058,10 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask4(<16 x float>* %vp, <16
define <16 x float> @test_masked_z_16xfloat_perm_mem_mask4(<16 x float>* %vp) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask4:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: movw $1218, %ax # imm = 0x4C2
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 1, i32 1, i32 6, i32 5, i32 5, i32 6, i32 11, i32 11, i32 10, i32 9, i32 15, i32 14, i32 12, i32 12>
@@ -1108,8 +1098,8 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp
define <16 x float> @test_16xfloat_perm_mem_mask6(<16 x float>* %vp) {
; CHECK-LABEL: test_16xfloat_perm_mem_mask6:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
-; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 1, i32 1, i32 2, i32 6, i32 5, i32 5, i32 7, i32 9, i32 11, i32 9, i32 9, i32 12, i32 15, i32 14, i32 15>
@@ -1118,10 +1108,10 @@ define <16 x float> @test_16xfloat_perm_mem_mask6(<16 x float>* %vp) {
define <16 x float> @test_masked_16xfloat_perm_mem_mask6(<16 x float>* %vp, <16 x float> %vec2) {
; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask6:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: movw $-20907, %ax # imm = 0xAE55
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 1, i32 1, i32 2, i32 6, i32 5, i32 5, i32 7, i32 9, i32 11, i32 9, i32 9, i32 12, i32 15, i32 14, i32 15>
@@ -1132,10 +1122,10 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask6(<16 x float>* %vp, <16
define <16 x float> @test_masked_z_16xfloat_perm_mem_mask6(<16 x float>* %vp) {
; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask6:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: movw $-20907, %ax # imm = 0xAE55
; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
; CHECK-NEXT: retq
%vec = load <16 x float>, <16 x float>* %vp
%shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 1, i32 1, i32 2, i32 6, i32 5, i32 5, i32 7, i32 9, i32 11, i32 9, i32 9, i32 12, i32 15, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 5d6178e0d92..f378cc9a866 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -67,8 +67,7 @@ define <16 x float> @shuffle_v16f32_vunpcklps_swap(<16 x float> %a, <16 x float>
define <16 x float> @shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12(<16 x float> %a0) {
; ALL-LABEL: shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14_12:
; ALL: # BB#0:
-; ALL-NEXT: vmovaps {{.*#+}} zmm1 = [1,1,3,0,6,4,5,7,8,8,9,9,15,14,14,12]
-; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
+; ALL-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,1,3,0,6,4,5,7,8,8,9,9,15,14,14,12]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 0, i32 6, i32 4, i32 5, i32 7, i32 8, i32 8, i32 9, i32 9, i32 15, i32 14, i32 14, i32 12>
ret <16 x float> %shuffle
OpenPOWER on IntegriCloud