diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll')
| -rwxr-xr-x | llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll | 1376 |
1 files changed, 688 insertions, 688 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll index 325dad9cb9e..d1af9d44608 100755 --- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -23,7 +23,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -45,7 +45,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -64,7 +64,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -86,7 +86,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -105,7 +105,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -127,7 +127,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -161,7 +161,7 @@ define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %ve ; GENERIC-LABEL: test_masked_16xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -183,7 +183,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> % ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -218,7 +218,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -239,7 +239,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -260,7 +260,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -281,7 +281,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -302,7 +302,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -323,7 +323,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -360,7 +360,7 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -381,7 +381,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i1 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -417,7 +417,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -439,7 +439,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -458,7 +458,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -480,7 +480,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -499,7 +499,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -521,7 +521,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -555,7 +555,7 @@ define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %ve ; GENERIC-LABEL: test_masked_32xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -577,7 +577,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> % ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -612,7 +612,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -633,7 +633,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -654,7 +654,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -675,7 +675,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -696,7 +696,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -717,7 +717,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -754,7 +754,7 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -775,7 +775,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i1 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -811,7 +811,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -833,7 +833,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -852,7 +852,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -874,7 +874,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -893,7 +893,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -915,7 +915,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -949,7 +949,7 @@ define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, ; GENERIC-LABEL: test_masked_8xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -971,7 +971,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1006,7 +1006,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1027,7 +1027,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1048,7 +1048,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1069,7 +1069,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1090,7 +1090,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1111,7 +1111,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1148,7 +1148,7 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %ve ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1169,7 +1169,7 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> % ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1205,7 +1205,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1227,7 +1227,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1246,7 +1246,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1268,7 +1268,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1287,7 +1287,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1309,7 +1309,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1343,7 +1343,7 @@ define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %ve ; GENERIC-LABEL: test_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1365,7 +1365,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> % ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1400,7 +1400,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1421,7 +1421,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1442,7 +1442,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1463,7 +1463,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1484,7 +1484,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1505,7 +1505,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1542,7 +1542,7 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1563,7 +1563,7 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i3 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1596,7 +1596,7 @@ define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) { define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1616,7 +1616,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1633,7 +1633,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1653,7 +1653,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1670,7 +1670,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1690,7 +1690,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1720,7 +1720,7 @@ define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) { define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1740,7 +1740,7 @@ define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1771,7 +1771,7 @@ define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1790,7 +1790,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1809,7 +1809,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> % define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1828,7 +1828,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1847,7 +1847,7 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> % define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1866,7 +1866,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1899,7 +1899,7 @@ define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1918,7 +1918,7 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %ve define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1953,7 +1953,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1975,7 +1975,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1993,7 +1993,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2013,7 +2013,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2031,7 +2031,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2053,7 +2053,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2084,7 +2084,7 @@ define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) { define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2104,7 +2104,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2122,7 +2122,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2144,7 +2144,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2162,7 +2162,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2182,7 +2182,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2215,7 +2215,7 @@ define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, ; GENERIC-LABEL: test_masked_8xi64_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2237,7 +2237,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2255,7 +2255,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2275,7 +2275,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %ve define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2309,7 +2309,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2330,7 +2330,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2350,7 +2350,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2369,7 +2369,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2389,7 +2389,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2410,7 +2410,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2444,7 +2444,7 @@ define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2463,7 +2463,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2483,7 +2483,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2504,7 +2504,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2524,7 +2524,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2543,7 +2543,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2579,7 +2579,7 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %ve ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2600,7 +2600,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> % ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2620,7 +2620,7 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> % define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2639,7 +2639,7 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2674,7 +2674,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2696,7 +2696,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x i32> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2715,7 +2715,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2737,7 +2737,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x i64> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2756,7 +2756,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2778,7 +2778,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x i32> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2812,7 +2812,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> ; GENERIC-LABEL: test_masked_8xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2834,7 +2834,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x i32> ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2869,7 +2869,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2890,7 +2890,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2911,7 +2911,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2932,7 +2932,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2953,7 +2953,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2974,7 +2974,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3011,7 +3011,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x fl ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3032,7 +3032,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3068,7 +3068,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3090,7 +3090,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3109,7 +3109,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3131,7 +3131,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3150,7 +3150,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3172,7 +3172,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3206,7 +3206,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x fl ; GENERIC-LABEL: test_masked_16xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3228,7 +3228,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3263,7 +3263,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3284,7 +3284,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3305,7 +3305,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3326,7 +3326,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3347,7 +3347,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3368,7 +3368,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3405,7 +3405,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3426,7 +3426,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <1 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3459,7 +3459,7 @@ define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3479,7 +3479,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3496,7 +3496,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3516,7 +3516,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3533,7 +3533,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3553,7 +3553,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3583,7 +3583,7 @@ define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) { define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3603,7 +3603,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x dou define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3634,7 +3634,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3653,7 +3653,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3672,7 +3672,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3691,7 +3691,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3710,7 +3710,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3729,7 +3729,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3762,7 +3762,7 @@ define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3781,7 +3781,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3816,7 +3816,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3838,7 +3838,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3856,7 +3856,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3876,7 +3876,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3894,7 +3894,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3916,7 +3916,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3947,7 +3947,7 @@ define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3967,7 +3967,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3985,7 +3985,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4007,7 +4007,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4025,7 +4025,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4045,7 +4045,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4078,7 +4078,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x dou ; GENERIC-LABEL: test_masked_8xdouble_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4100,7 +4100,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4118,7 +4118,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4138,7 +4138,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4172,7 +4172,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4193,7 +4193,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4213,7 +4213,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4232,7 +4232,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4252,7 +4252,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4273,7 +4273,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4307,7 +4307,7 @@ define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4326,7 +4326,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4346,7 +4346,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4367,7 +4367,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4387,7 +4387,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4406,7 +4406,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4442,7 +4442,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4463,7 +4463,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [6:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4483,7 +4483,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4502,7 +4502,7 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4534,7 +4534,7 @@ define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) { define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4554,7 +4554,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4571,7 +4571,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4591,7 +4591,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4608,7 +4608,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4628,7 +4628,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4658,7 +4658,7 @@ define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4678,7 +4678,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4712,7 +4712,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4733,7 +4733,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4754,7 +4754,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4775,7 +4775,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4796,7 +4796,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4817,7 +4817,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4854,7 +4854,7 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %ve ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4875,7 +4875,7 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> % ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4908,7 +4908,7 @@ define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4928,7 +4928,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4945,7 +4945,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4965,7 +4965,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4982,7 +4982,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5002,7 +5002,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5032,7 +5032,7 @@ define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5052,7 +5052,7 @@ define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5086,7 +5086,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5107,7 +5107,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5128,7 +5128,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5149,7 +5149,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5170,7 +5170,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5191,7 +5191,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5228,7 +5228,7 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %ve ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5249,7 +5249,7 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> % ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5282,7 +5282,7 @@ define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5302,7 +5302,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5319,7 +5319,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5339,7 +5339,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5356,7 +5356,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5376,7 +5376,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5406,7 +5406,7 @@ define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5426,7 +5426,7 @@ define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5460,7 +5460,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5481,7 +5481,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5502,7 +5502,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5523,7 +5523,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5544,7 +5544,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5565,7 +5565,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5602,7 +5602,7 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %ve ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5623,7 +5623,7 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> % ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5656,7 +5656,7 @@ define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) { define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5676,7 +5676,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5693,7 +5693,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5713,7 +5713,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5730,7 +5730,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> % define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5750,7 +5750,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5780,7 +5780,7 @@ define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5800,7 +5800,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5817,7 +5817,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> % define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5837,7 +5837,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5854,7 +5854,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5874,7 +5874,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5904,7 +5904,7 @@ define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) { define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5924,7 +5924,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %v define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5941,7 +5941,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5961,7 +5961,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %ve define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5992,7 +5992,7 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6011,7 +6011,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6030,7 +6030,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6049,7 +6049,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6068,7 +6068,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i1 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6087,7 +6087,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6120,7 +6120,7 @@ define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6139,7 +6139,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6158,7 +6158,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i1 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6177,7 +6177,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6196,7 +6196,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6215,7 +6215,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6248,7 +6248,7 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6267,7 +6267,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6286,7 +6286,7 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6305,7 +6305,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6337,7 +6337,7 @@ define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) { define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6357,7 +6357,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6374,7 +6374,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6394,7 +6394,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6411,7 +6411,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i1 define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6431,7 +6431,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6461,7 +6461,7 @@ define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6481,7 +6481,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6498,7 +6498,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i1 define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6518,7 +6518,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6535,7 +6535,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6555,7 +6555,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6585,7 +6585,7 @@ define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6605,7 +6605,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16 define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6622,7 +6622,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -6642,7 +6642,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6673,7 +6673,7 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6692,7 +6692,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6711,7 +6711,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6730,7 +6730,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6749,7 +6749,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6768,7 +6768,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6801,7 +6801,7 @@ define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6820,7 +6820,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6839,7 +6839,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6858,7 +6858,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6877,7 +6877,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6896,7 +6896,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6929,7 +6929,7 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6948,7 +6948,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6967,7 +6967,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6986,7 +6986,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7018,7 +7018,7 @@ define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7038,7 +7038,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7055,7 +7055,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7075,7 +7075,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7092,7 +7092,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i1 define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7112,7 +7112,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7142,7 +7142,7 @@ define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7162,7 +7162,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7179,7 +7179,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i1 define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7199,7 +7199,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7216,7 +7216,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7236,7 +7236,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7266,7 +7266,7 @@ define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7286,7 +7286,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16 define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7303,7 +7303,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7323,7 +7323,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7354,7 +7354,7 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7373,7 +7373,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7392,7 +7392,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7411,7 +7411,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7430,7 +7430,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7449,7 +7449,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7482,7 +7482,7 @@ define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7501,7 +7501,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7520,7 +7520,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7539,7 +7539,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7559,7 +7559,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7580,7 +7580,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7614,7 +7614,7 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7633,7 +7633,7 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7652,7 +7652,7 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7671,7 +7671,7 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7703,7 +7703,7 @@ define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) { define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7723,7 +7723,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7740,7 +7740,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7760,7 +7760,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7777,7 +7777,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7797,7 +7797,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7827,7 +7827,7 @@ define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) { define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7847,7 +7847,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7878,7 +7878,7 @@ define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) { define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7897,7 +7897,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7916,7 +7916,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> % define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7935,7 +7935,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7954,7 +7954,7 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> % define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7973,7 +7973,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8006,7 +8006,7 @@ define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8025,7 +8025,7 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %ve define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8057,7 +8057,7 @@ define <8 x i32> @test2_8xi32_perm_mask0(<8 x i32> %vec) { define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8077,7 +8077,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8094,7 +8094,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mas define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8114,7 +8114,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8131,7 +8131,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mas define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8151,7 +8151,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8181,7 +8181,7 @@ define <8 x i32> @test2_8xi32_perm_mask3(<8 x i32> %vec) { define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8201,7 +8201,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, define <8 x i32> @test2_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8232,7 +8232,7 @@ define <8 x i32> @test2_8xi32_perm_mem_mask0(<8 x i32>* %vp) { define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8251,7 +8251,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8270,7 +8270,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8289,7 +8289,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8308,7 +8308,7 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8327,7 +8327,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8360,7 +8360,7 @@ define <8 x i32> @test2_8xi32_perm_mem_mask3(<8 x i32>* %vp) { define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8379,7 +8379,7 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %v define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8411,7 +8411,7 @@ define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) { define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8431,7 +8431,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8448,7 +8448,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8468,7 +8468,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8485,7 +8485,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8505,7 +8505,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8535,7 +8535,7 @@ define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) { define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8555,7 +8555,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %v define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8586,7 +8586,7 @@ define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8605,7 +8605,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8624,7 +8624,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8643,7 +8643,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8662,7 +8662,7 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8681,7 +8681,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8714,7 +8714,7 @@ define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8733,7 +8733,7 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8765,7 +8765,7 @@ define <8 x float> @test2_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %ve define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8785,7 +8785,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x flo define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8802,7 +8802,7 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8822,7 +8822,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x flo define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8839,7 +8839,7 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8859,7 +8859,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x flo define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8889,7 +8889,7 @@ define <8 x float> @test2_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %ve define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8909,7 +8909,7 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x flo define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8940,7 +8940,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -8961,7 +8961,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -8980,7 +8980,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9001,7 +9001,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9020,7 +9020,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9041,7 +9041,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9074,7 +9074,7 @@ define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9095,7 +9095,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9127,7 +9127,7 @@ define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9147,7 +9147,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9164,7 +9164,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9184,7 +9184,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9201,7 +9201,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, < define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9221,7 +9221,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9251,7 +9251,7 @@ define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9271,7 +9271,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9302,7 +9302,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9323,7 +9323,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9342,7 +9342,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9363,7 +9363,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9382,7 +9382,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9403,7 +9403,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9436,7 +9436,7 @@ define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x flo define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9457,7 +9457,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9489,7 +9489,7 @@ define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9509,7 +9509,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9526,7 +9526,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9546,7 +9546,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9563,7 +9563,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, < define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9583,7 +9583,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9613,7 +9613,7 @@ define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9633,7 +9633,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9664,7 +9664,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9685,7 +9685,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9704,7 +9704,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9725,7 +9725,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9744,7 +9744,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9765,7 +9765,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9798,7 +9798,7 @@ define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x doub define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9819,7 +9819,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9851,7 +9851,7 @@ define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9871,7 +9871,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9888,7 +9888,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9908,7 +9908,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9925,7 +9925,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, < define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9945,7 +9945,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -9975,7 +9975,7 @@ define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -9995,7 +9995,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10026,7 +10026,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10047,7 +10047,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10066,7 +10066,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10087,7 +10087,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10106,7 +10106,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10127,7 +10127,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10160,7 +10160,7 @@ define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x doub define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10181,7 +10181,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10213,7 +10213,7 @@ define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) { define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10233,7 +10233,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10250,7 +10250,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10270,7 +10270,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10287,7 +10287,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10307,7 +10307,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10337,7 +10337,7 @@ define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) { define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10357,7 +10357,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2 define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10388,7 +10388,7 @@ define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10409,7 +10409,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10428,7 +10428,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10449,7 +10449,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10468,7 +10468,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10489,7 +10489,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10522,7 +10522,7 @@ define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10543,7 +10543,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10575,7 +10575,7 @@ define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) { define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10595,7 +10595,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10612,7 +10612,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10632,7 +10632,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10649,7 +10649,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10669,7 +10669,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10699,7 +10699,7 @@ define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) { define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10719,7 +10719,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> % define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10750,7 +10750,7 @@ define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %ve define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10771,7 +10771,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10790,7 +10790,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10811,7 +10811,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10830,7 +10830,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10851,7 +10851,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10884,7 +10884,7 @@ define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %ve define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10905,7 +10905,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i3 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10937,7 +10937,7 @@ define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) { define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10957,7 +10957,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -10974,7 +10974,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -10994,7 +10994,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11011,7 +11011,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11031,7 +11031,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11061,7 +11061,7 @@ define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) { define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11081,7 +11081,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2 define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11112,7 +11112,7 @@ define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11133,7 +11133,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11152,7 +11152,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11173,7 +11173,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11192,7 +11192,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11213,7 +11213,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11246,7 +11246,7 @@ define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11267,7 +11267,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11299,7 +11299,7 @@ define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) { define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11319,7 +11319,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11336,7 +11336,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11356,7 +11356,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11373,7 +11373,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11393,7 +11393,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11423,7 +11423,7 @@ define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) { define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11443,7 +11443,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2 define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11474,7 +11474,7 @@ define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11495,7 +11495,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11514,7 +11514,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11535,7 +11535,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11554,7 +11554,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11575,7 +11575,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11608,7 +11608,7 @@ define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11629,7 +11629,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11661,7 +11661,7 @@ define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11681,7 +11681,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11698,7 +11698,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11718,7 +11718,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11735,7 +11735,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11755,7 +11755,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11785,7 +11785,7 @@ define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11805,7 +11805,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11836,7 +11836,7 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x fl define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11857,7 +11857,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11876,7 +11876,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %v define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11897,7 +11897,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11916,7 +11916,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %v define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11937,7 +11937,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -11970,7 +11970,7 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x fl define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -11991,7 +11991,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12023,7 +12023,7 @@ define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12043,7 +12043,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12060,7 +12060,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12080,7 +12080,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12097,7 +12097,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12117,7 +12117,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12147,7 +12147,7 @@ define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12167,7 +12167,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12198,7 +12198,7 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x fl define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12219,7 +12219,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12238,7 +12238,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %v define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12259,7 +12259,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12278,7 +12278,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %v define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12299,7 +12299,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12332,7 +12332,7 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x fl define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12353,7 +12353,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12385,7 +12385,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x fl define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12405,7 +12405,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12422,7 +12422,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %ve define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12442,7 +12442,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12459,7 +12459,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %ve define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12479,7 +12479,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12509,7 +12509,7 @@ define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x fl define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12529,7 +12529,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, < define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12560,7 +12560,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12581,7 +12581,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12600,7 +12600,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12621,7 +12621,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12640,7 +12640,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12661,7 +12661,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12694,7 +12694,7 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12715,7 +12715,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12747,7 +12747,7 @@ define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x dou define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12767,7 +12767,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, < define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12784,7 +12784,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %ve define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12804,7 +12804,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, < define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12835,7 +12835,7 @@ define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12856,7 +12856,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12875,7 +12875,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12896,7 +12896,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12928,7 +12928,7 @@ define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x dou define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12948,7 +12948,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -12965,7 +12965,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %ve define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -12985,7 +12985,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13002,7 +13002,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %ve define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13022,7 +13022,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13052,7 +13052,7 @@ define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x dou define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13072,7 +13072,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, < define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13103,7 +13103,7 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13124,7 +13124,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13143,7 +13143,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13164,7 +13164,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13183,7 +13183,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13204,7 +13204,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13237,7 +13237,7 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13258,7 +13258,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13290,7 +13290,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x dou define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13310,7 +13310,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13327,7 +13327,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %ve define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13347,7 +13347,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13364,7 +13364,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %ve define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13384,7 +13384,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13414,7 +13414,7 @@ define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x dou define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13434,7 +13434,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, < define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13465,7 +13465,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13486,7 +13486,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13505,7 +13505,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13526,7 +13526,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13545,7 +13545,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13566,7 +13566,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13599,7 +13599,7 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13620,7 +13620,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13652,7 +13652,7 @@ define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13672,7 +13672,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13689,7 +13689,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1 define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13709,7 +13709,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13726,7 +13726,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1 define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13746,7 +13746,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13776,7 +13776,7 @@ define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13796,7 +13796,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13827,7 +13827,7 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x f define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13848,7 +13848,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13867,7 +13867,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> % define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13888,7 +13888,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13907,7 +13907,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> % define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13928,7 +13928,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13961,7 +13961,7 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x f define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -13982,7 +13982,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14014,7 +14014,7 @@ define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14034,7 +14034,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14051,7 +14051,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1 define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14071,7 +14071,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14088,7 +14088,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1 define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14108,7 +14108,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14138,7 +14138,7 @@ define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14158,7 +14158,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14189,7 +14189,7 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x f define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14210,7 +14210,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14229,7 +14229,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> % define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14250,7 +14250,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14269,7 +14269,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> % define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14290,7 +14290,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14323,7 +14323,7 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x f define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14344,7 +14344,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14376,7 +14376,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x f define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14396,7 +14396,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14413,7 +14413,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %v define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14433,7 +14433,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14450,7 +14450,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %v define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14470,7 +14470,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14500,7 +14500,7 @@ define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x f define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14520,7 +14520,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14551,7 +14551,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14572,7 +14572,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14591,7 +14591,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14612,7 +14612,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14631,7 +14631,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14652,7 +14652,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14685,7 +14685,7 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14706,7 +14706,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %ve define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14738,7 +14738,7 @@ define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x do define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14758,7 +14758,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14775,7 +14775,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %v define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14795,7 +14795,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14826,7 +14826,7 @@ define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14847,7 +14847,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %ve define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14866,7 +14866,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14887,7 +14887,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %ve define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14919,7 +14919,7 @@ define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x do define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14939,7 +14939,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14956,7 +14956,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %v define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -14976,7 +14976,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -14993,7 +14993,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %v define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15013,7 +15013,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15043,7 +15043,7 @@ define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x do define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15063,7 +15063,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15094,7 +15094,7 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15115,7 +15115,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15134,7 +15134,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15155,7 +15155,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15174,7 +15174,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15195,7 +15195,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15228,7 +15228,7 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15249,7 +15249,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %ve define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15281,7 +15281,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x do define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15301,7 +15301,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15318,7 +15318,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %v define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15338,7 +15338,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15355,7 +15355,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %v define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15375,7 +15375,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15405,7 +15405,7 @@ define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x do define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15425,7 +15425,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15456,7 +15456,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15477,7 +15477,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15496,7 +15496,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15517,7 +15517,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15536,7 +15536,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15557,7 +15557,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -15590,7 +15590,7 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -15611,7 +15611,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %ve define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; |

