diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-trunc-math.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc-math.ll | 228 |
1 files changed, 120 insertions, 108 deletions
diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index d9f186e64f1..e552f5f4036 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -233,7 +233,8 @@ define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpaddq %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm7 = mem[0,0] ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3 ; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6 ; AVX1-NEXT: vpackusdw %xmm3, %xmm6, %xmm3 @@ -347,7 +348,7 @@ define <16 x i8> @trunc_add_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 @@ -680,22 +681,23 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-LABEL: trunc_add_const_v16i64_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -781,13 +783,13 @@ define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-LABEL: trunc_add_const_v16i32_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0 @@ -1106,7 +1108,8 @@ define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpsubq %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm7 = mem[0,0] ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3 ; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6 ; AVX1-NEXT: vpackusdw %xmm3, %xmm6, %xmm3 @@ -1220,7 +1223,7 @@ define <16 x i8> @trunc_sub_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 @@ -1575,7 +1578,8 @@ define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm7 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm4 = mem[0,0] ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm7 ; AVX1-NEXT: vpackusdw %xmm3, %xmm7, %xmm3 @@ -1687,7 +1691,7 @@ define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1 @@ -2275,7 +2279,8 @@ define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; AVX1-NEXT: vpsllq $32, %xmm6, %xmm6 ; AVX1-NEXT: vpmuludq %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpaddq %xmm6, %xmm3, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm4 = mem[0,0] ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 @@ -2451,7 +2456,7 @@ define <16 x i8> @trunc_mul_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 @@ -2909,7 +2914,8 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3 ; AVX1-NEXT: vpaddq %xmm3, %xmm7, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm6 = mem[0,0] ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 @@ -3049,7 +3055,7 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1 @@ -3351,27 +3357,28 @@ define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; ; AVX1-LABEL: trunc_and_v16i64_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 -; AVX1-NEXT: vandps %ymm5, %ymm1, %ymm1 -; AVX1-NEXT: vandps %ymm6, %ymm2, %ymm2 -; AVX1-NEXT: vandps %ymm7, %ymm3, %ymm3 +; AVX1-NEXT: vandpd %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vandpd %ymm5, %ymm1, %ymm1 +; AVX1-NEXT: vandpd %ymm6, %ymm2, %ymm2 +; AVX1-NEXT: vandpd %ymm7, %ymm3, %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -3468,7 +3475,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 @@ -3751,22 +3758,23 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-LABEL: trunc_and_const_v16i64_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -3852,13 +3860,13 @@ define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-LABEL: trunc_and_const_v16i32_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 @@ -4153,27 +4161,28 @@ define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin ; ; AVX1-LABEL: trunc_xor_v16i64_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1 -; AVX1-NEXT: vxorps %ymm6, %ymm2, %ymm2 -; AVX1-NEXT: vxorps %ymm7, %ymm3, %ymm3 +; AVX1-NEXT: vxorpd %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vxorpd %ymm5, %ymm1, %ymm1 +; AVX1-NEXT: vxorpd %ymm6, %ymm2, %ymm2 +; AVX1-NEXT: vxorpd %ymm7, %ymm3, %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -4270,7 +4279,7 @@ define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin ; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 @@ -4553,22 +4562,23 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-LABEL: trunc_xor_const_v16i64_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -4654,13 +4664,13 @@ define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-LABEL: trunc_xor_const_v16i32_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 @@ -4955,27 +4965,28 @@ define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind ; ; AVX1-LABEL: trunc_or_v16i64_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vorps %ymm4, %ymm0, %ymm0 -; AVX1-NEXT: vorps %ymm5, %ymm1, %ymm1 -; AVX1-NEXT: vorps %ymm6, %ymm2, %ymm2 -; AVX1-NEXT: vorps %ymm7, %ymm3, %ymm3 +; AVX1-NEXT: vorpd %ymm4, %ymm0, %ymm0 +; AVX1-NEXT: vorpd %ymm5, %ymm1, %ymm1 +; AVX1-NEXT: vorpd %ymm6, %ymm2, %ymm2 +; AVX1-NEXT: vorpd %ymm7, %ymm3, %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -5072,7 +5083,7 @@ define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] ; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 @@ -5355,22 +5366,23 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind { ; AVX1-LABEL: trunc_or_const_v16i64_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321] +; AVX1-NEXT: # xmm5 = mem[0,0] +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -5456,13 +5468,13 @@ define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind { ; AVX1-LABEL: trunc_or_const_v16i32_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43] +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 |