Diffstat (limited to 'llvm/test/CodeGen/X86/vector-trunc-usat.ll')
-rw-r--r--  llvm/test/CodeGen/X86/vector-trunc-usat.ll | 718
1 file changed, 718 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
index e24def71513..82488d7bc7b 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -15,6 +15,224 @@
; Unsigned saturation truncation to vXi32
;
+define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) {
+; SSE2-LABEL: trunc_usat_v2i64_v2i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v2i64_v2i32:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSSE3-NEXT: pxor %xmm0, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
+; SSSE3-NEXT: movdqa %xmm2, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
+; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: pand %xmm4, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSSE3-NEXT: por %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: por %xmm0, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v2i64_v2i32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259455,9223372039002259455]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_usat_v2i64_v2i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
+; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc_usat_v2i64_v2i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_usat_v2i64_v2i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_usat_v2i64_v2i32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BWVL-NEXT: retq
+;
+; SKX-LABEL: trunc_usat_v2i64_v2i32:
+; SKX: # %bb.0:
+; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: retq
+ %1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
+ %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
+ %3 = trunc <2 x i64> %2 to <2 x i32>
+ ret <2 x i32> %3
+}
+
+define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) {
+; SSE2-LABEL: trunc_usat_v2i64_v2i32_store:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: movq %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v2i64_v2i32_store:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSSE3-NEXT: pxor %xmm0, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
+; SSSE3-NEXT: movdqa %xmm2, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
+; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: pand %xmm4, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSSE3-NEXT: por %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: por %xmm0, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: movq %xmm0, (%rdi)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v2i64_v2i32_store:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259455,9223372039002259455]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE41-NEXT: movq %xmm0, (%rdi)
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_usat_v2i64_v2i32_store:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
+; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vmovlpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vmovq %xmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_usat_v2i64_v2i32_store:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovusqd %xmm0, (%rdi)
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_usat_v2i64_v2i32_store:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32_store:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpmovusqd %xmm0, (%rdi)
+; AVX512BWVL-NEXT: retq
+;
+; SKX-LABEL: trunc_usat_v2i64_v2i32_store:
+; SKX: # %bb.0:
+; SKX-NEXT: vpmovusqd %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
+ %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
+ %3 = trunc <2 x i64> %2 to <2 x i32>
+ store <2 x i32> %3, <2 x i32>* %p1
+ ret void
+}
+
define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v4i64_v4i32:
; SSE2: # %bb.0:
@@ -479,6 +697,278 @@ define <8 x i32> @trunc_usat_v8i64_v8i32(<8 x i64>* %p0) {
; Unsigned saturation truncation to vXi16
;
+define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) {
+; SSE2-LABEL: trunc_usat_v2i64_v2i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v2i64_v2i16:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSSE3-NEXT: pxor %xmm0, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
+; SSSE3-NEXT: movdqa %xmm2, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
+; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: pand %xmm4, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSSE3-NEXT: por %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: por %xmm0, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v2i64_v2i16:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002324991,9223372039002324991]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc_usat_v2i64_v2i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
+; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
+; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: retq
+;
+; AVX512F-LABEL: trunc_usat_v2i64_v2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_usat_v2i64_v2i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_usat_v2i64_v2i16:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BWVL-NEXT: retq
+;
+; SKX-LABEL: trunc_usat_v2i64_v2i16:
+; SKX: # %bb.0:
+; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; SKX-NEXT: retq
+ %1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
+ %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
+ %3 = trunc <2 x i64> %2 to <2 x i16>
+ ret <2 x i16> %3
+}
+
+define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16>* %p1) {
+; SSE2-LABEL: trunc_usat_v2i64_v2i16_store:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: movd %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v2i64_v2i16_store:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSSE3-NEXT: pxor %xmm0, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002324991,9223372039002324991]
+; SSSE3-NEXT: movdqa %xmm2, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
+; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: pand %xmm4, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSSE3-NEXT: por %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: por %xmm0, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSSE3-NEXT: movd %xmm0, (%rdi)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v2i64_v2i16_store:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002324991,9223372039002324991]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE41-NEXT: movd %xmm0, (%rdi)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc_usat_v2i64_v2i16_store:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
+; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX1-NEXT: vmovd %xmm0, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
+; AVX2-SLOW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-SLOW-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi)
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vmovapd {{.*#+}} xmm1 = [65535,65535]
+; AVX2-FAST-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX2-FAST-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi)
+; AVX2-FAST-NEXT: retq
+;
+; AVX512F-LABEL: trunc_usat_v2i64_v2i16_store:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vmovd %xmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_usat_v2i64_v2i16_store:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovusqw %xmm0, (%rdi)
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_usat_v2i64_v2i16_store:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16_store:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpmovusqw %xmm0, (%rdi)
+; AVX512BWVL-NEXT: retq
+;
+; SKX-LABEL: trunc_usat_v2i64_v2i16_store:
+; SKX: # %bb.0:
+; SKX-NEXT: vpmovusqw %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
+ %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
+ %3 = trunc <2 x i64> %2 to <2 x i16>
+ store <2 x i16> %3, <2 x i16>* %p1
+ ret void
+}
+
define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v4i64_v4i16:
; SSE2: # %bb.0:
@@ -1592,6 +2082,234 @@ define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32>* %p0) {
; Unsigned saturation truncation to vXi8
;
+define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) {
+; SSE2-LABEL: trunc_usat_v2i64_v2i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: packuswb %xmm1, %xmm1
+; SSE2-NEXT: packuswb %xmm1, %xmm1
+; SSE2-NEXT: packuswb %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v2i64_v2i8:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSSE3-NEXT: pxor %xmm0, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
+; SSSE3-NEXT: movdqa %xmm2, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
+; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: pand %xmm4, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSSE3-NEXT: por %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: por %xmm2, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v2i64_v2i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255]
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259711,9223372039002259711]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_usat_v2i64_v2i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc_usat_v2i64_v2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_usat_v2i64_v2i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_usat_v2i64_v2i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BWVL-NEXT: retq
+;
+; SKX-LABEL: trunc_usat_v2i64_v2i8:
+; SKX: # %bb.0:
+; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SKX-NEXT: retq
+ %1 = icmp ult <2 x i64> %a0, <i64 255, i64 255>
+ %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
+ %3 = trunc <2 x i64> %2 to <2 x i8>
+ ret <2 x i8> %3
+}
+
+define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8>* %p1) {
+; SSE2-LABEL: trunc_usat_v2i64_v2i8_store:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE2-NEXT: packuswb %xmm2, %xmm2
+; SSE2-NEXT: packuswb %xmm0, %xmm2
+; SSE2-NEXT: packuswb %xmm0, %xmm2
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: movw %ax, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v2i64_v2i8_store:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSSE3-NEXT: pxor %xmm0, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259711,9223372039002259711]
+; SSSE3-NEXT: movdqa %xmm2, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
+; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: pand %xmm4, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; SSSE3-NEXT: por %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: por %xmm0, %xmm2
+; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movd %xmm2, %eax
+; SSSE3-NEXT: movw %ax, (%rdi)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v2i64_v2i8_store:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255]
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259711,9223372039002259711]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pextrw $0, %xmm2, (%rdi)
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc_usat_v2i64_v2i8_store:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [255,255]
+; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_usat_v2i64_v2i8_store:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpmovusqb %xmm0, (%rdi)
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_usat_v2i64_v2i8_store:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
+; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8_store:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpmovusqb %xmm0, (%rdi)
+; AVX512BWVL-NEXT: retq
+;
+; SKX-LABEL: trunc_usat_v2i64_v2i8_store:
+; SKX: # %bb.0:
+; SKX-NEXT: vpmovusqb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %1 = icmp ult <2 x i64> %a0, <i64 255, i64 255>
+ %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
+ %3 = trunc <2 x i64> %2 to <2 x i8>
+ store <2 x i8> %3, <2 x i8>* %p1
+ ret void
+}
+
define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) {
; SSE2-LABEL: trunc_usat_v4i64_v4i8:
; SSE2: # %bb.0:
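For reference, every function added by this patch follows the same unsigned-saturating truncation idiom in LLVM IR: compare against the clamp limit, select, then truncate. Below is a minimal sketch of the pattern, mirroring the v2i64-to-v2i32 case above (the function name is illustrative):

define <2 x i32> @usat_trunc_sketch(<2 x i64> %a0) {
  ; clamp each lane to UINT32_MAX, then truncate to i32
  %c = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
  %s = select <2 x i1> %c, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
  %t = trunc <2 x i64> %s to <2 x i32>
  ret <2 x i32> %t
}

The magic constants in the CHECK lines come from biasing both operands of the compare so that a signed compare can stand in for the unsigned compare that pre-AVX512 x86 lacks. In the SSE2/SSSE3 output, 9223372039002259456 is 0x8000000080000000 (the sign bit of each 32-bit half of an i64, since the 64-bit compare is emulated with pcmpgtd/pcmpeqd), and XOR-ing each clamp limit with it gives the thresholds seen above: 0xFFFFFFFF yields 9223372039002259455, 0xFFFF yields 9223372039002324991, and 0xFF yields 9223372039002259711. The AVX paths use vpcmpgtq with the 64-bit bias 0x8000000000000000 instead, giving 9223372041149743103, 9223372036854841343, and 9223372036854776063 respectively, while the AVX512 paths lower the whole idiom to vpminuq plus a truncating shuffle or a vpmovusq* saturating store.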