-rw-r--r--  llvm/test/CodeGen/X86/min-legal-vector-width.ll  |  19
-rw-r--r--  llvm/test/CodeGen/X86/vector-trunc-packus.ll     |  54
-rw-r--r--  llvm/test/CodeGen/X86/vector-trunc-ssat.ll       |  51
-rw-r--r--  llvm/test/CodeGen/X86/vector-trunc-usat.ll       | 208
4 files changed, 332 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 5c851c1c723..832692480e0 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1097,3 +1097,22 @@ define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p, <16 x i8>* %q) "min-
%f = trunc <16 x i32> %e to <16 x i8>
ret <16 x i8> %f
}
+
+define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32> %a0) {
+; CHECK-LABEL: trunc_packus_v32i32_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
+; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpmovusdb %zmm1, %xmm1
+; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = icmp slt <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %3 = icmp sgt <32 x i32> %2, zeroinitializer
+ %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> zeroinitializer
+ %5 = trunc <32 x i32> %4 to <32 x i8>
+ ret <32 x i8> %5
+}
+
diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
index b0d6a20bdf3..704ac81394b 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll
@@ -3087,3 +3087,57 @@ define <32 x i8> @trunc_packus_v32i16_v32i8(<32 x i16> %a0) {
%5 = trunc <32 x i16> %4 to <32 x i8>
ret <32 x i8> %5
}
+
+define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32> %a0) {
+; SSE-LABEL: trunc_packus_v32i32_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm3, %xmm2
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: packuswb %xmm2, %xmm0
+; SSE-NEXT: packssdw %xmm7, %xmm6
+; SSE-NEXT: packssdw %xmm5, %xmm4
+; SSE-NEXT: packuswb %xmm6, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc_packus_v32i32_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_packus_v32i32_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
+; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_packus_v32i32_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
+; AVX512-NEXT: vpmovusdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %3 = icmp sgt <32 x i32> %2, zeroinitializer
+ %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> zeroinitializer
+ %5 = trunc <32 x i32> %4 to <32 x i8>
+ ret <32 x i8> %5
+}
diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
index 774a478a5d7..54f0d35eb74 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll
@@ -3049,3 +3049,54 @@ define <32 x i8> @trunc_ssat_v32i16_v32i8(<32 x i16> %a0) {
%5 = trunc <32 x i16> %4 to <32 x i8>
ret <32 x i8> %5
}
+
+define <32 x i8> @trunc_ssat_v32i32_v32i8(<32 x i32> %a0) {
+; SSE-LABEL: trunc_ssat_v32i32_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm3, %xmm2
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: packsswb %xmm2, %xmm0
+; SSE-NEXT: packssdw %xmm7, %xmm6
+; SSE-NEXT: packssdw %xmm5, %xmm4
+; SSE-NEXT: packsswb %xmm6, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
+; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512-NEXT: vpmovsdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <32 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+ %3 = icmp sgt <32 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
+ %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
+ %5 = trunc <32 x i32> %4 to <32 x i8>
+ ret <32 x i8> %5
+}
diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
index 7489d393585..2c443db99e1 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -2453,3 +2453,211 @@ define <32 x i8> @trunc_usat_v32i16_v32i8(<32 x i16> %a0) {
%3 = trunc <32 x i16> %2 to <32 x i8>
ret <32 x i8> %3
}
+
+define <32 x i8> @trunc_usat_v32i32_v32i8(<32 x i32> %a0) {
+; SSE2-LABEL: trunc_usat_v32i32_v32i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm8
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: pxor %xmm11, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSE2-NEXT: movdqa %xmm9, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm12
+; SSE2-NEXT: pand %xmm12, %xmm5
+; SSE2-NEXT: pandn %xmm10, %xmm12
+; SSE2-NEXT: por %xmm5, %xmm12
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm11, %xmm5
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm4
+; SSE2-NEXT: pandn %xmm10, %xmm1
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: packuswb %xmm12, %xmm1
+; SSE2-NEXT: movdqa %xmm7, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pand %xmm7, %xmm6
+; SSE2-NEXT: pandn %xmm10, %xmm7
+; SSE2-NEXT: por %xmm6, %xmm7
+; SSE2-NEXT: packuswb %xmm5, %xmm7
+; SSE2-NEXT: packuswb %xmm7, %xmm1
+; SSE2-NEXT: movdqa %xmm8, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm8
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm8, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pand %xmm6, %xmm0
+; SSE2-NEXT: pandn %xmm10, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: packuswb %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm9
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm10, %xmm9
+; SSE2-NEXT: por %xmm2, %xmm9
+; SSE2-NEXT: packuswb %xmm5, %xmm9
+; SSE2-NEXT: packuswb %xmm9, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v32i32_v32i8:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa %xmm1, %xmm8
+; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT: movdqa %xmm5, %xmm1
+; SSSE3-NEXT: pxor %xmm11, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSSE3-NEXT: movdqa %xmm9, %xmm12
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm12
+; SSSE3-NEXT: pand %xmm12, %xmm5
+; SSSE3-NEXT: pandn %xmm10, %xmm12
+; SSSE3-NEXT: por %xmm5, %xmm12
+; SSSE3-NEXT: movdqa %xmm4, %xmm5
+; SSSE3-NEXT: pxor %xmm11, %xmm5
+; SSSE3-NEXT: movdqa %xmm9, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1
+; SSSE3-NEXT: pand %xmm1, %xmm4
+; SSSE3-NEXT: pandn %xmm10, %xmm1
+; SSSE3-NEXT: por %xmm4, %xmm1
+; SSSE3-NEXT: packuswb %xmm12, %xmm1
+; SSSE3-NEXT: movdqa %xmm7, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm7
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm7, %xmm5
+; SSSE3-NEXT: movdqa %xmm6, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm7
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm7
+; SSSE3-NEXT: pand %xmm7, %xmm6
+; SSSE3-NEXT: pandn %xmm10, %xmm7
+; SSSE3-NEXT: por %xmm6, %xmm7
+; SSSE3-NEXT: packuswb %xmm5, %xmm7
+; SSSE3-NEXT: packuswb %xmm7, %xmm1
+; SSSE3-NEXT: movdqa %xmm8, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm8
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm8, %xmm5
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm6
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
+; SSSE3-NEXT: pand %xmm6, %xmm0
+; SSSE3-NEXT: pandn %xmm10, %xmm6
+; SSSE3-NEXT: por %xmm6, %xmm0
+; SSSE3-NEXT: packuswb %xmm5, %xmm0
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm3
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm3, %xmm5
+; SSSE3-NEXT: pxor %xmm2, %xmm11
+; SSSE3-NEXT: pcmpgtd %xmm11, %xmm9
+; SSSE3-NEXT: pand %xmm9, %xmm2
+; SSSE3-NEXT: pandn %xmm10, %xmm9
+; SSSE3-NEXT: por %xmm2, %xmm9
+; SSSE3-NEXT: packuswb %xmm5, %xmm9
+; SSSE3-NEXT: packuswb %xmm9, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v32i32_v32i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255]
+; SSE41-NEXT: pminud %xmm8, %xmm5
+; SSE41-NEXT: pminud %xmm8, %xmm4
+; SSE41-NEXT: packusdw %xmm5, %xmm4
+; SSE41-NEXT: pminud %xmm8, %xmm7
+; SSE41-NEXT: pminud %xmm8, %xmm6
+; SSE41-NEXT: packusdw %xmm7, %xmm6
+; SSE41-NEXT: packuswb %xmm6, %xmm4
+; SSE41-NEXT: pminud %xmm8, %xmm1
+; SSE41-NEXT: pminud %xmm8, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pminud %xmm8, %xmm3
+; SSE41-NEXT: pminud %xmm8, %xmm2
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: packuswb %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm4, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc_usat_v32i32_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255]
+; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpminud %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_usat_v32i32_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpminud %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpminud %ymm4, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm4, %ymm3, %ymm1
+; AVX2-NEXT: vpminud %ymm4, %ymm2, %ymm2
+; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_usat_v32i32_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512-NEXT: vpmovusdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %3 = trunc <32 x i32> %2 to <32 x i8>
+ ret <32 x i8> %3
+}
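
Note: the icmp/select chains in the IR above all encode the same clamp-then-truncate idiom that the backend matches to packing/saturating instructions. A minimal scalar C++ sketch of the three modes exercised by these tests (the helper names trunc_packus, trunc_ssat, and trunc_usat are illustrative only, not part of the test suite):

    #include <algorithm>
    #include <cstdint>

    // packus: signed input clamped to the unsigned range [0, 255]
    // (icmp slt/select against 255, then icmp sgt/select against 0).
    uint8_t trunc_packus(int32_t x) {
      return static_cast<uint8_t>(std::clamp(x, 0, 255));
    }

    // ssat: signed input clamped to the signed range [-128, 127].
    int8_t trunc_ssat(int32_t x) {
      return static_cast<int8_t>(std::clamp(x, -128, 127));
    }

    // usat: unsigned input clamped from above only (icmp ult/select
    // against 255); the lower bound is implicit for unsigned values.
    uint8_t trunc_usat(uint32_t x) {
      return static_cast<uint8_t>(std::min<uint32_t>(x, 255u));
    }

Each test applies this per lane across a <32 x i32> vector before the trunc to <32 x i8>, which is why the AVX512 output reduces to vpmovusdb/vpmovsdb on each 512-bit half plus a vinserti128 to rejoin them.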