summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/vector-trunc-usat.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-trunc-usat.ll')
-rw-r--r--llvm/test/CodeGen/X86/vector-trunc-usat.ll208
1 files changed, 208 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
index 7489d393585..2c443db99e1 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -2453,3 +2453,211 @@ define <32 x i8> @trunc_usat_v32i16_v32i8(<32 x i16> %a0) {
%3 = trunc <32 x i16> %2 to <32 x i8>
ret <32 x i8> %3
}
+
+define <32 x i8> @trunc_usat_v32i32_v32i8(<32 x i32> %a0) {
+; SSE2-LABEL: trunc_usat_v32i32_v32i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm8
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: pxor %xmm11, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSE2-NEXT: movdqa %xmm9, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm12
+; SSE2-NEXT: pand %xmm12, %xmm5
+; SSE2-NEXT: pandn %xmm10, %xmm12
+; SSE2-NEXT: por %xmm5, %xmm12
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm11, %xmm5
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm4
+; SSE2-NEXT: pandn %xmm10, %xmm1
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: packuswb %xmm12, %xmm1
+; SSE2-NEXT: movdqa %xmm7, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pand %xmm7, %xmm6
+; SSE2-NEXT: pandn %xmm10, %xmm7
+; SSE2-NEXT: por %xmm6, %xmm7
+; SSE2-NEXT: packuswb %xmm5, %xmm7
+; SSE2-NEXT: packuswb %xmm7, %xmm1
+; SSE2-NEXT: movdqa %xmm8, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm8
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm8, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pand %xmm6, %xmm0
+; SSE2-NEXT: pandn %xmm10, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: packuswb %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm9
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm10, %xmm9
+; SSE2-NEXT: por %xmm2, %xmm9
+; SSE2-NEXT: packuswb %xmm5, %xmm9
+; SSE2-NEXT: packuswb %xmm9, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v32i32_v32i8:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa %xmm1, %xmm8
+; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT: movdqa %xmm5, %xmm1
+; SSSE3-NEXT: pxor %xmm11, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSSE3-NEXT: movdqa %xmm9, %xmm12
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm12
+; SSSE3-NEXT: pand %xmm12, %xmm5
+; SSSE3-NEXT: pandn %xmm10, %xmm12
+; SSSE3-NEXT: por %xmm5, %xmm12
+; SSSE3-NEXT: movdqa %xmm4, %xmm5
+; SSSE3-NEXT: pxor %xmm11, %xmm5
+; SSSE3-NEXT: movdqa %xmm9, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1
+; SSSE3-NEXT: pand %xmm1, %xmm4
+; SSSE3-NEXT: pandn %xmm10, %xmm1
+; SSSE3-NEXT: por %xmm4, %xmm1
+; SSSE3-NEXT: packuswb %xmm12, %xmm1
+; SSSE3-NEXT: movdqa %xmm7, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm7
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm7, %xmm5
+; SSSE3-NEXT: movdqa %xmm6, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm7
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm7
+; SSSE3-NEXT: pand %xmm7, %xmm6
+; SSSE3-NEXT: pandn %xmm10, %xmm7
+; SSSE3-NEXT: por %xmm6, %xmm7
+; SSSE3-NEXT: packuswb %xmm5, %xmm7
+; SSSE3-NEXT: packuswb %xmm7, %xmm1
+; SSSE3-NEXT: movdqa %xmm8, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm8
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm8, %xmm5
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm6
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
+; SSSE3-NEXT: pand %xmm6, %xmm0
+; SSSE3-NEXT: pandn %xmm10, %xmm6
+; SSSE3-NEXT: por %xmm6, %xmm0
+; SSSE3-NEXT: packuswb %xmm5, %xmm0
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm3
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm3, %xmm5
+; SSSE3-NEXT: pxor %xmm2, %xmm11
+; SSSE3-NEXT: pcmpgtd %xmm11, %xmm9
+; SSSE3-NEXT: pand %xmm9, %xmm2
+; SSSE3-NEXT: pandn %xmm10, %xmm9
+; SSSE3-NEXT: por %xmm2, %xmm9
+; SSSE3-NEXT: packuswb %xmm5, %xmm9
+; SSSE3-NEXT: packuswb %xmm9, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v32i32_v32i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255]
+; SSE41-NEXT: pminud %xmm8, %xmm5
+; SSE41-NEXT: pminud %xmm8, %xmm4
+; SSE41-NEXT: packusdw %xmm5, %xmm4
+; SSE41-NEXT: pminud %xmm8, %xmm7
+; SSE41-NEXT: pminud %xmm8, %xmm6
+; SSE41-NEXT: packusdw %xmm7, %xmm6
+; SSE41-NEXT: packuswb %xmm6, %xmm4
+; SSE41-NEXT: pminud %xmm8, %xmm1
+; SSE41-NEXT: pminud %xmm8, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pminud %xmm8, %xmm3
+; SSE41-NEXT: pminud %xmm8, %xmm2
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: packuswb %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm4, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc_usat_v32i32_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255]
+; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpminud %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_usat_v32i32_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpminud %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpminud %ymm4, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm4, %ymm3, %ymm1
+; AVX2-NEXT: vpminud %ymm4, %ymm2, %ymm2
+; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_usat_v32i32_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512-NEXT: vpmovusdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %3 = trunc <32 x i32> %2 to <32 x i8>
+ ret <32 x i8> %3
+}
OpenPOWER on IntegriCloud