From 243a3d56d8726ecee55480f9faf6eaa2ec160004 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 15 May 2018 10:24:12 +0000 Subject: [X86] Improve unsigned saturation downconvert detection. Summary: Detection of two new unsigned saturation downconvert patterns was implemented in X86 Codegen: (truncate (smin (smax (x, C1), C2)) to dest_type), where C1 >= 0 and C2 is the unsigned max of the destination type. (truncate (smax (smin (x, C2), C1)) to dest_type), where C1 >= 0, C2 is the unsigned max of the destination type, and C1 <= C2. These two patterns are equivalent to: (truncate (umin (smax(x, C1), unsigned_max_of_dest_type)) to dest_type) Reviewers: RKSimon Subscribers: llvm-commits, a.elovikov Differential Revision: https://reviews.llvm.org/D45315 llvm-svn: 332336 --- llvm/test/CodeGen/X86/avx512-trunc.ll | 45 ++++++++++------------------ llvm/test/CodeGen/X86/vector-trunc-packus.ll | 42 +++++++++----------------- 2 files changed, 29 insertions(+), 58 deletions(-) (limited to 'llvm/test') diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll index 8bd29e14809..ca7e22316fb 100644 --- a/llvm/test/CodeGen/X86/avx512-trunc.ll +++ b/llvm/test/CodeGen/X86/avx512-trunc.ll @@ -790,8 +790,7 @@ define void @smax_usat_trunc_wb_256_mem1(<16 x i16> %i, <16 x i8>* %res) { ; SKX: ## %bb.0: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; SKX-NEXT: vpmovwb %ymm0, (%rdi) +; SKX-NEXT: vpmovuswb %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %x1 = icmp sgt <16 x i16> %i, @@ -817,10 +816,9 @@ define void @smax_usat_trunc_wb_256_mem2(<16 x i16> %i, <16 x i8>* %res) { ; ; SKX-LABEL: smax_usat_trunc_wb_256_mem2: ; SKX: ## %bb.0: -; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; SKX-NEXT: vpmovwb %ymm0, (%rdi) +; SKX-NEXT: vpmovuswb %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %x1 = icmp slt <16 
x i16> %i, @@ -847,8 +845,7 @@ define <16 x i8> @smax_usat_trunc_wb_256(<16 x i16> %i) { ; SKX: ## %bb.0: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; SKX-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; SKX-NEXT: vpmovwb %ymm0, %xmm0 +; SKX-NEXT: vpmovuswb %ymm0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %x1 = icmp sgt <16 x i16> %i, @@ -873,8 +870,7 @@ define void @smax_usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) { ; SKX: ## %bb.0: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; SKX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpmovwb %xmm0, (%rdi) +; SKX-NEXT: vpmovuswb %xmm0, (%rdi) ; SKX-NEXT: retq %x1 = icmp sgt <8 x i16> %i, %x2 = select <8 x i1> %x1, <8 x i16> %i, <8 x i16> @@ -890,8 +886,7 @@ define void @smax_usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) { ; ALL: ## %bb.0: ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 -; ALL-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpmovdb %zmm0, (%rdi) +; ALL-NEXT: vpmovusdb %zmm0, (%rdi) ; ALL-NEXT: vzeroupper ; ALL-NEXT: retq %x1 = icmp sgt <16 x i32> %i, @@ -908,8 +903,7 @@ define void @smax_usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) { ; ALL: ## %bb.0: ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; ALL-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; ALL-NEXT: vpmovqb %zmm0, (%rdi) +; ALL-NEXT: vpmovusqb %zmm0, (%rdi) ; ALL-NEXT: vzeroupper ; ALL-NEXT: retq %x1 = icmp sgt <8 x i64> %i, @@ -926,8 +920,7 @@ define void @smax_usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) { ; ALL: ## %bb.0: ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; ALL-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; ALL-NEXT: vpmovqd %zmm0, (%rdi) +; ALL-NEXT: vpmovusqd %zmm0, (%rdi) ; ALL-NEXT: vzeroupper ; ALL-NEXT: retq %x1 = icmp sgt <8 x i64> %i, @@ -944,8 +937,7 @@ define void @smax_usat_trunc_qw_512_mem(<8 x i64> 
%i, <8 x i16>* %res) { ; ALL: ## %bb.0: ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; ALL-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; ALL-NEXT: vpmovqw %zmm0, (%rdi) +; ALL-NEXT: vpmovusqw %zmm0, (%rdi) ; ALL-NEXT: vzeroupper ; ALL-NEXT: retq %x1 = icmp sgt <8 x i64> %i, @@ -961,13 +953,10 @@ define <32 x i8> @smax_usat_trunc_db_1024(<32 x i32> %i) { ; KNL-LABEL: smax_usat_trunc_db_1024: ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0 ; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; KNL-NEXT: vpminsd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vpminsd %zmm2, %zmm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpmovdb %zmm1, %xmm1 +; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0 +; KNL-NEXT: vpmovusdb %zmm0, %xmm0 +; KNL-NEXT: vpmovusdb %zmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; @@ -996,13 +985,10 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) { ; KNL-LABEL: smax_usat_trunc_db_1024_mem: ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0 ; KNL-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vpbroadcastd {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; KNL-NEXT: vpminsd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vpminsd %zmm2, %zmm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpmovdb %zmm1, %xmm1 +; KNL-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0 +; KNL-NEXT: vpmovusdb %zmm0, %xmm0 +; KNL-NEXT: vpmovusdb %zmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vmovdqu %ymm0, (%rdi) ; KNL-NEXT: vzeroupper @@ -1036,8 +1022,7 @@ define <16 x i16> @smax_usat_trunc_dw_512(<16 x i32> %i) { ; ALL: ## %bb.0: ; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 -; ALL-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: 
vpmovdw %zmm0, %ymm0 +; ALL-NEXT: vpmovusdw %zmm0, %ymm0 ; ALL-NEXT: retq %x1 = icmp sgt <16 x i32> %i, %x2 = select <16 x i1> %x1, <16 x i32> %i, <16 x i32> diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll index f47425b92c4..afd692866db 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -244,10 +244,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) { ; ; AVX512VL-LABEL: trunc_packus_v4i64_v4i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovusqd %ymm0, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; @@ -265,10 +264,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) { ; ; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i32: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 -; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovusqd %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq %1 = icmp slt <4 x i64> %a0, @@ -665,10 +663,9 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64> %a0) { ; ; AVX512-LABEL: trunc_packus_v8i64_v8i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512-NEXT: retq %1 = icmp slt <8 x i64> %a0, %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> @@ -1070,10 +1067,9 @@ define <8 x i16> @trunc_packus_v8i64_v8i16(<8 x i64> %a0) { ; ; AVX512-LABEL: trunc_packus_v8i64_v8i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = icmp slt <8 x i64> %a0, @@ -1170,10 +1166,9 @@ define <8 x i16> @trunc_packus_v8i32_v8i16(<8 x i32> %a0) { ; ; AVX512VL-LABEL: trunc_packus_v8i32_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovusdw %ymm0, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; @@ -1190,10 +1185,9 @@ define <8 x i16> @trunc_packus_v8i32_v8i16(<8 x i32> %a0) { ; ; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i16: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovusdw %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq %1 = icmp slt <8 x i32> %a0, @@ -1325,10 +1319,9 @@ define <16 x i16> @trunc_packus_v16i32_v16i16(<16 x i32> %a0) { ; ; AVX512-LABEL: trunc_packus_v16i32_v16i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512-NEXT: retq %1 = icmp slt <16 x i32> %a0, %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> @@ -2130,10 +2123,9 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) { ; ; AVX512-LABEL: trunc_packus_v8i64_v8i8_store: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovqb %zmm0, (%rdi) +; AVX512-NEXT: vpmovusqb %zmm0, (%rdi) ; AVX512-NEXT: vzeroupper ; 
AVX512-NEXT: retq %1 = icmp slt <8 x i64> %a0, @@ -3042,10 +3034,9 @@ define void @trunc_packus_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512VL-LABEL: trunc_packus_v8i32_v8i8_store: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmovdb %ymm0, (%rdi) +; AVX512VL-NEXT: vpmovusdb %ymm0, (%rdi) ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; @@ -3063,10 +3054,9 @@ define void @trunc_packus_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i8_store: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) +; AVX512BWVL-NEXT: vpmovusdb %ymm0, (%rdi) ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq %1 = icmp slt <8 x i32> %a0, @@ -3107,10 +3097,9 @@ define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32> %a0) { ; ; AVX512-LABEL: trunc_packus_v16i32_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = icmp slt <16 x i32> %a0, @@ -3173,10 +3162,9 @@ define <16 x i8> @trunc_packus_v16i16_v16i8(<16 x i16> %a0) { ; ; AVX512BWVL-LABEL: trunc_packus_v16i16_v16i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovuswb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq %1 = icmp slt <16 x i16> %a0, @@ -3242,18 +3230,16 @@ define <32 x i8> @trunc_packus_v32i16_v32i8(<32 x i16> %a0) { ; ; 
AVX512BW-LABEL: trunc_packus_v32i16_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_packus_v32i16_v32i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %zmm0, %zmm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 -; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BWVL-NEXT: vpmovuswb %zmm0, %ymm0 ; AVX512BWVL-NEXT: retq %1 = icmp slt <32 x i16> %a0, %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> -- cgit v1.2.3