summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
author: Elena Demikhovsky <elena.demikhovsky@intel.com> 2017-01-11 12:59:32 +0000
committer: Elena Demikhovsky <elena.demikhovsky@intel.com> 2017-01-11 12:59:32 +0000
commit: 9d0e7c33d3fd35393d3f4de23fad6b33e7699c74 (patch)
tree: 3e5c43fc44458e64edd68e5fc6dab478ed644b0d /llvm/test/CodeGen/X86
parent: ffdd0728584319ad800803a3f3328838d832bdfa (diff)
download: bcm5719-llvm-9d0e7c33d3fd35393d3f4de23fad6b33e7699c74.tar.gz
bcm5719-llvm-9d0e7c33d3fd35393d3f4de23fad6b33e7699c74.zip
X86 CodeGen: Optimized pattern for truncate with unsigned saturation.
DAG pattern optimization: truncate + unsigned saturation is supported by the VPMOVUS* instructions on AVX-512 targets and by the VPACKUS* instructions on SSE* targets. Differential Revision: https://reviews.llvm.org/D28216. llvm-svn: 291670
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- llvm/test/CodeGen/X86/avx-trunc.ll 26
-rw-r--r-- llvm/test/CodeGen/X86/avx512-trunc.ll 205
2 files changed, 231 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx-trunc.ll b/llvm/test/CodeGen/X86/avx-trunc.ll
index 789ca241394..c729b988cfb 100644
--- a/llvm/test/CodeGen/X86/avx-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx-trunc.ll
@@ -39,3 +39,29 @@ define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp{
%B = trunc <16 x i16> %A to <16 x i8>
ret <16 x i8> %B
}
+
+define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
+; CHECK-LABEL: usat_trunc_wb_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x6 = trunc <16 x i16> %x5 to <16 x i8>
+ ret <16 x i8> %x6
+}
+
+define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
+; CHECK-LABEL: usat_trunc_dw_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x6 = trunc <8 x i32> %x5 to <8 x i16>
+ ret <8 x i16> %x6
+}
diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll
index 646697b82c2..fb6c55b26e7 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -500,3 +500,208 @@ define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
store <8 x i8> %x, <8 x i8>* %res
ret void
}
+
+
+define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
+; KNL-LABEL: usat_trunc_wb_256_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vmovdqu %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: usat_trunc_wb_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x6 = trunc <16 x i16> %x5 to <16 x i8>
+ store <16 x i8> %x6, <16 x i8>* %res, align 1
+ ret void
+}
+
+define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
+; KNL-LABEL: usat_trunc_wb_256:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: usat_trunc_wb_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovuswb %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x6 = trunc <16 x i16> %x5 to <16 x i8>
+ ret <16 x i8> %x6
+}
+
+define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
+; KNL-LABEL: usat_trunc_wb_128_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: usat_trunc_wb_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x6 = trunc <8 x i16> %x5 to <8 x i8>
+ store <8 x i8> %x6, <8 x i8>* %res, align 1
+ ret void
+}
+
+define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
+; ALL-LABEL: usat_trunc_db_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %x6 = trunc <16 x i32> %x5 to <16 x i8>
+ store <16 x i8> %x6, <16 x i8>* %res, align 1
+ ret void
+}
+
+define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
+; ALL-LABEL: usat_trunc_qb_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+ %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+ %x6 = trunc <8 x i64> %x5 to <8 x i8>
+ store <8 x i8> %x6, <8 x i8>* %res, align 1
+ ret void
+}
+
+define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
+; ALL-LABEL: usat_trunc_qd_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %x6 = trunc <8 x i64> %x5 to <8 x i32>
+ store <8 x i32> %x6, <8 x i32>* %res, align 1
+ ret void
+}
+
+define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
+; ALL-LABEL: usat_trunc_qw_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
+ %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
+ %x6 = trunc <8 x i64> %x5 to <8 x i16>
+ store <8 x i16> %x6, <8 x i16>* %res, align 1
+ ret void
+}
+
+define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
+; KNL-LABEL: usat_trunc_db_1024:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovusdb %zmm0, %xmm0
+; KNL-NEXT: vpmovusdb %zmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: usat_trunc_db_1024:
+; SKX: ## BB#0:
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
+; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
+; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
+; SKX-NEXT: vpmovdw %zmm0, %ymm0
+; SKX-NEXT: vpmovdw %zmm1, %ymm1
+; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; SKX-NEXT: vpmovwb %zmm0, %ymm0
+; SKX-NEXT: retq
+ %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %x6 = trunc <32 x i32> %x5 to <32 x i8>
+ ret <32 x i8> %x6
+}
+
+define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
+; KNL-LABEL: usat_trunc_db_1024_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovusdb %zmm0, %xmm0
+; KNL-NEXT: vpmovusdb %zmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: vmovdqu %ymm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: usat_trunc_db_1024_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
+; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
+; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
+; SKX-NEXT: vpmovdw %zmm0, %ymm0
+; SKX-NEXT: vpmovdw %zmm1, %ymm1
+; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; SKX-NEXT: vpmovwb %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %x6 = trunc <32 x i32> %x5 to <32 x i8>
+ store <32 x i8>%x6, <32 x i8>* %p, align 1
+ ret void
+}
+
+define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
+; ALL-LABEL: usat_trunc_dw_512:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovusdw %zmm0, %ymm0
+; ALL-NEXT: retq
+ %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %x6 = trunc <16 x i32> %x5 to <16 x i16>
+ ret <16 x i16> %x6
+}
+
+define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
+; ALL-LABEL: usat_trunc_wb_128:
+; ALL: ## BB#0:
+; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
+; ALL-NEXT: retq
+ %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %x6 = trunc <8 x i16> %x5 to <8 x i8>
+ ret <8 x i8>%x6
+}
+
+define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
+; KNL-LABEL: usat_trunc_qw_1024:
+; KNL: ## BB#0:
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
+; KNL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
+; KNL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vpmovqd %zmm1, %ymm1
+; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: usat_trunc_qw_1024:
+; SKX: ## BB#0:
+; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
+; SKX-NEXT: vpminuq %zmm2, %zmm1, %zmm1
+; SKX-NEXT: vpminuq %zmm2, %zmm0, %zmm0
+; SKX-NEXT: vpmovqd %zmm0, %ymm0
+; SKX-NEXT: vpmovqd %zmm1, %ymm1
+; SKX-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
+; SKX-NEXT: vpmovdw %zmm0, %ymm0
+; SKX-NEXT: retq
+ %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
+ %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
+ %x6 = trunc <16 x i64> %x5 to <16 x i16>
+ ret <16 x i16> %x6
+}
+
OpenPOWER on IntegriCloud