summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-10-17 21:14:53 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-10-17 21:14:53 +0000
commit: 7cd4e2c96f931ebb6fab4e2bb20e89bc9de7c9e9 (patch)
tree: 158c3db79ed3c0839c94596b9f244064e6c06720 /llvm
parent: 3dc67a1d8a87772c5c7302cb6bda3a24db0c3b88 (diff)
download: bcm5719-llvm-7cd4e2c96f931ebb6fab4e2bb20e89bc9de7c9e9.tar.gz
bcm5719-llvm-7cd4e2c96f931ebb6fab4e2bb20e89bc9de7c9e9.zip
[X86][SSE] Tests packuswb/truncation codegen from PR34773
llvm-svn: 316033
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/test/CodeGen/X86/vector-trunc.ll 120
1 files changed, 120 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 9729f4a4b6e..30aaff2f7ea 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -1931,3 +1931,123 @@ entry:
ret <16 x i8> %1
}
+define void @PR34773(i16* %a0, i8* %a1) { ; reads 32 i16 values from %a0 and stores the high byte of each to %a1, as two 16-element halves
+; SSE-LABEL: PR34773:
+; SSE: # BB#0:
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: movdqu 16(%rdi), %xmm1
+; SSE-NEXT: movdqu 32(%rdi), %xmm2
+; SSE-NEXT: movdqu 48(%rdi), %xmm3
+; SSE-NEXT: psrlw $8, %xmm1
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: psrlw $8, %xmm3
+; SSE-NEXT: psrlw $8, %xmm2
+; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: movdqu %xmm0, (%rsi)
+; SSE-NEXT: movdqu %xmm2, 16(%rsi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: PR34773:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqu (%rdi), %ymm0
+; AVX1-NEXT: vmovdqu 32(%rdi), %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm2
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rsi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR34773:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
+; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rsi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: PR34773:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512F-NEXT: vmovdqu 32(%rdi), %ymm1
+; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT: vmovdqu %xmm0, (%rsi)
+; AVX512F-NEXT: vmovdqu %xmm1, 16(%rsi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: PR34773:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512VL-NEXT: vmovdqu 32(%rdi), %ymm1
+; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512VL-NEXT: vmovdqu %xmm0, (%rsi)
+; AVX512VL-NEXT: vmovdqu %xmm1, 16(%rsi)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: PR34773:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm1
+; AVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
+; AVX512BW-NEXT: vmovdqu %xmm0, (%rsi)
+; AVX512BW-NEXT: vmovdqu %xmm1, 16(%rsi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: PR34773:
+; AVX512BWVL: # BB#0:
+; AVX512BWVL-NEXT: vpsrlw $8, (%rdi), %ymm0
+; AVX512BWVL-NEXT: vpsrlw $8, 32(%rdi), %ymm1
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rsi)
+; AVX512BWVL-NEXT: vpmovwb %ymm1, 16(%rsi)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+ %1 = getelementptr i16, i16* %a0, i64 16 ; second input half: 16 i16 elements (32 bytes) past %a0
+ %2 = getelementptr i8, i8* %a1, i64 16 ; second output half: 16 bytes past %a1
+ %3 = bitcast i16* %a0 to <16 x i16>* ; view the first input half as a <16 x i16> vector
+ %4 = bitcast i16* %1 to <16 x i16>* ; view the second input half as a <16 x i16> vector
+ %5 = bitcast i8* %a1 to <16 x i8>* ; view the first output half as a <16 x i8> vector
+ %6 = bitcast i8* %2 to <16 x i8>* ; view the second output half as a <16 x i8> vector
+ %7 = load <16 x i16>, <16 x i16>* %3, align 2 ; align 2 is element alignment only, so the expected code uses unaligned vector loads
+ %8 = load <16 x i16>, <16 x i16>* %4, align 2 ; same element-aligned load for the second half
+ %9 = lshr <16 x i16> %7, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; move each element's high byte into its low byte; the upper 8 bits become zero
+ %10 = lshr <16 x i16> %8, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; same shift for the second half
+ %11 = trunc <16 x i16> %9 to <16 x i8> ; keep the low byte (the original high byte); the shifted value is at most 255, so nothing is lost
+ %12 = trunc <16 x i16> %10 to <16 x i8> ; same truncation for the second half
+ store <16 x i8> %11, <16 x i8>* %5, align 1 ; byte-aligned vector store of the first 16 result bytes
+ store <16 x i8> %12, <16 x i8>* %6, align 1 ; byte-aligned vector store of the second 16 result bytes
+ ret void
+}
OpenPOWER on IntegriCloud