summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
authorAhmed Bougacha <ahmed.bougacha@gmail.com>2016-06-29 16:56:09 +0000
committerAhmed Bougacha <ahmed.bougacha@gmail.com>2016-06-29 16:56:09 +0000
commit15a2f6d58c97f481dc82130f87d98f76381f5e8b (patch)
tree8162a26028597a1f1b5516c9b57ea2c26a10c006 /llvm/test/CodeGen/X86
parent84cfb884e28fce0571e8f04c76f42546df805072 (diff)
downloadbcm5719-llvm-15a2f6d58c97f481dc82130f87d98f76381f5e8b.tar.gz
bcm5719-llvm-15a2f6d58c97f481dc82130f87d98f76381f5e8b.zip
[X86] Lower blended PACKUSes using appropriate types.
When lowering two blended PACKUS, we used to disregard the types of the PACKUS inputs, indiscriminately generating a v16i8 PACKUS. This leads to non-selectable things like: (v16i8 (PACKUS (v4i32 v0), (v4i32 v1))) Instead, check that the PACKUSes have the same type, and use that as the final result type. llvm-svn: 274138
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-sse41.ll59
1 files changed, 59 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll
new file mode 100644
index 00000000000..be9a4b95077
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-shuffle-sse41.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+define <8 x i16> @blend_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
+; SSE41-LABEL: blend_packusdw:
+; SSE41: # BB#0:
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: blend_packusdw:
+; AVX: # BB#0:
+; AVX-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
+ %p1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
+ %s0 = shufflevector <8 x i16> %p0, <8 x i16> %p1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %s0
+}
+
+define <16 x i8> @blend_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
+; SSE41-LABEL: blend_packuswb:
+; SSE41: # BB#0:
+; SSE41-NEXT: packuswb %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: blend_packuswb:
+; AVX: # BB#0:
+; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %p0 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
+ %s0 = shufflevector <16 x i8> %p0, <16 x i8> %p1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s0
+}
+
+define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16> %a2, <8 x i16> %a3) {
+; SSE41-LABEL: blend_packusdw_packuswb:
+; SSE41: # BB#0:
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm3, %xmm2
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: blend_packusdw_packuswb:
+; AVX: # BB#0:
+; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
+ %p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
+ %b1 = bitcast <16 x i8> %p1 to <8 x i16>
+ %s0 = shufflevector <8 x i16> %p0, <8 x i16> %b1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %s0
+}
+
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)
OpenPOWER on IntegriCloud