summary refs log tree commit diff stats
path: root/llvm/test
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-12 14:01:16 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-12 14:01:16 +0000
commit 9a452740ec89f856492841af3f9c616f5be96c7c (patch)
tree 1c724ca43b3528c0c96ab3303c8b8d642bbd84b7 /llvm/test
parent 489efc68a572c42f71590dd6bc0ea21d82d727b3 (diff)
download bcm5719-llvm-9a452740ec89f856492841af3f9c616f5be96c7c.tar.gz
bcm5719-llvm-9a452740ec89f856492841af3f9c616f5be96c7c.zip
[X86][SSE] Add test showing missing demanded elts PSADBW handling
llvm-svn: 368575
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/psadbw.ll 26
1 files changed, 26 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/psadbw.ll b/llvm/test/CodeGen/X86/psadbw.ll
index 46916898063..753e88c3dbc 100644
--- a/llvm/test/CodeGen/X86/psadbw.ll
+++ b/llvm/test/CodeGen/X86/psadbw.ll
@@ -13,5 +13,31 @@ define <2 x i64> @combine_psadbw_shift(<16 x i8> %0, <16 x i8> %1) {
ret <2 x i64> %4
}
+; Each i64 result element depends only on the 8 source bytes it aliases, so extracting element 0 demands just the low 8 bytes; the upper-half shuffles are redundant.
+define i64 @combine_psadbw_demandedelt(<16 x i8> %0, <16 x i8> %1) {
+; X86-LABEL: combine_psadbw_demandedelt:
+; X86: # %bb.0:
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; X86-NEXT: psadbw %xmm0, %xmm1
+; X86-NEXT: movd %xmm1, %eax
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-NEXT: movd %xmm0, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_psadbw_demandedelt:
+; X64: # %bb.0:
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; X64-NEXT: psadbw %xmm0, %xmm1
+; X64-NEXT: movq %xmm1, %rax
+; X64-NEXT: retq
+ %3 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+ %4 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+ %5 = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %3, <16 x i8> %4)
+ %6 = extractelement <2 x i64> %5, i32 0
+ ret i64 %6
+}
+
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>)
OpenPOWER on IntegriCloud