summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-12 14:01:16 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-12 14:01:16 +0000
commit 9a452740ec89f856492841af3f9c616f5be96c7c (patch)
tree 1c724ca43b3528c0c96ab3303c8b8d642bbd84b7
parent 489efc68a572c42f71590dd6bc0ea21d82d727b3 (diff)
download bcm5719-llvm-9a452740ec89f856492841af3f9c616f5be96c7c.tar.gz
bcm5719-llvm-9a452740ec89f856492841af3f9c616f5be96c7c.zip
[X86][SSE] Add test showing missing demanded elts PSADBW handling
llvm-svn: 368575
-rw-r--r-- llvm/test/CodeGen/X86/psadbw.ll 26
1 files changed, 26 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/psadbw.ll b/llvm/test/CodeGen/X86/psadbw.ll
index 46916898063..753e88c3dbc 100644
--- a/llvm/test/CodeGen/X86/psadbw.ll
+++ b/llvm/test/CodeGen/X86/psadbw.ll
@@ -13,5 +13,31 @@ define <2 x i64> @combine_psadbw_shift(<16 x i8> %0, <16 x i8> %1) {
ret <2 x i64> %4
}
+; Propagate the demanded result elements to the 8 aliasing source elements: only result element 0 is extracted and the shuffles merely swap source bytes 8-11 with 12-15, yet both PSHUFDs are still emitted below.
+define i64 @combine_psadbw_demandedelt(<16 x i8> %0, <16 x i8> %1) {
+; X86-LABEL: combine_psadbw_demandedelt:
+; X86: # %bb.0:
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; X86-NEXT: psadbw %xmm0, %xmm1
+; X86-NEXT: movd %xmm1, %eax
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-NEXT: movd %xmm0, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: combine_psadbw_demandedelt:
+; X64: # %bb.0:
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; X64-NEXT: psadbw %xmm0, %xmm1
+; X64-NEXT: movq %xmm1, %rax
+; X64-NEXT: retq
+ %3 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+ %4 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+ %5 = tail call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %3, <16 x i8> %4)
+ %6 = extractelement <2 x i64> %5, i32 0
+ ret i64 %6
+}
+
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>)
OpenPOWER on IntegriCloud