[CGP / PowerPC] avoid multi-block overhead for simple memcmp expansion

The test diff for PowerPC shows we can better optimize if this case is one block.

For x86, there would be a substantial difference if CGP expansion was enabled because branches are assumed cheap and SDAG can't optimize across blocks.

Instead of this:

_cmp_eq8:
    movq (%rdi), %rax
    cmpq (%rsi), %rax
    je LBB23_1
## BB#2: ## %res_block
    movl $1, %ecx
    jmp LBB23_3
LBB23_1:
    xorl %ecx, %ecx
LBB23_3: ## %endblock
    xorl %eax, %eax
    testl %ecx, %ecx
    sete %al
    retq

We get this:

cmp_eq8:
    movq (%rdi), %rcx
    xorl %eax, %eax
    cmpq (%rsi), %rcx
    sete %al
    retq

And that matches the optimal codegen that we get from the current expansion in SelectionDAGBuilder::visitMemCmpCall().

If this looks right, then I just need to confirm that vector-sized expansion will work from here, and we can enable CGP memcmp() expansion for x86. I.e., we'll bypass the power-of-2 special cases currently optimized in SDAG because we can lower the IR produced here optimally.

Differential Revision: https://reviews.llvm.org/D34005

llvm-svn: 304987
author: Sanjay Patel <spatel@rotateright.com> 2017-06-08 16:53:18 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2017-06-08 16:53:18 +0000
commit: e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45 (patch)
tree: 3761f6a721bd3583f07f40fb2fdb3bd7fe9767c0 /llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
parent: 8cb1d0931f2749a9731e1a59f869a509baa83640 (diff)
download: bcm5719-llvm-e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45.tar.gz
bcm5719-llvm-e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45.zip
1 files changed, 5 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index 84d3e884102..e2c842f25b9 100644
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -17,13 +17,13 @@ declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_add
 ; Check 4 bytes - requires 1 load for each param.
 define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
 ; CHECK-LABEL: zeroEqualityTest02:
-; CHECK:       # BB#0: # %loadbb
+; CHECK:       # BB#0:
 ; CHECK-NEXT:    lwz 3, 0(3)
 ; CHECK-NEXT:    lwz 4, 0(4)
-; CHECK-NEXT:    li 5, 1
-; CHECK-NEXT:    cmplw 3, 4
-; CHECK-NEXT:    isel 3, 0, 5, 2
-; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 4)
   %not.cmp = icmp ne i32 %call, 0
author	Sanjay Patel <spatel@rotateright.com>	2017-06-08 16:53:18 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2017-06-08 16:53:18 +0000
commit	e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45 (patch)
tree	3761f6a721bd3583f07f40fb2fdb3bd7fe9767c0 /llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
parent	8cb1d0931f2749a9731e1a59f869a509baa83640 (diff)
download	bcm5719-llvm-e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45.tar.gz bcm5719-llvm-e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45.zip