diff options
author | Sanjay Patel <spatel@rotateright.com> | 2017-06-08 16:53:18 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2017-06-08 16:53:18 +0000 |
commit | e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45 (patch) | |
tree | 3761f6a721bd3583f07f40fb2fdb3bd7fe9767c0 /llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll | |
parent | 8cb1d0931f2749a9731e1a59f869a509baa83640 (diff) | |
download | bcm5719-llvm-e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45.tar.gz bcm5719-llvm-e7c5041c2ae10b4da0526d5abdc5cbcd647f1b45.zip |
[CGP / PowerPC] avoid multi-block overhead for simple memcmp expansion
The test diff for PowerPC shows we can better optimize if this case is one block.
For x86, there would be a substantial difference if CGP expansion was enabled because branches are assumed
cheap and SDAG can't optimize across blocks.
Instead of this:
_cmp_eq8:
movq (%rdi), %rax
cmpq (%rsi), %rax
je LBB23_1
## BB#2: ## %res_block
movl $1, %ecx
jmp LBB23_3
LBB23_1:
xorl %ecx, %ecx
LBB23_3: ## %endblock
xorl %eax, %eax
testl %ecx, %ecx
sete %al
retq
We get this:
cmp_eq8:
movq (%rdi), %rcx
xorl %eax, %eax
cmpq (%rsi), %rcx
sete %al
retq
And that matches the optimal codegen that we get from the current expansion in SelectionDAGBuilder::visitMemCmpCall().
If this looks right, then I just need to confirm that vector-sized expansion will work from here, and we can enable
CGP memcmp() expansion for x86. I.e., we'll bypass the power-of-2 special cases currently optimized in SDAG because we
can lower the IR produced here optimally.
Differential Revision: https://reviews.llvm.org/D34005
llvm-svn: 304987
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll')
-rw-r--r-- | llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index 84d3e884102..e2c842f25b9 100644
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -17,13 +17,13 @@ declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_add
 ; Check 4 bytes - requires 1 load for each param.
 define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
 ; CHECK-LABEL: zeroEqualityTest02:
-; CHECK: # BB#0: # %loadbb
+; CHECK: # BB#0:
 ; CHECK-NEXT: lwz 3, 0(3)
 ; CHECK-NEXT: lwz 4, 0(4)
-; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: cmplw 3, 4
-; CHECK-NEXT: isel 3, 0, 5, 2
-; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
 ; CHECK-NEXT: blr
 %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 4)
 %not.cmp = icmp ne i32 %call, 0