diff options
author | Sanjay Patel <spatel@rotateright.com> | 2015-11-09 21:16:49 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2015-11-09 21:16:49 +0000 |
commit | 32538d68117772c93c059a35523ea974397a8c56 (patch) | |
tree | 8866ae4fbcd4832e29a1544ef0674a6b15d156f3 | |
parent | 65bc2b12233248f086461bea4d57bd2920c624c8 (diff) | |
download | bcm5719-llvm-32538d68117772c93c059a35523ea974397a8c56.tar.gz bcm5719-llvm-32538d68117772c93c059a35523ea974397a8c56.zip |
[x86] try harder to match bitwise 'or' into an LEA
The motivation for this patch starts with the epic fail example in PR18007:
https://llvm.org/bugs/show_bug.cgi?id=18007
...unfortunately, this patch makes no difference for that case, but it solves some
simpler cases. We'll get there some day. :)
The current 'or' matching code was using computeKnownBits() via
isBaseWithConstantOffset() -> MaskedValueIsZero(), but that's an unnecessarily limited use.
We can do more by copying the logic in ValueTracking's haveNoCommonBitsSet(), so we can
treat the 'or' as if it was an 'add'.
There's a TODO comment here because we should lift the bit-checking logic into a helper
function, so it's not duplicated in DAGCombiner.
An example of the better LEA matching:
leal (%rdi,%rdi), %eax
andl $1, %esi
orl %esi, %eax
Becomes:
andl $1, %esi
leal (%rsi,%rdi,2), %eax
Differential Revision: http://reviews.llvm.org/D13956
llvm-svn: 252515
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 32 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/or-lea.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll | 7 |
4 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index c5a1093af12..0cbeda91ccc 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1338,19 +1338,29 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; break; - case ISD::OR: - // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (CurDAG->isBaseWithConstantOffset(N)) { - X86ISelAddressMode Backup = AM; - ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); - - // Start with the LHS as an addr mode. - if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) && - !foldOffsetIntoAddress(CN->getSExtValue(), AM)) + case ISD::OR: { + // TODO: The bit-checking logic should be put into a helper function and + // used by DAGCombiner. + + // We want to look through a transform in InstCombine and DAGCombiner that + // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + CurDAG->computeKnownBits(N.getOperand(0), LHSZero, LHSOne); + CurDAG->computeKnownBits(N.getOperand(1), RHSZero, RHSOne); + + // If we know that there are no common bits set by the operands of this + // 'or', it is equivalent to an 'add'. For example: + // (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3)) + // An 'lea' can then be used to match the shift (multiply) and add: + // and $1, %esi + // lea (%rsi, %rdi, 8), %rax + if ((LHSZero | RHSZero).isAllOnesValue()) + if (!matchAdd(N, AM, Depth)) return false; - AM = Backup; - } + break; + } case ISD::AND: { // Perform some heroic transforms on an and of a constant-count shift
diff --git a/llvm/test/CodeGen/X86/or-lea.ll b/llvm/test/CodeGen/X86/or-lea.ll index bd117207e6c..f28cc8569cf 100644 --- a/llvm/test/CodeGen/X86/or-lea.ll +++ b/llvm/test/CodeGen/X86/or-lea.ll @@ -8,9 +8,8 @@ define i32 @or_shift1_and1(i32 %x, i32 %y) { ; CHECK-LABEL: or_shift1_and1: ; CHECK: # BB#0: -; CHECK-NEXT: addl %edi, %edi ; CHECK-NEXT: andl $1, %esi -; CHECK-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NEXT: leal (%rsi,%rdi,2), %eax ; CHECK-NEXT: retq %shl = shl i32 %x, 1 @@ -22,9 +21,8 @@ define i32 @or_shift1_and1(i32 %x, i32 %y) { define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) { ; CHECK-LABEL: or_shift1_and1_swapped: ; CHECK: # BB#0: -; CHECK-NEXT: leal (%rdi,%rdi), %eax ; CHECK-NEXT: andl $1, %esi -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: leal (%rsi,%rdi,2), %eax ; CHECK-NEXT: retq %shl = shl i32 %x, 1 @@ -36,9 +34,8 @@ define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) { define i32 @or_shift2_and1(i32 %x, i32 %y) { ; CHECK-LABEL: or_shift2_and1: ; CHECK: # BB#0: -; CHECK-NEXT: leal (,%rdi,4), %eax ; CHECK-NEXT: andl $1, %esi -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: leal (%rsi,%rdi,4), %eax ; CHECK-NEXT: retq %shl = shl i32 %x, 2 @@ -50,9 +47,8 @@ define i32 @or_shift2_and1(i32 %x, i32 %y) { define i32 @or_shift3_and1(i32 %x, i32 %y) { ; CHECK-LABEL: or_shift3_and1: ; CHECK: # BB#0: -; CHECK-NEXT: leal (,%rdi,8), %eax ; CHECK-NEXT: andl $1, %esi -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: leal (%rsi,%rdi,8), %eax ; CHECK-NEXT: retq %shl = shl i32 %x, 3 @@ -64,9 +60,8 @@ define i32 @or_shift3_and1(i32 %x, i32 %y) { define i32 @or_shift3_and7(i32 %x, i32 %y) { ; CHECK-LABEL: or_shift3_and7: ; CHECK: # BB#0: -; CHECK-NEXT: leal (,%rdi,8), %eax ; CHECK-NEXT: andl $7, %esi -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: leal (%rsi,%rdi,8), %eax ; CHECK-NEXT: retq %shl = shl i32 %x, 3 @@ -112,9 +107,8 @@ define i32 @or_shift3_and8(i32 %x, i32 %y) { define i64 @or_shift1_and1_64(i64 %x, i64 %y) { ; CHECK-LABEL: or_shift1_and1_64: ; CHECK: # BB#0: -; CHECK-NEXT: addq %rdi, %rdi ; CHECK-NEXT: andl $1, %esi -; CHECK-NEXT: leaq (%rsi,%rdi), %rax +; CHECK-NEXT: leaq (%rsi,%rdi,2), %rax ; CHECK-NEXT: retq %shl = shl i64 %x, 1
diff --git a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll index f2380f23b8e..75e9052c129 100644 --- a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll +++ b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-left.ll @@ -8,11 +8,9 @@ ; return (a << 1) | (b >> 63); ;} -; CHECK: lshift1: -; CHECK: addq {{.*}},{{.*}} -; CHECK-NEXT: shrq $63, {{.*}} -; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}} - +; CHECK-LABEL: lshift1: +; CHECK: shrq $63, %rsi +; CHECK-NEXT: leaq (%rsi,%rdi,2), %rax define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable { entry: @@ -27,10 +25,9 @@ entry: ; return (a << 2) | (b >> 62); ;} -; CHECK: lshift2: -; CHECK: shlq $2, {{.*}} -; CHECK-NEXT: shrq $62, {{.*}} -; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}} +; CHECK-LABEL: lshift2: +; CHECK: shrq $62, %rsi +; CHECK-NEXT: leaq (%rsi,%rdi,4), %rax define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable { entry:
diff --git a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll index 5edaad89df4..bc2f39ee666 100644 --- a/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll +++ b/llvm/test/CodeGen/X86/x86-64-double-precision-shift-right.ll @@ -61,10 +61,9 @@ define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable { ; return (a >> 63) | (b << 1); ;} -; CHECK: rshift63: -; CHECK: shrq $63, {{.*}} -; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}} -; CHECK-NEXT: orq {{.*}}, {{.*}} +; CHECK-LABEL: rshift63: +; CHECK: shrq $63, %rdi +; CHECK-NEXT: leaq (%rdi,%rsi,2), %rax define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable { %1 = lshr i64 %a, 63 |