diff options
| author | Hans Wennborg <hans@hanshq.net> | 2016-12-16 16:34:59 +0000 |
|---|---|---|
| committer | Hans Wennborg <hans@hanshq.net> | 2016-12-16 16:34:59 +0000 |
| commit | 35f21cba1304f989fd755fc44992142f9cecbc65 (patch) | |
| tree | e8a5be88ffc609f0dcb0e5fda31f9d66b883bf42 /llvm/lib | |
| parent | e9fda9f2013b1356aa48db6897eeaee2c21da603 (diff) | |
| download | bcm5719-llvm-35f21cba1304f989fd755fc44992142f9cecbc65.tar.gz bcm5719-llvm-35f21cba1304f989fd755fc44992142f9cecbc65.zip | |
[X86] Fold (setcc (cmp (atomic_load_add x, -C) C), COND) to (setcc (LADD x, -C), COND) (PR31367)
atomic_load_add returns the value before addition, but sets EFLAGS based on the
result of the addition. That means it's setting the flags based on effectively
subtracting C from the value at x, which is also what the outer cmp does.
This targets a pattern that occurs frequently with reference counting pointers:
  void decrement(long volatile *ptr) {
    if (_InterlockedDecrement(ptr) == 0)
      release();
  }
Clang would previously compile it (for 32-bit at -Os) as:
  00000000 <?decrement@@YAXPCJ@Z>:
     0:   8b 44 24 04             mov    0x4(%esp),%eax
     4:   31 c9                   xor    %ecx,%ecx
     6:   49                      dec    %ecx
     7:   f0 0f c1 08             lock xadd %ecx,(%eax)
     b:   83 f9 01                cmp    $0x1,%ecx
     e:   0f 84 00 00 00 00       je     14 <?decrement@@YAXPCJ@Z+0x14>
    14:   c3                      ret
and with this patch it becomes:
  00000000 <?decrement@@YAXPCJ@Z>:
     0:   8b 44 24 04             mov    0x4(%esp),%eax
     4:   f0 ff 08                lock decl (%eax)
     7:   0f 84 00 00 00 00       je     d <?decrement@@YAXPCJ@Z+0xd>
     d:   c3                      ret
(Equivalent variants with _InterlockedExchangeAdd, std::atomic<>'s fetch_add
or pre-decrement operator generate the same code.)
Differential Revision: https://reviews.llvm.org/D27781
llvm-svn: 289955
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 29 |
1 file changed, 20 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9aa9e2eb7d..82a7f8c8274 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28879,11 +28879,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// Combine:
+/// Combine brcond/cmov/setcc/.. based on comparing the result of
+/// atomic_load_add to use EFLAGS produced by the addition
+/// directly if possible. For example:
+///
+///   (setcc (cmp (atomic_load_add x, -C) C), COND_E)
+/// becomes:
+///   (setcc (LADD x, -C), COND_E)
+///
+/// and
 ///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// to:
+/// becomes:
 ///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
-/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+///
 /// Note that this is only legal for some op/cc combinations.
 static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                        SelectionDAG &DAG) {
@@ -28892,7 +28900,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
         (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
     return SDValue();
 
-  // This only applies to variations of the common case:
+  // This applies to variations of the common case:
   //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
   //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
   //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
@@ -28911,8 +28919,9 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
     return SDValue();
 
   auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+  if (!CmpRHSC)
     return SDValue();
+  APInt Comparand = CmpRHSC->getAPIntValue();
 
   const unsigned Opc = CmpLHS.getOpcode();
 
@@ -28928,13 +28937,15 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (Opc == ISD::ATOMIC_LOAD_SUB)
     Addend = -Addend;
 
-  if (CC == X86::COND_S && Addend == 1)
+  if (Comparand == -Addend)
+    CC = CC; // No change.
+  else if (CC == X86::COND_S && Comparand == 0 && Addend == 1)
     CC = X86::COND_LE;
-  else if (CC == X86::COND_NS && Addend == 1)
+  else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1)
     CC = X86::COND_G;
-  else if (CC == X86::COND_G && Addend == -1)
+  else if (CC == X86::COND_G && Comparand == 0 && Addend == -1)
     CC = X86::COND_GE;
-  else if (CC == X86::COND_LE && Addend == -1)
+  else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1)
     CC = X86::COND_L;
   else
     return SDValue();

