diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 31 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-sbb.ll | 30 |
2 files changed, 41 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 95d31e62caf..fcf9b4148b4 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, Complexity += 2; } + // Heuristic: try harder to form an LEA from ADD if the operands set flags. + // Unlike ADD, LEA does not affect flags, so we will be less likely to require + // duplicating flag-producing instructions later in the pipeline. + if (N.getOpcode() == ISD::ADD) { + auto isMathWithFlags = [](SDValue V) { + switch (V.getOpcode()) { + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: + /* TODO: These opcodes can be added safely, but we may want to justify + their inclusion for different reasons (better for reg-alloc). + case X86ISD::SMUL: + case X86ISD::UMUL: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + */ + // Value 1 is the flag output of the node - verify it's not dead. + return !SDValue(V.getNode(), 1).use_empty(); + default: + return false; + } + }; + // TODO: This could be an 'or' rather than 'and' to make the transform more + // likely to happen. We might want to factor in whether there's a + // load folding opportunity for the math op that disappears with LEA. + if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) + Complexity++; + } + if (AM.Disp) Complexity++; diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll index bba72c56609..6eb0e1e0f0c 100644 --- a/llvm/test/CodeGen/X86/combine-sbb.ll +++ b/llvm/test/CodeGen/X86/combine-sbb.ll @@ -309,35 +309,25 @@ define i32 @PR40483_sub5(i32*, i32) nounwind { define i32 @PR40483_sub6(i32*, i32) nounwind { ; X86-LABEL: PR40483_sub6: ; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: subl %edi, %ecx +; X86-NEXT: movl (%edx), %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: subl %edi, %esi -; X86-NEXT: movl %esi, (%edx) +; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, (%edx) ; X86-NEXT: jae .LBB8_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: leal (%ecx,%ecx), %eax ; X86-NEXT: .LBB8_2: -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: retl ; ; X64-LABEL: PR40483_sub6: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %esi, %edx -; X64-NEXT: addl %edx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: subl %esi, %ecx -; X64-NEXT: movl %ecx, (%rdi) -; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subl %esi, %eax +; X64-NEXT: movl %eax, (%rdi) +; X64-NEXT: leal (%rax,%rax), %eax +; X64-NEXT: cmovael %ecx, %eax ; X64-NEXT: retq %3 = load i32, i32* %0, align 8 %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1) |