diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 31 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-sbb.ll | 30 | 
2 files changed, 41 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 95d31e62caf..fcf9b4148b4 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,        Complexity += 2;    } +  // Heuristic: try harder to form an LEA from ADD if the operands set flags. +  // Unlike ADD, LEA does not affect flags, so we will be less likely to require +  // duplicating flag-producing instructions later in the pipeline. +  if (N.getOpcode() == ISD::ADD) { +    auto isMathWithFlags = [](SDValue V) { +      switch (V.getOpcode()) { +      case X86ISD::ADD: +      case X86ISD::SUB: +      case X86ISD::ADC: +      case X86ISD::SBB: +      /* TODO: These opcodes can be added safely, but we may want to justify +               their inclusion for different reasons (better for reg-alloc). +      case X86ISD::SMUL: +      case X86ISD::UMUL: +      case X86ISD::OR: +      case X86ISD::XOR: +      case X86ISD::AND: +      */ +        // Value 1 is the flag output of the node - verify it's not dead. +        return !SDValue(V.getNode(), 1).use_empty(); +      default: +        return false; +      } +    }; +    // TODO: This could be an 'or' rather than 'and' to make the transform more +    //       likely to happen. We might want to factor in whether there's a +    //       load folding opportunity for the math op that disappears with LEA. +    if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) +      Complexity++; +  } +    if (AM.Disp)      Complexity++; diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll index bba72c56609..6eb0e1e0f0c 100644 --- a/llvm/test/CodeGen/X86/combine-sbb.ll +++ b/llvm/test/CodeGen/X86/combine-sbb.ll @@ -309,35 +309,25 @@ define i32 @PR40483_sub5(i32*, i32) nounwind {  define i32 @PR40483_sub6(i32*, i32) nounwind {  ; X86-LABEL: PR40483_sub6:  ; X86:       # %bb.0: -; X86-NEXT:    pushl %edi -; X86-NEXT:    pushl %esi  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx -; X86-NEXT:    movl (%edx), %esi -; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi -; X86-NEXT:    movl %esi, %ecx -; X86-NEXT:    subl %edi, %ecx +; X86-NEXT:    movl (%edx), %ecx  ; X86-NEXT:    xorl %eax, %eax -; X86-NEXT:    subl %edi, %esi -; X86-NEXT:    movl %esi, (%edx) +; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT:    movl %ecx, (%edx)  ; X86-NEXT:    jae .LBB8_2  ; X86-NEXT:  # %bb.1: -; X86-NEXT:    addl %ecx, %ecx -; X86-NEXT:    movl %ecx, %eax +; X86-NEXT:    leal (%ecx,%ecx), %eax  ; X86-NEXT:  .LBB8_2: -; X86-NEXT:    popl %esi -; X86-NEXT:    popl %edi  ; X86-NEXT:    retl  ;  ; X64-LABEL: PR40483_sub6:  ; X64:       # %bb.0: -; X64-NEXT:    movl (%rdi), %ecx -; X64-NEXT:    movl %ecx, %edx -; X64-NEXT:    subl %esi, %edx -; X64-NEXT:    addl %edx, %edx -; X64-NEXT:    xorl %eax, %eax -; X64-NEXT:    subl %esi, %ecx -; X64-NEXT:    movl %ecx, (%rdi) -; X64-NEXT:    cmovbl %edx, %eax +; X64-NEXT:    movl (%rdi), %eax +; X64-NEXT:    xorl %ecx, %ecx +; X64-NEXT:    subl %esi, %eax +; X64-NEXT:    movl %eax, (%rdi) +; X64-NEXT:    leal (%rax,%rax), %eax +; X64-NEXT:    cmovael %ecx, %eax  ; X64-NEXT:    retq    %3 = load i32, i32* %0, align 8    %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)  | 

