summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-02-16 18:51:09 +0000
committerCraig Topper <craig.topper@intel.com>2018-02-16 18:51:09 +0000
commitde565fc73e9047a3044e8680c88aa6a3530a16e6 (patch)
treecdd81832ecc23ade8fad9d77172543e883008389
parent3d1f4b954d888a30e2aa09848b0c696dfa20b787 (diff)
downloadbcm5719-llvm-de565fc73e9047a3044e8680c88aa6a3530a16e6.tar.gz
bcm5719-llvm-de565fc73e9047a3044e8680c88aa6a3530a16e6.zip
[X86] Only reorder srl/and on last DAG combiner run
This seems to interfere with a target independent brcond combine that looks for the (srl (and X, C1), C2) pattern to enable TEST instructions. Once we flip, that combine doesn't fire and we end up exposing it to the X86 specific BT combine which causes us to emit a BT instruction. BT has lower throughput than TEST. We could try to make the brcond combine aware of the alternate pattern, but since the flip was just a code size reduction and not likely to enable other combines, it seemed easier to just delay it until after lowering. Differential Revision: https://reviews.llvm.org/D43201 llvm-svn: 325371
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
-rw-r--r--llvm/test/CodeGen/X86/live-out-reg-info.ll4
-rw-r--r--llvm/test/CodeGen/X86/test-shrink.ll36
-rw-r--r--llvm/test/CodeGen/X86/test-vs-bittest.ll17
-rw-r--r--llvm/test/CodeGen/X86/xor-icmp.ll4
5 files changed, 41 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7fd196ef24b..70b04376e1b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32941,11 +32941,17 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ // Only do this on the last DAG combine as it can interfere with other
+ // combines.
+ if (!DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
// Try to improve a sequence of srl (and X, C1), C2 by inverting the order.
// TODO: This is a generic DAG combine that became an x86-only combine to
// avoid shortcomings in other folds such as bswap, bit-test ('bt'), and
@@ -32996,7 +33002,7 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
return V;
if (N->getOpcode() == ISD::SRL)
- if (SDValue V = combineShiftRightLogical(N, DAG))
+ if (SDValue V = combineShiftRightLogical(N, DAG, DCI))
return V;
return SDValue();
diff --git a/llvm/test/CodeGen/X86/live-out-reg-info.ll b/llvm/test/CodeGen/X86/live-out-reg-info.ll
index e4644665d65..882e17e1244 100644
--- a/llvm/test/CodeGen/X86/live-out-reg-info.ll
+++ b/llvm/test/CodeGen/X86/live-out-reg-info.ll
@@ -12,8 +12,8 @@ define void @foo(i32 %a) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: shrl $23, %edi
-; CHECK-NEXT: btl $8, %edi
-; CHECK-NEXT: jb .LBB0_2
+; CHECK-NEXT: testl $256, %edi # imm = 0x100
+; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %true
; CHECK-NEXT: callq qux
; CHECK-NEXT: .LBB0_2: # %false
diff --git a/llvm/test/CodeGen/X86/test-shrink.ll b/llvm/test/CodeGen/X86/test-shrink.ll
index 0cc7849e8e4..e44233fdd94 100644
--- a/llvm/test/CodeGen/X86/test-shrink.ll
+++ b/llvm/test/CodeGen/X86/test-shrink.ll
@@ -6,8 +6,8 @@
define void @g64xh(i64 inreg %x) nounwind {
; CHECK-LINUX64-LABEL: g64xh:
; CHECK-LINUX64: # %bb.0:
-; CHECK-LINUX64-NEXT: btl $11, %edi
-; CHECK-LINUX64-NEXT: jb .LBB0_2
+; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800
+; CHECK-LINUX64-NEXT: jne .LBB0_2
; CHECK-LINUX64-NEXT: # %bb.1: # %yes
; CHECK-LINUX64-NEXT: pushq %rax
; CHECK-LINUX64-NEXT: callq bar
@@ -18,8 +18,8 @@ define void @g64xh(i64 inreg %x) nounwind {
; CHECK-WIN32-64-LABEL: g64xh:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
-; CHECK-WIN32-64-NEXT: btl $11, %ecx
-; CHECK-WIN32-64-NEXT: jb .LBB0_2
+; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800
+; CHECK-WIN32-64-NEXT: jne .LBB0_2
; CHECK-WIN32-64-NEXT: # %bb.1: # %yes
; CHECK-WIN32-64-NEXT: callq bar
; CHECK-WIN32-64-NEXT: .LBB0_2: # %no
@@ -28,8 +28,8 @@ define void @g64xh(i64 inreg %x) nounwind {
;
; CHECK-X86-LABEL: g64xh:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: btl $11, %eax
-; CHECK-X86-NEXT: jb .LBB0_2
+; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800
+; CHECK-X86-NEXT: jne .LBB0_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
; CHECK-X86-NEXT: .LBB0_2: # %no
@@ -90,8 +90,8 @@ no:
define void @g32xh(i32 inreg %x) nounwind {
; CHECK-LINUX64-LABEL: g32xh:
; CHECK-LINUX64: # %bb.0:
-; CHECK-LINUX64-NEXT: btl $11, %edi
-; CHECK-LINUX64-NEXT: jb .LBB2_2
+; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800
+; CHECK-LINUX64-NEXT: jne .LBB2_2
; CHECK-LINUX64-NEXT: # %bb.1: # %yes
; CHECK-LINUX64-NEXT: pushq %rax
; CHECK-LINUX64-NEXT: callq bar
@@ -102,8 +102,8 @@ define void @g32xh(i32 inreg %x) nounwind {
; CHECK-WIN32-64-LABEL: g32xh:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
-; CHECK-WIN32-64-NEXT: btl $11, %ecx
-; CHECK-WIN32-64-NEXT: jb .LBB2_2
+; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800
+; CHECK-WIN32-64-NEXT: jne .LBB2_2
; CHECK-WIN32-64-NEXT: # %bb.1: # %yes
; CHECK-WIN32-64-NEXT: callq bar
; CHECK-WIN32-64-NEXT: .LBB2_2: # %no
@@ -112,8 +112,8 @@ define void @g32xh(i32 inreg %x) nounwind {
;
; CHECK-X86-LABEL: g32xh:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: btl $11, %eax
-; CHECK-X86-NEXT: jb .LBB2_2
+; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800
+; CHECK-X86-NEXT: jne .LBB2_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
; CHECK-X86-NEXT: .LBB2_2: # %no
@@ -174,8 +174,8 @@ no:
define void @g16xh(i16 inreg %x) nounwind {
; CHECK-LINUX64-LABEL: g16xh:
; CHECK-LINUX64: # %bb.0:
-; CHECK-LINUX64-NEXT: btl $11, %edi
-; CHECK-LINUX64-NEXT: jb .LBB4_2
+; CHECK-LINUX64-NEXT: testl $2048, %edi # imm = 0x800
+; CHECK-LINUX64-NEXT: jne .LBB4_2
; CHECK-LINUX64-NEXT: # %bb.1: # %yes
; CHECK-LINUX64-NEXT: pushq %rax
; CHECK-LINUX64-NEXT: callq bar
@@ -186,8 +186,8 @@ define void @g16xh(i16 inreg %x) nounwind {
; CHECK-WIN32-64-LABEL: g16xh:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
-; CHECK-WIN32-64-NEXT: btl $11, %ecx
-; CHECK-WIN32-64-NEXT: jb .LBB4_2
+; CHECK-WIN32-64-NEXT: testl $2048, %ecx # imm = 0x800
+; CHECK-WIN32-64-NEXT: jne .LBB4_2
; CHECK-WIN32-64-NEXT: # %bb.1: # %yes
; CHECK-WIN32-64-NEXT: callq bar
; CHECK-WIN32-64-NEXT: .LBB4_2: # %no
@@ -196,8 +196,8 @@ define void @g16xh(i16 inreg %x) nounwind {
;
; CHECK-X86-LABEL: g16xh:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: btl $11, %eax
-; CHECK-X86-NEXT: jb .LBB4_2
+; CHECK-X86-NEXT: testl $2048, %eax # imm = 0x800
+; CHECK-X86-NEXT: jne .LBB4_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
; CHECK-X86-NEXT: .LBB4_2: # %no
diff --git a/llvm/test/CodeGen/X86/test-vs-bittest.ll b/llvm/test/CodeGen/X86/test-vs-bittest.ll
index 44f77e8b7ce..d20a7579092 100644
--- a/llvm/test/CodeGen/X86/test-vs-bittest.ll
+++ b/llvm/test/CodeGen/X86/test-vs-bittest.ll
@@ -6,8 +6,8 @@ define void @test64(i64 inreg %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: btl $11, %edi
-; CHECK-NEXT: jb .LBB0_2
+; CHECK-NEXT: testl $2048, %edi # imm = 0x800
+; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB0_2: # %no
@@ -47,6 +47,11 @@ no:
ret void
}
+; This test is identical to test64 above with only the destination of the br
+; reversed. This somehow causes the two functions to get slightly different
+; initial IR. One has an extra invert of the setcc. This previously caused one
+; of the functions to use a BT while the other used a TEST due to another DAG
+; combine messing with an expected canonical form.
define void @test64_2(i64 inreg %x) {
; CHECK-LABEL: test64_2:
; CHECK: # %bb.0:
@@ -190,8 +195,8 @@ define void @test32(i32 inreg %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: btl $11, %edi
-; CHECK-NEXT: jb .LBB8_2
+; CHECK-NEXT: testl $2048, %edi # imm = 0x800
+; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB8_2: # %no
@@ -282,8 +287,8 @@ define void @test16(i16 inreg %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: btl $11, %edi
-; CHECK-NEXT: jb .LBB12_2
+; CHECK-NEXT: testl $2048, %edi # imm = 0x800
+; CHECK-NEXT: jne .LBB12_2
; CHECK-NEXT: # %bb.1: # %yes
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB12_2: # %no
diff --git a/llvm/test/CodeGen/X86/xor-icmp.ll b/llvm/test/CodeGen/X86/xor-icmp.ll
index 6cdc3186cd4..1b5acce51d5 100644
--- a/llvm/test/CodeGen/X86/xor-icmp.ll
+++ b/llvm/test/CodeGen/X86/xor-icmp.ll
@@ -19,8 +19,8 @@ define i32 @t(i32 %a, i32 %b) nounwind ssp {
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: btl $14, %edi
-; X64-NEXT: jae .LBB0_1
+; X64-NEXT: testl $16384, %edi # imm = 0x4000
+; X64-NEXT: je .LBB0_1
; X64-NEXT: # %bb.2: # %bb1
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB0_1: # %bb
OpenPOWER on IntegriCloud