diff options
author | Craig Topper <craig.topper@intel.com> | 2019-10-29 10:47:02 -0700 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-10-29 11:38:15 -0700 |
commit | 772533d9214b6e23762847fc7080a4201396fb10 (patch) | |
tree | 9f63b6dfb704f5269110541b72d8f9a842e7baef | |
parent | e6581783f767b7dcaf84223aeae05d2467106113 (diff) | |
download | bcm5719-llvm-772533d9214b6e23762847fc7080a4201396fb10.tar.gz bcm5719-llvm-772533d9214b6e23762847fc7080a4201396fb10.zip |
[X86] Narrow i64 compares with a constant to i32 when the upper 32 bits are known zero.
This catches some cases. There are probably ways to improve this.
I tried doing it as a combine on the setcc, but that broke
some cases involving flag reuse in place of test.
I renamed isX86CCUnsigned to isX86CCSigned and flipped its
polarity to make it consistent with the similar functions for
ISD::SETCC. This avoids having to classify EQ/NE as either signed or unsigned.
Fixes PR43823.
Differential Revision: https://reviews.llvm.org/D69499
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/cmp.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/ctpop-combine.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr37063.ll | 6 |
5 files changed, 26 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1606fb8f108..2feba027b68 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4643,8 +4643,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv, } } -/// Return true if the condition is an unsigned comparison operation. -static bool isX86CCUnsigned(unsigned X86CC) { +/// Return true if the condition is an signed comparison operation. +static bool isX86CCSigned(unsigned X86CC) { switch (X86CC) { default: llvm_unreachable("Invalid integer condition!"); @@ -4654,12 +4654,12 @@ static bool isX86CCUnsigned(unsigned X86CC) { case X86::COND_A: case X86::COND_BE: case X86::COND_AE: - return true; + return false; case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: - return false; + return true; } } @@ -20154,7 +20154,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) || (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) { unsigned ExtendOp = - isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; + isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; if (X86CC == X86::COND_E || X86CC == X86::COND_NE) { // For equality comparisons try to use SIGN_EXTEND if the input was // truncate from something with enough sign bits. @@ -20178,6 +20178,18 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1); } } + + // Try to shrink i64 compares if the input has enough zero bits. + // FIXME: Do this for non-constant compares for constant on LHS? + if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) && + Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub. 
+ cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 && + DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) { + CmpVT = MVT::i32; + Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0); + Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1); + } + // Use SUB instead of CMP to enable CSE between SUB and CMP. SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32); SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1); diff --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index bfa40738fee..f4b1ca511fc 100644 --- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts -; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "6 machinelicm" +; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "5 machinelicm" ; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 @@ -33,7 +33,7 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { ; CHECK-NEXT: jne LBB0_1 ; CHECK-NEXT: ## %bb.2: ## %bb26 ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: cmpq $1048576, %r14 ## imm = 0x100000 +; CHECK-NEXT: cmpl $1048576, %r14d ## imm = 0x100000 ; CHECK-NEXT: jne LBB0_1 ; CHECK-NEXT: ## %bb.3: ## %bb.i ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll index d13fecb8ed2..5f63223e69b 100644 --- a/llvm/test/CodeGen/X86/cmp.ll +++ b/llvm/test/CodeGen/X86/cmp.ll @@ -184,7 +184,7 @@ define i32 @test8(i64 %res) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20] ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] -; CHECK-NEXT: cmpq $3, %rdi # encoding: [0x48,0x83,0xff,0x03] +; CHECK-NEXT: cmpl $3, %edi # encoding: [0x83,0xff,0x03] ; 
CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] entry: @@ -224,7 +224,7 @@ define i32 @test11(i64 %l) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: shrq $47, %rdi # encoding: [0x48,0xc1,0xef,0x2f] ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] -; CHECK-NEXT: cmpq $1, %rdi # encoding: [0x48,0x83,0xff,0x01] +; CHECK-NEXT: cmpl $1, %edi # encoding: [0x83,0xff,0x01] ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; CHECK-NEXT: retq # encoding: [0xc3] entry: diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll index cdef5771f2c..e60935cc873 100644 --- a/llvm/test/CodeGen/X86/ctpop-combine.ll +++ b/llvm/test/CodeGen/X86/ctpop-combine.ll @@ -116,7 +116,7 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone { ; POPCOUNT: # %bb.0: ; POPCOUNT-NEXT: popcntq %rdi, %rcx ; POPCOUNT-NEXT: xorl %eax, %eax -; POPCOUNT-NEXT: cmpq $1, %rcx +; POPCOUNT-NEXT: cmpl $1, %ecx ; POPCOUNT-NEXT: sete %al ; POPCOUNT-NEXT: retq ; @@ -141,7 +141,7 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone { ; POPCOUNT: # %bb.0: ; POPCOUNT-NEXT: popcntq %rdi, %rcx ; POPCOUNT-NEXT: xorl %eax, %eax -; POPCOUNT-NEXT: cmpq $1, %rcx +; POPCOUNT-NEXT: cmpl $1, %ecx ; POPCOUNT-NEXT: setne %al ; POPCOUNT-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/pr37063.ll b/llvm/test/CodeGen/X86/pr37063.ll index cf5e1fa5670..f7f8d622da5 100644 --- a/llvm/test/CodeGen/X86/pr37063.ll +++ b/llvm/test/CodeGen/X86/pr37063.ll @@ -6,9 +6,9 @@ declare void @bar() define void @foo(i64*) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %start -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: andl $-2, %eax -; CHECK-NEXT: cmpq $4, %rax +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: andl $6, %eax +; CHECK-NEXT: cmpl $4, %eax ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: retq |