diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-29 14:50:25 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-07-29 14:50:25 +0000 |
commit | 718cb0ea6288794de5b6138a958df0ee4b865a8f (patch) | |
tree | 32ede29417de70a2825eda75e3ccf72325ad13b4 | |
parent | 670a5d88a3cfca6c2f569e9edd5a931803f15eff (diff) | |
download | bcm5719-llvm-718cb0ea6288794de5b6138a958df0ee4b865a8f.tar.gz bcm5719-llvm-718cb0ea6288794de5b6138a958df0ee4b865a8f.zip |
[SelectionDAG][X86] CombineBT - more aggressively determine demanded bits
This patch is in 2 parts:
1 - replace combineBT's use of SimplifyDemandedBits (which was only attempted when the bit index operand had a single use) with SelectionDAG::GetDemandedBits, to more aggressively determine the lower bits used by BT.
2 - update SelectionDAG::GetDemandedBits to support ANY_EXTEND - if the demanded bits lie entirely within the non-extended portion, peek through the extend and compute the demanded bits of the source value; if that yields a simplified value, ANY_EXTEND it back to the original type.
Differential Revision: https://reviews.llvm.org/D35896
llvm-svn: 309486
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 20 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/bt.ll | 6 |
3 files changed, 21 insertions, 17 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index da69ef3b231..a396f3368cf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1999,6 +1999,18 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { return V.getOperand(0); break; } + case ISD::ANY_EXTEND: { + SDValue Src = V.getOperand(0); + unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); + // Being conservative here - only peek through if we only demand bits in the + // non-extended source (even though the extended bits are technically undef). + if (Mask.getActiveBits() > SrcBitWidth) + break; + APInt SrcMask = Mask.trunc(SrcBitWidth); + if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask)) + return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); + break; + } } return SDValue(); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0fd72dd6a9..da9adcb50fb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34200,19 +34200,15 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + // BT ignores high bits in the bit index operand. 
- SDValue Op1 = N->getOperand(1); - if (Op1.hasOneUse()) { - unsigned BitWidth = Op1.getValueSizeInBits(); - APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); - KnownBits Known; - TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), - !DCI.isBeforeLegalizeOps()); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) || - TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) - DCI.CommitTargetLoweringOpt(TLO); - } + unsigned BitWidth = N1.getValueSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); + if (SDValue DemandedN1 = DAG.GetDemandedBits(N1, DemandedMask)) + return DAG.getNode(X86ISD::BT, SDLoc(N), MVT::i32, N0, DemandedN1); + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll index 5cfad109541..c8050a33916 100644 --- a/llvm/test/CodeGen/X86/bt.ll +++ b/llvm/test/CodeGen/X86/bt.ll @@ -1101,8 +1101,6 @@ define void @demanded_i32(i32* nocapture readonly, i32* nocapture, i32) nounwind ; X86-NEXT: movl (%edx,%eax,4), %esi ; X86-NEXT: movl $1, %edx ; X86-NEXT: shll %cl, %edx -; X86-NEXT: andb $31, %cl -; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: btl %ecx, %esi ; X86-NEXT: jae .LBB30_2 ; X86-NEXT: # BB#1: @@ -1120,9 +1118,7 @@ define void @demanded_i32(i32* nocapture readonly, i32* nocapture, i32) nounwind ; X64-NEXT: movl $1, %edi ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: shll %cl, %edi -; X64-NEXT: andb $31, %dl -; X64-NEXT: movzbl %dl, %ecx -; X64-NEXT: btl %ecx, %r8d +; X64-NEXT: btl %edx, %r8d ; X64-NEXT: jae .LBB30_2 ; X64-NEXT: # BB#1: ; X64-NEXT: orl %edi, (%rsi,%rax,4) |