| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-22 14:04:35 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-22 14:04:35 +0000 |
| commit | 6276ce014283c3f7d291342d59cb3428c0c31600 | |
| tree | 15371d1c24dc5112840352c1f6575e13e37752e1 | |
| parent | 9bc6c77220fbdd59d3246a0930425f672d5e85a7 | |
[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling
This patch adds support for BigBitWidth -> SmallBitWidth bitcasts, splitting the DemandedBits/Elts accordingly.
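As a minimal sketch of that splitting (a hypothetical v2i64 -> v4i32 bitcast on a little-endian target, with plain `uint64_t` masks standing in for LLVM's `APInt`): demanded narrow element `i` maps to source element `i / Scale` at bit offset `(i % Scale) * BitWidth`, where `Scale = NumSrcEltBits / BitWidth`.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical case: bitcast v2i64 -> v4i32, little endian.
  const unsigned BitWidth = 32;                    // narrow element size
  const unsigned NumSrcEltBits = 64;               // wide (source) element size
  const unsigned NumElts = 4;                      // narrow element count
  const unsigned Scale = NumSrcEltBits / BitWidth; // narrow elts per wide elt

  // Demand all 32 bits of narrow elements 1 and 2 only.
  const uint64_t DemandedElts = 0b0110;
  const uint64_t DemandedBits = 0xFFFFFFFFull;

  // Mirror the patch's loop: one shared NumSrcEltBits-wide bit mask plus a
  // per-source-element mask (APInt::insertBits becomes an OR-shift here).
  uint64_t DemandedSrcBits = 0;
  uint64_t DemandedSrcElts = 0;
  for (unsigned i = 0; i != NumElts; ++i)
    if (DemandedElts & (1ull << i)) {
      unsigned Offset = (i % Scale) * BitWidth;
      DemandedSrcBits |= DemandedBits << Offset;
      DemandedSrcElts |= 1ull << (i / Scale);
    }

  // Narrow elt 1 -> src elt 0, bits [32,64); narrow elt 2 -> src elt 1,
  // bits [0,32). The bit mask is the union over demanded elements, so it
  // ends up all-ones here.
  printf("DemandedSrcBits = 0x%016llx\n", (unsigned long long)DemandedSrcBits);
  printf("DemandedSrcElts = 0x%llx\n", (unsigned long long)DemandedSrcElts);
  return 0;
}
```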
The AMDGPU backend needed an extra (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1) combine to encourage BFE creation. I investigated putting this in DAGCombine, but it caused a lot of noise on other targets: some improvements, some regressions.
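The rewrite is a plain bitwise identity whenever c1 << c2 keeps all of c1's bits in range: masking with c1 << c2 before the shift selects exactly the bits that masking with c1 selects after the shift, and the resulting (and (srl ...)) shape is what BFE-style bit-field extract patterns match. A standalone spot check (hypothetical values, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

// lhs: srl(and(x, c1 << c2), c2)   rhs: and(srl(x, c2), c1)
uint32_t lhs(uint32_t x, uint32_t c1, uint32_t c2) {
  return (x & (c1 << c2)) >> c2;
}
uint32_t rhs(uint32_t x, uint32_t c1, uint32_t c2) {
  return (x >> c2) & c1;
}

int main() {
  // Spot-check a few values; c1 is a contiguous 8-bit field (a typical
  // bit-field-extract shape), and c2 is kept small enough that c1 << c2
  // never shifts bits out of the 32-bit word.
  const uint32_t xs[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : xs)
    for (uint32_t c2 = 0; c2 <= 24; ++c2)
      assert(lhs(x, 0xFF, c2) == rhs(x, 0xFF, c2));
  return 0;
}
```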
The X86 changes are all definite wins.
Differential Revision: https://reviews.llvm.org/D60462
llvm-svn: 358887
Diffstat (limited to 'llvm/lib/CodeGen')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 26 |
1 file changed, 25 insertions(+), 1 deletion(-)
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 636fa6c1dab..eb089e5201f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1471,12 +1471,36 @@ bool TargetLowering::SimplifyDemandedBits(
       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                                KnownSrcBits, TLO, Depth + 1))
         return true;
+    } else if ((NumSrcEltBits % BitWidth) == 0 &&
+               TLO.DAG.getDataLayout().isLittleEndian()) {
+      unsigned Scale = NumSrcEltBits / BitWidth;
+      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i]) {
+          unsigned Offset = (i % Scale) * BitWidth;
+          DemandedSrcBits.insertBits(DemandedBits, Offset);
+          DemandedSrcElts.setBit(i / Scale);
+        }
+
+      if (SrcVT.isVector()) {
+        APInt KnownSrcUndef, KnownSrcZero;
+        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+                                       KnownSrcZero, TLO, Depth + 1))
+          return true;
+      }
+
+      KnownBits KnownSrcBits;
+      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+                               KnownSrcBits, TLO, Depth + 1))
+        return true;
     }
 
     // If this is a bitcast, let computeKnownBits handle it. Only do this on a
     // recursive call where Known may be useful to the caller.
     if (Depth > 0) {
-      Known = TLO.DAG.computeKnownBits(Op, Depth);
+      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
       return false;
     }
     break;
```
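Two details of the new path are worth noting. It is gated on `isLittleEndian()`: on a little-endian layout, narrow result element `i` overlays source element `i / Scale` at bit offset `(i % Scale) * BitWidth`, while big-endian targets reverse the sub-element order and would need a different mapping. And `DemandedSrcBits` is a single `NumSrcEltBits`-wide mask shared by every demanded source element, so it over-demands (stays conservative) when different narrow elements demand different bit ranges.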

