diff options
author | Craig Topper <craig.topper@intel.com> | 2019-05-13 04:03:35 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-05-13 04:03:35 +0000 |
commit | 61e556d2bdf3fa0a10dbaadd2dd03d01c341bd27 (patch) | |
tree | 1d9a5a6ac4e14c07eee68a7b68b2fe7fed6758f7 /llvm/lib | |
parent | 3e6d69063d71dfa04add3628a93f8ac6acc0c1e0 (diff) | |
download | bcm5719-llvm-61e556d2bdf3fa0a10dbaadd2dd03d01c341bd27.tar.gz bcm5719-llvm-61e556d2bdf3fa0a10dbaadd2dd03d01c341bd27.zip |
Recommit r358887 "[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling"
I've included a new fix in X86RegisterInfo to prevent PR41619 without
reintroducing r359392. We might be able to improve that in the base class
implementation of shouldRewriteCopySrc somehow. But this hopefully enables
forward progress on SimplifyDemandedBits improvements for now.
Original commit message:
This patch adds support for BigBitWidth -> SmallBitWidth bitcasts, splitting the DemandedBits/Elts accordingly.
The AMDGPU backend needed an extra (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1) combine to encourage BFE creation. I investigated putting this in DAGCombine,
but it caused a lot of noise on other targets - some improvements, some regressions.
The X86 changes are all definite wins.
llvm-svn: 360552
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.h | 5 |
3 files changed, 45 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 502913886e3..fc7e7d9c3ce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1581,12 +1581,36 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, KnownSrcBits, TLO, Depth + 1)) return true; + } else if ((NumSrcEltBits % BitWidth) == 0 && + TLO.DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / BitWidth; + unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * BitWidth; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SrcVT.isVector()) { + APInt KnownSrcUndef, KnownSrcZero; + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, + KnownSrcZero, TLO, Depth + 1)) + return true; + } + + KnownBits KnownSrcBits; + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, + KnownSrcBits, TLO, Depth + 1)) + return true; } // If this is a bitcast, let computeKnownBits handle it. Only do this on a // recursive call where Known may be useful to the caller. 
if (Depth > 0) { - Known = TLO.DAG.computeKnownBits(Op, Depth); + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; } break; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 7dec87cdcb0..1e62958e722 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -216,6 +216,21 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, } } +bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const { + // Prevent rewriting a copy where the destination size is larger than the + // input size. See PR41619. + // FIXME: Should this be factored into the base implementation somehow. + if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 && + SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit) + return false; + + return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg, + SrcRC, SrcSubReg); +} + const TargetRegisterClass * X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { const Function &F = MF.getFunction(); diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index d362f20b472..81e4920f6a5 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -74,6 +74,11 @@ public: getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override; + bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const override; + /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. const TargetRegisterClass * |