diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 22:01:13 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 22:01:13 +0000 |
| commit | 6e3a45193aa927de0d4a677594f74c31cfc26727 (patch) | |
| tree | 9aa2723f6e9c9845519753e867047853014630b6 /llvm/lib/Target | |
| parent | 77d86d1c08132e299f785515cb18a0e8cd724c9d (diff) | |
| download | bcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.tar.gz bcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.zip | |
AMDGPU: Split 64-bit 'and' of a constant up
This breaks the tests that were meant to exercise
64-bit inline immediates, so move those tests to shl, where
the constants won't be broken up.
The same split should be repeated for the other related bit ops.
llvm-svn: 258095
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 61 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 |
3 files changed, 70 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 69f4c7cf359..477851d4d3a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -28,7 +28,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" - +#include "SIInstrInfo.h" using namespace llvm; static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, @@ -376,6 +376,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::MUL); @@ -1177,6 +1178,21 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, return SDValue(); } +std::pair<SDValue, SDValue> +AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + + SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); + + const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); + const SDValue One = DAG.getConstant(1, SL, MVT::i32); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); + + return std::make_pair(Lo, Hi); +} + SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op, SelectionDAG &DAG) const { LoadSDNode *Load = cast<LoadSDNode>(Op); @@ -2539,6 +2555,43 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, SN->getBasePtr(), SN->getMemOperand()); } +// TODO: Should repeat for other bit ops. +SDValue AMDGPUTargetLowering::performAndCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + if (N->getValueType(0) != MVT::i64) + return SDValue(); + + // Break up 64-bit and of a constant into two 32-bit ands. 
This will typically + // happen anyway for a VALU 64-bit and. This exposes other 32-bit integer + // combine opportunities since most 64-bit operations are decomposed this way. + // TODO: We won't want this for SALU especially if it is an inline immediate. + const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!RHS) + return SDValue(); + + uint64_t Val = RHS->getZExtValue(); + if (Lo_32(Val) != 0 && Hi_32(Val) != 0 && !RHS->hasOneUse()) { + // If either half of the constant is 0, this is really a 32-bit and, so + // split it. If we can re-use the full materialized constant, keep it. + return SDValue(); + } + + SDLoc SL(N); + SelectionDAG &DAG = DCI.DAG; + + SDValue Lo, Hi; + std::tie(Lo, Hi) = split64BitValue(N->getOperand(0), DAG); + + SDValue LoRHS = DAG.getConstant(Lo_32(Val), SL, MVT::i32); + SDValue HiRHS = DAG.getConstant(Hi_32(Val), SL, MVT::i32); + + SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, LoRHS); + SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, HiRHS); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, LoAnd, HiAnd); + return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); +} + SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (N->getValueType(0) != MVT::i64) @@ -2751,6 +2804,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performSrlCombine(N, DCI); } + case ISD::AND: { + if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) + break; + + return performAndCombine(N, DCI); + } case ISD::MUL: return performMulCombine(N, DCI); case AMDGPUISD::MUL_I24: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 920df8f5a6a..fa52c66f5a7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -67,21 +67,27 @@ private: SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; +protected: SDValue performStoreCombine(SDNode *N, 
DAGCombinerInfo &DCI) const; + SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS, DAGCombinerInfo &DCI) const; SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const; -protected: static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT); virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const; + /// Return 64-bit value Op as two 32-bit integers. + std::pair<SDValue, SDValue> split64BitValue(SDValue Op, + SelectionDAG &DAG) const; + /// \brief Split a vector load into a scalar load of each component. SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 544867513d9..2c22cee4bf4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1915,6 +1915,9 @@ SDValue SITargetLowering::performAndCombine(SDNode *N, if (DCI.isBeforeLegalize()) return SDValue(); + if (SDValue Base = AMDGPUTargetLowering::performAndCombine(N, DCI)) + return Base; + SelectionDAG &DAG = DCI.DAG; // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) -> |

