diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 40 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 | 
2 files changed, 41 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 39e5948eca7..318452faa04 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -679,6 +679,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
 
   // All memory operations. Some folding on the pointer operand is done to help
   // matching the constant offsets in the addressing modes.
@@ -8114,6 +8115,43 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
   return SDValue();
 }
 
+SDValue
+SITargetLowering::performInsertVectorEltCombine(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(2);
+  EVT VecVT = Vec.getValueType();
+  EVT EltVT = VecVT.getVectorElementType();
+  unsigned VecSize = VecVT.getSizeInBits();
+  unsigned EltSize = EltVT.getSizeInBits();
+
+  // INSERT_VECTOR_ELT (<n x e>, var-idx)
+  // => BUILD_VECTOR n x select (e, const-idx)
+  // This eliminates non-constant index and subsequent movrel or scratch access.
+  // Sub-dword vectors of size 2 dword or less have better implementation.
+  // Vectors of size bigger than 8 dwords would yield too many v_cndmask_b32
+  // instructions.
+  if (isa<ConstantSDNode>(Idx) ||
+      VecSize > 256 || (VecSize <= 64 && EltSize < 32))
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+  SDValue Ins = N->getOperand(1);
+  EVT IdxVT = Idx.getValueType();
+
+  // Lane I takes Ins when Idx == I and keeps its old element otherwise.
+  SmallVector<SDValue, 16> Ops;
+  for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+    SDValue IC = DAG.getConstant(I, SL, IdxVT);
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
+    SDValue V = DAG.getSelectCC(SL, Idx, IC, Ins, Elt, ISD::SETEQ);
+    Ops.push_back(V);
+  }
+
+  return DAG.getBuildVector(VecVT, SL, Ops);
+}
+
 unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
                                           const SDNode *N0,
                                           const SDNode *N1) const {
@@ -8722,6 +8760,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   }
   case ISD::EXTRACT_VECTOR_ELT:
     return performExtractVectorEltCombine(N, DCI);
+  case ISD::INSERT_VECTOR_ELT:
+    return performInsertVectorEltCombine(N, DCI);
   }
   return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
 }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 73fa05ea58f..5f8289c1f26 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -152,6 +152,7 @@ private:
   SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   unsigned getFusedOpcode(const SelectionDAG &DAG,
                           const SDNode *N0, const SDNode *N1) const;

