diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 83 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 19 |
3 files changed, 50 insertions, 84 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index c3a5346f0b8..1eb078b1fe0 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -188,7 +188,6 @@ namespace { private: void Select(SDNode *N) override; - bool tryGather(SDNode *N, unsigned Opc); bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); @@ -1982,39 +1981,6 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { llvm_unreachable("unrecognized size for LdVT"); } -/// Customized ISel for GATHER operations. -bool X86DAGToDAGISel::tryGather(SDNode *Node, unsigned Opc) { - // Operands of Gather: VSrc, Base, VIdx, VMask, Scale - SDValue Chain = Node->getOperand(0); - SDValue VSrc = Node->getOperand(2); - SDValue Base = Node->getOperand(3); - SDValue VIdx = Node->getOperand(4); - SDValue VMask = Node->getOperand(5); - ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); - if (!Scale) - return false; - - SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), - MVT::Other); - - SDLoc DL(Node); - - // Memory Operands: Base, Scale, Index, Disp, Segment - SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32); - SDValue Segment = CurDAG->getRegister(0, MVT::i32); - const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx, - Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops); - // Node has 2 outputs: VDst and MVT::Other. - // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. - // We replace VDst of Node with VDst of ResNode, and Other of Node with Other - // of ResNode. - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2)); - CurDAG->RemoveDeadNode(Node); - return true; -} - void X86DAGToDAGISel::Select(SDNode *Node) { MVT NVT = Node->getSimpleValueType(0); unsigned Opc, MOpc; @@ -2052,55 +2018,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } break; } - case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: break; - case Intrinsic::x86_avx2_gather_d_pd: - case Intrinsic::x86_avx2_gather_d_pd_256: - case Intrinsic::x86_avx2_gather_q_pd: - case Intrinsic::x86_avx2_gather_q_pd_256: - case Intrinsic::x86_avx2_gather_d_ps: - case Intrinsic::x86_avx2_gather_d_ps_256: - case Intrinsic::x86_avx2_gather_q_ps: - case Intrinsic::x86_avx2_gather_q_ps_256: - case Intrinsic::x86_avx2_gather_d_q: - case Intrinsic::x86_avx2_gather_d_q_256: - case Intrinsic::x86_avx2_gather_q_q: - case Intrinsic::x86_avx2_gather_q_q_256: - case Intrinsic::x86_avx2_gather_d_d: - case Intrinsic::x86_avx2_gather_d_d_256: - case Intrinsic::x86_avx2_gather_q_d: - case Intrinsic::x86_avx2_gather_q_d_256: { - if (!Subtarget->hasAVX2()) - break; - unsigned Opc; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); - case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break; - case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break; - case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break; - case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break; - case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break; - case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break; - case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break; - case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break; - case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break; - case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break; - case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break; - case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break; - case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break; - case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break; - case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; - case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; - } - if (tryGather(Node, Opc)) - return; - break; - } - } - break; - } case X86ISD::GlobalBaseReg: ReplaceNode(Node, getGlobalBaseReg()); return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 56f62dce002..c2b16402319 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19854,6 +19854,28 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget } } +static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Src, SDValue Mask, SDValue Base, + SDValue Index, SDValue ScaleOp, SDValue Chain, + const X86Subtarget &Subtarget) { + SDLoc dl(Op); + auto *C = cast<ConstantSDNode>(ScaleOp); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + EVT MaskVT = Mask.getValueType(); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); + SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + // If source is undef or we know it won't be used, use a zero vector + // to break register dependency. + // TODO: use undef instead and let ExeDepsFix deal with it? + if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) + Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); + SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; + return DAG.getMergeValues(RetOps, dl); +} + static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, @@ -20181,6 +20203,16 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } + case GATHER_AVX2: { + SDValue Chain = Op.getOperand(0); + SDValue Src = Op.getOperand(2); + SDValue Base = Op.getOperand(3); + SDValue Index = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + SDValue Scale = Op.getOperand(6); + return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, + Scale, Chain, Subtarget); + } case GATHER: { //gather(v1, mask, index, base, scale); SDValue Chain = Op.getOperand(0); diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 42495bf1e33..43d0cf72721 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t { TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, - FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK + FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK, GATHER_AVX2 }; struct IntrinsicData { @@ -67,6 +67,23 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0), X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0), + X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0), + X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0), + X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0), + X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, X86::VGATHERDPDYrm, 0), + X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, X86::VGATHERDPSrm, 0), + X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, X86::VGATHERDPSYrm, 0), + X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, X86::VPGATHERDQrm, 0), + X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, X86::VPGATHERDQYrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, X86::VPGATHERQDrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, X86::VPGATHERQDYrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, X86::VGATHERQPDrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, X86::VGATHERQPDYrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, X86::VGATHERQPSrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, X86::VGATHERQPSYrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, X86::VPGATHERQQrm, 0), + X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, X86::VPGATHERQQYrm, 0), + X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0), X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0), X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0), |