diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-07-06 22:35:19 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-07-06 22:35:19 +0000 |
commit | 8fbf1c1f4a6ffe8f070b23f41cfa2df668f4913a (patch) | |
tree | 8b7db1290866506ae6b9f0cd789d7278f907b285 /llvm/lib | |
parent | 86bc91508dca4cc5d9cd4bf57016fd8a4d886ccf (diff) | |
download | bcm5719-llvm-8fbf1c1f4a6ffe8f070b23f41cfa2df668f4913a.tar.gz bcm5719-llvm-8fbf1c1f4a6ffe8f070b23f41cfa2df668f4913a.zip |
[X86][SSE] Vectorized i64 uniform constant SRA shifts
This patch adds vectorization support for uniform constant i64 arithmetic shift right operators.
Differential Revision: http://reviews.llvm.org/D9645
llvm-svn: 241514
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 50 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 3 |
2 files changed, 51 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 05b3604f851..a92ab5ae2a0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1032,6 +1032,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SHL, MVT::v2i64, Custom); setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v2i64, Custom); setOperationAction(ISD::SRA, MVT::v4i32, Custom); } @@ -1211,6 +1212,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SHL, MVT::v4i64, Custom); setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i64, Custom); setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. @@ -16948,6 +16950,38 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI : (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI; + auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) { + assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type"); + MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); + SDValue Ex = DAG.getBitcast(ExVT, R); + + if (ShiftAmt >= 32) { + // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32. + SDValue Upper = + getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG); + SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, + ShiftAmt - 32, DAG); + if (VT == MVT::v2i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3}); + if (VT == MVT::v4i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, + {9, 1, 11, 3, 13, 5, 15, 7}); + } else { + // SRA upper i32, SHL whole i64 and select lower i32. + SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, + ShiftAmt, DAG); + SDValue Lower = + getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG); + Lower = DAG.getBitcast(ExVT, Lower); + if (VT == MVT::v2i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3}); + if (VT == MVT::v4i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, + {8, 1, 10, 3, 12, 5, 14, 7}); + } + return DAG.getBitcast(VT, Ex); + }; + // Optimize shl/srl/sra with constant shift amount. if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) { if (auto *ShiftConst = BVAmt->getConstantSplatNode()) { @@ -16956,6 +16990,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + // i64 SRA needs to be performed as partial shifts. + if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Op.getOpcode() == ISD::SRA) + return ArithmeticShiftRight64(ShiftAmt); + if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); @@ -17039,7 +17078,12 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (ShAmt != ShiftAmt) return SDValue(); } - return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + + if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) + return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + + if (Op.getOpcode() == ISD::SRA) + return ArithmeticShiftRight64(ShiftAmt); } return SDValue(); @@ -17121,7 +17165,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, if (Vals[j] != Amt.getOperand(i + j)) return SDValue(); } - return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); + + if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) + return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); } return SDValue(); } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0c82a700952..a541035f13d 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -117,6 +117,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry<MVT::SimpleValueType> AVX2UniformConstCostTable[] = { + { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. + { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence @@ -211,6 +213,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v8i16, 1 }, // psraw. { ISD::SRA, MVT::v4i32, 1 }, // psrad. + { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence |