diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-01-26 17:11:34 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-01-26 17:11:34 +0000 |
| commit | 76ede609f6de2beedaa027e0a423183aa9605478 (patch) | |
| tree | 77dc7c14347e151d8ce82450b0d461b205d9e3bc /llvm/lib | |
| parent | d567c27c84cf5cf9a462a82b92d178dc8219b26d (diff) | |
| download | bcm5719-llvm-76ede609f6de2beedaa027e0a423183aa9605478.tar.gz bcm5719-llvm-76ede609f6de2beedaa027e0a423183aa9605478.zip | |
[X86][SSE] Don't coalesce v4i32 extracts
We currently coalesce v4i32 extracts from all 4 elements to 2 v2i64 extracts + shifts/sign-extends.
This seems to have been added back in the days when we tended to spill vectors and reload scalars, or ended up with repeated shuffles moving everything down to the 0th index. I don't think either of these is likely these days, as we have better EXTRACT_VECTOR_ELT and VECTOR_SHUFFLE handling, and the existing code tends to make things very difficult for various vector and load combines.
Differential Revision: https://reviews.llvm.org/D42308
llvm-svn: 323541
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 97 |
1 files changed, 1 insertions, 96 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a7049415088..5bc274eb5a2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31239,102 +31239,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget)) return MinMax; - // Only operate on vectors of 4 elements, where the alternative shuffling - // gets to be more expensive. - if (SrcVT != MVT::v4i32) - return SDValue(); - - // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a - // single use which is a sign-extend or zero-extend, and all elements are - // used. - SmallVector<SDNode *, 4> Uses; - unsigned ExtractedElements = 0; - for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(), - UE = InputVector.getNode()->use_end(); UI != UE; ++UI) { - if (UI.getUse().getResNo() != InputVector.getResNo()) - return SDValue(); - - SDNode *Extract = *UI; - if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return SDValue(); - - if (Extract->getValueType(0) != MVT::i32) - return SDValue(); - if (!Extract->hasOneUse()) - return SDValue(); - if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND && - Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND) - return SDValue(); - if (!isa<ConstantSDNode>(Extract->getOperand(1))) - return SDValue(); - - // Record which element was extracted. - ExtractedElements |= 1 << Extract->getConstantOperandVal(1); - Uses.push_back(Extract); - } - - // If not all the elements were used, this may not be worthwhile. - if (ExtractedElements != 15) - return SDValue(); - - // Ok, we've now decided to do the transformation. - // If 64-bit shifts are legal, use the extract-shift sequence, - // otherwise bounce the vector off the cache. 
- const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Vals[4]; - - if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) { - SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector); - auto &DL = DAG.getDataLayout(); - EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL); - SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, - DAG.getConstant(0, dl, VecIdxTy)); - SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, - DAG.getConstant(1, dl, VecIdxTy)); - - SDValue ShAmt = DAG.getConstant( - 32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL)); - Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf); - Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, - DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt)); - Vals[2] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, TopHalf); - Vals[3] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, - DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt)); - } else { - // Store the value to a temporary stack slot. - SDValue StackPtr = DAG.CreateStackTemporary(SrcVT); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, - MachinePointerInfo()); - - EVT ElementType = SrcVT.getVectorElementType(); - unsigned EltSize = ElementType.getSizeInBits() / 8; - - // Replace each use (extract) with a load of the appropriate element. - for (unsigned i = 0; i < 4; ++i) { - uint64_t Offset = EltSize * i; - auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); - SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT); - - SDValue ScalarAddr = - DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal); - - // Load the scalar. 
- Vals[i] = - DAG.getLoad(ElementType, dl, Ch, ScalarAddr, MachinePointerInfo()); - } - } - - // Replace the extracts - for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(), - UE = Uses.end(); UI != UE; ++UI) { - SDNode *Extract = *UI; - - uint64_t IdxVal = Extract->getConstantOperandVal(1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]); - } - - // The replacement was made in place; return N so it won't be revisited. - return SDValue(N, 0); + return SDValue(); } /// If a vector select has an operand that is -1 or 0, try to simplify the |

