diff options
author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2014-12-23 06:07:31 +0000 |
---|---|---|
committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2014-12-23 06:07:31 +0000 |
commit | 4553bff4126068ce5b9f451b75f71d367320cec7 (patch) | |
tree | d591daa8e5565f9550132450c5966ff1b49f31c6 /llvm/lib | |
parent | 279663c1b4bfcec7f849dd2becf207841c45c0cf (diff) | |
download | bcm5719-llvm-4553bff4126068ce5b9f451b75f71d367320cec7.tar.gz bcm5719-llvm-4553bff4126068ce5b9f451b75f71d367320cec7.zip |
[ARM] Don't break alignment when combining base updates into load/stores.
r223862/r224203 tried to also combine base-updating load/stores.
There was a mistake there: the alignment was added as is as an operand to
the ARMISD::VLD/VST node. However, the VLD/VST selection logic doesn't care
about less-than-standard alignment attributes.
For example, no matter the alignment of a v2i64 load (say 1), SelectVLD picks
VLD1q64 (because of the memory type). But VLD1q64 ("vld1.64 {dXX, dYY}") is
8-aligned, per ARMARMv7a 3.2.1.
For the 1-aligned load, what we really want is VLD1q8.
This commit introduces bitcasts if necessary, and changes the vld/vst type to
one whose standard alignment matches the original load/store alignment.
Differential Revision: http://reviews.llvm.org/D6759
llvm-svn: 224754
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 49 |
1 files changed, 47 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a319944c0e2..7c4ebc03d8b 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -8976,13 +8976,42 @@ static SDValue CombineBaseUpdate(SDNode *N, continue; } + EVT AlignedVecTy = VecTy; + + // If this is a less-than-standard-aligned load/store, change the type to + // match the standard alignment. + // The alignment is overlooked when selecting _UPD variants; and it's + // easier to introduce bitcasts here than fix that. + // There are 3 ways to get to this base-update combine: + // - intrinsics: they are assumed to be properly aligned (to the standard + // alignment of the memory type), so we don't need to do anything. + // - ARMISD::VLDx nodes: they are only generated from the aforementioned + // intrinsics, so, likewise, there's nothing to do. + // - generic load/store instructions: the alignment is specified as an + // explicit operand, rather than implicitly as the standard alignment + // of the memory type (like the intrisics). We need to change the + // memory type to match the explicit alignment. That way, we don't + // generate non-standard-aligned ARMISD::VLDx nodes. + if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N)) { + unsigned Alignment = LSN->getAlignment(); + if (Alignment == 0) + Alignment = 1; + if (Alignment < VecTy.getScalarSizeInBits() / 8) { + MVT EltTy = MVT::getIntegerVT(Alignment * 8); + assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); + assert(!isLaneOp && "Unexpected generic load/store lane."); + unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); + AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); + } + } + // Create the new updating load/store node. // First, create an SDVTList for the new updating node's results. EVT Tys[6]; unsigned NumResultVecs = (isLoad ? NumVecs : 0); unsigned n; for (n = 0; n < NumResultVecs; ++n) - Tys[n] = VecTy; + Tys[n] = AlignedVecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); @@ -9000,9 +9029,17 @@ static SDValue CombineBaseUpdate(SDNode *N, for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) Ops.push_back(N->getOperand(i)); } + + // If this is a non-standard-aligned store, the penultimate operand is the + // stored value. Bitcast it to the aligned type. + if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { + SDValue &StVal = Ops[Ops.size()-2]; + StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal); + } + MemSDNode *MemInt = cast<MemSDNode>(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops, MemInt->getMemoryVT(), + Ops, AlignedVecTy, MemInt->getMemOperand()); // Update the uses. @@ -9010,6 +9047,14 @@ static SDValue CombineBaseUpdate(SDNode *N, for (unsigned i = 0; i < NumResultVecs; ++i) { NewResults.push_back(SDValue(UpdN.getNode(), i)); } + + // If this is an non-standard-aligned load, the first result is the loaded + // value. Bitcast it to the expected result type. + if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { + SDValue &LdVal = NewResults[0]; + LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal); + } + NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain DCI.CombineTo(N, NewResults); DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); |