author | Sanjay Patel <spatel@rotateright.com> | 2015-08-14 17:53:40 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2015-08-14 17:53:40 +0000 |
commit | ed502905f7c285959c5555bc01f8af762dadb8d6 (patch) | |
tree | 3fd3da3e84329fb1b344b69c81712c44ff06a0b6 /llvm/lib | |
parent | 06f0678010428b0c69612fdd32a59262dfe14d59 (diff) | |
download | bcm5719-llvm-ed502905f7c285959c5555bc01f8af762dadb8d6.tar.gz, bcm5719-llvm-ed502905f7c285959c5555bc01f8af762dadb8d6.zip | |
[x86] fix allowsMisalignedMemoryAccesses() implementation
This patch fixes the x86 implementation of allowsMisalignedMemoryAccesses() so that it
correctly sets the 'Fast' output parameter for 32-byte accesses. To exercise that, an
existing load-merging optimization is changed to use the TLI hook. This exposes a
shortcoming in the current logic and accounts for the regression test update. Changing
the other direct users of the isUnalignedMem32Slow() x86 CPU attribute is left for a
follow-on patch.
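As a rough illustration of the new 'Fast' computation (a standalone sketch, not LLVM code; FakeSubtarget and allowsMisalignedAccess are made-up names used only here): a 256-bit access reports fast only when the subtarget does not mark unaligned 32-byte accesses as slow, while every other size keeps the old behavior.

```cpp
// Hypothetical standalone model of the patched decision; FakeSubtarget and
// allowsMisalignedAccess are illustrative names, not LLVM API.
#include <cstdio>

struct FakeSubtarget {
  bool UnalignedMemAccessFast;  // models isUnalignedMemAccessFast()
  bool UnalignedMem32Slow;      // models isUnalignedMem32Slow()
};

// Mirrors the new logic: 256-bit (32-byte) accesses consult the 32-byte
// attribute; everything else falls back to the generic attribute.
static bool allowsMisalignedAccess(const FakeSubtarget &ST, unsigned SizeInBits,
                                   bool *Fast) {
  if (Fast) {
    if (SizeInBits == 256)
      *Fast = !ST.UnalignedMem32Slow;
    else
      *Fast = ST.UnalignedMemAccessFast;
  }
  return true;  // the access is always legal; only its speed varies
}

int main() {
  // Roughly models a Sandy Bridge-like CPU: unaligned 16-byte accesses are
  // fast, but unaligned 32-byte accesses are slow.
  FakeSubtarget SandyBridgeLike{true, true};
  bool Fast;
  allowsMisalignedAccess(SandyBridgeLike, 128, &Fast);
  std::printf("128-bit unaligned fast: %d\n", Fast);  // prints 1
  allowsMisalignedAccess(SandyBridgeLike, 256, &Fast);
  std::printf("256-bit unaligned fast: %d\n", Fast);  // prints 0
  return 0;
}
```

On such a configuration this reports 128-bit unaligned accesses as fast and 256-bit ones as slow, which is the distinction the old code lost.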
Without the fix in allowsMisalignedMemoryAccesses(), we would enter an infinite loop when
targeting SandyBridge, because LowerINSERT_SUBVECTOR() merges two 16-byte loads into a
32-byte load while PerformLOADCombine() splits it back into two 16-byte loads.
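A toy model of that ping-pong (purely illustrative; mergeLoads and splitLoads below are stand-ins for LowerINSERT_SUBVECTOR() and PerformLOADCombine(), not the real SelectionDAG code): when the merge step ignores the 32-byte-slow attribute that the split step honors, the two rewrites keep undoing each other and never reach a fixed point.

```cpp
// Toy model of the mutual undo, with made-up types; the real issue lives in
// the SelectionDAG combine loop, not in code like this.
#include <cstdio>

enum class LoadShape { TwoHalves, OneWide };  // two 16-byte loads vs one 32-byte load

// Stand-in for LowerINSERT_SUBVECTOR(): merges two 16-byte loads into one
// 32-byte load. Before the fix it did not ask whether that is fast.
static bool mergeLoads(LoadShape &S) {
  if (S == LoadShape::TwoHalves) { S = LoadShape::OneWide; return true; }
  return false;
}

// Stand-in for PerformLOADCombine(): splits a 32-byte load back into two
// 16-byte loads when unaligned 32-byte accesses are slow on the target.
static bool splitLoads(LoadShape &S, bool UnalignedMem32Slow) {
  if (S == LoadShape::OneWide && UnalignedMem32Slow) {
    S = LoadShape::TwoHalves;
    return true;
  }
  return false;
}

int main() {
  LoadShape S = LoadShape::TwoHalves;
  const bool UnalignedMem32Slow = true;  // e.g. a Sandy Bridge-like target
  // With inconsistent checks the two rewrites never reach a fixed point;
  // cap the iterations here so the demo terminates.
  int Iter = 0;
  while (Iter < 8 && (mergeLoads(S) || splitLoads(S, UnalignedMem32Slow)))
    ++Iter;
  std::printf("still rewriting after %d steps\n", Iter);
  return 0;
}
```

Once the merge side consults the same hook, as the patch does via allowsMemoryAccess(), both transforms agree and the cycle cannot start.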
Differential Revision: http://reviews.llvm.org/D10662
llvm-svn: 245075
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 37 |
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9a790010f8c..68a15e021cd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1915,8 +1915,14 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                    unsigned,
                                                    unsigned,
                                                    bool *Fast) const {
-  if (Fast)
-    *Fast = Subtarget->isUnalignedMemAccessFast();
+  if (Fast) {
+    // FIXME: We should be checking 128-bit accesses separately from smaller
+    // accesses.
+    if (VT.getSizeInBits() == 256)
+      *Fast = !Subtarget->isUnalignedMem32Slow();
+    else
+      *Fast = Subtarget->isUnalignedMemAccessFast();
+  }
   return true;
 }
 
@@ -11259,14 +11265,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
   // --> load32 addr
   if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
       Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
-      OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
-      !Subtarget->isUnalignedMem32Slow()) {
-    SDValue SubVec2 = Vec.getOperand(1);
-    if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
-      if (Idx2->getZExtValue() == 0) {
-        SDValue Ops[] = { SubVec2, SubVec };
-        if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
-          return Ld;
+      OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+    auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+    if (Idx2 && Idx2->getZExtValue() == 0) {
+      SDValue SubVec2 = Vec.getOperand(1);
+      // If needed, look through a bitcast to get to the load.
+      if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+        SubVec2 = SubVec2.getOperand(0);
+
+      if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+        bool Fast;
+        unsigned Alignment = FirstLd->getAlignment();
+        unsigned AS = FirstLd->getAddressSpace();
+        const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+        if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                    OpVT, AS, Alignment, &Fast) && Fast) {
+          SDValue Ops[] = { SubVec2, SubVec };
+          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+            return Ld;
+        }
       }
     }
   }