Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 67
1 file changed, 47 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index afef7ece03b..f9a4d92033e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2182,14 +2182,11 @@ static bool hasVolatileUser(SDNode *Val) {
return false;
}
-bool AMDGPUTargetLowering::shouldCombineMemoryType(const MemSDNode *M) const {
- EVT VT = M->getMemoryVT();
-
+bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
// i32 vectors are the canonical memory type.
if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
return false;
-
if (!VT.isByteSized())
return false;
@@ -2201,15 +2198,6 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(const MemSDNode *M) const {
if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
return false;
- unsigned Align = M->getAlignment();
- if (Align < Size) {
- bool IsFast;
- if (!allowsMisalignedMemoryAccesses(VT, M->getAddressSpace(), Align, &IsFast) ||
- !IsFast) {
- return false;
- }
- }
-
return true;
}
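
[Note on the two hunks above: after the refactor, shouldCombineMemoryType keeps only type- and size-based checks, while the alignment check moves into the callers. The retained size rule can be modeled standalone. A minimal sketch, covering only the byte-size check; the i32-canonical and isTypeLegal checks need LLVM's EVT machinery and are omitted, and sizeIsCombinable is a hypothetical name, not part of the patch:

#include <cstdio>

// Hypothetical standalone model of the size check retained in
// shouldCombineMemoryType: a memory type is only worth recasting to the
// canonical i32-based form when its store size is 1, 2, or 4 bytes, or a
// multiple of 4 bytes; 3-byte and other ragged sizes are rejected.
static bool sizeIsCombinable(unsigned Size) {
  return !(Size == 3 || (Size > 4 && (Size % 4 != 0)));
}

int main() {
  for (unsigned Size = 1; Size <= 12; ++Size)
    std::printf("store size %2u -> %s\n", Size,
                sizeIsCombinable(Size) ? "combinable" : "rejected");
}
]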
@@ -2224,12 +2212,32 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
if (LN->isVolatile() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
return SDValue();
- if (!shouldCombineMemoryType(LN))
- return SDValue();
-
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
EVT VT = LN->getMemoryVT();
+
+ unsigned Size = VT.getStoreSize();
+ unsigned Align = LN->getAlignment();
+ if (Align < Size && isTypeLegal(VT)) {
+ bool IsFast;
+ unsigned AS = LN->getAddressSpace();
+
+ // Expand unaligned loads earlier than legalization. Due to visitation order
+ // problems during legalization, the emitted instructions to pack and unpack
+ // the bytes again are not eliminated in the case of an unaligned copy.
+ if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
+ return DAG.getMergeValues(Ops, SDLoc(N));
+ }
+
+ if (!IsFast)
+ return SDValue();
+ }
+
+ if (!shouldCombineMemoryType(VT))
+ return SDValue();
+
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
SDValue NewLoad
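
[Note on the hunk above: the gating it adds has three outcomes: expand the unaligned load immediately, give up on this combine, or fall through to the type rewrite. A minimal sketch of that control flow, where allowsMisaligned and classifyLoad are hypothetical placeholders standing in for allowsMisalignedMemoryAccesses and the combine itself:

#include <cstdio>
#include <initializer_list>

// Hypothetical three-way outcome mirroring the combine's new gating.
enum class CombineAction { ExpandNow, Bail, Continue };

// Placeholder for TargetLowering::allowsMisalignedMemoryAccesses; the real
// query is per-type and per-address-space. IsFast reports whether the
// hardware handles the misaligned access at full speed.
static bool allowsMisaligned(unsigned Align, bool &IsFast) {
  IsFast = Align >= 4; // assumption for illustration only
  return Align >= 2;   // assumption for illustration only
}

static CombineAction classifyLoad(unsigned Align, unsigned Size,
                                  bool TypeIsLegal) {
  if (Align < Size && TypeIsLegal) {
    bool IsFast;
    // Not allowed at all: expand into aligned pieces before legalization,
    // so later combines can still clean up the pack/unpack code.
    if (!allowsMisaligned(Align, IsFast))
      return CombineAction::ExpandNow;
    // Allowed but slow: leave the load alone rather than widen it.
    if (!IsFast)
      return CombineAction::Bail;
  }
  return CombineAction::Continue;
}

int main() {
  for (unsigned Align : {1u, 2u, 4u})
    std::printf("align %u, size 4 -> action %d\n", Align,
                static_cast<int>(classifyLoad(Align, 4, true)));
}
]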
@@ -2252,15 +2260,34 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
if (SN->isVolatile() || !ISD::isNormalStore(SN))
return SDValue();
- if (!shouldCombineMemoryType(SN))
- return SDValue();
-
- SDValue Val = SN->getValue();
EVT VT = SN->getMemoryVT();
+ unsigned Size = VT.getStoreSize();
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
+ unsigned Align = SN->getAlignment();
+ if (Align < Size && isTypeLegal(VT)) {
+ bool IsFast;
+ unsigned AS = SN->getAddressSpace();
+
+ // Expand unaligned stores earlier than legalization. Due to visitation
+ // order problems during legalization, the emitted instructions to pack and
+ // unpack the bytes again are not eliminated in the case of an unaligned
+ // copy.
+ if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast))
+ return expandUnalignedStore(SN, DAG);
+
+ if (!IsFast)
+ return SDValue();
+ }
+
+ if (!shouldCombineMemoryType(VT))
+ return SDValue();
+
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
+ SDValue Val = SN->getValue();
+
+ //DCI.AddToWorklist(Val.getNode());
bool OtherUses = !Val.hasOneUse();
SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
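
[Note: the store path deliberately mirrors the load path: the same expand-or-bail gate runs before shouldCombineMemoryType, and both combines finish by bitcasting to the type from getEquivalentMemType. A rough standalone model of that mapping, assuming store sizes of at most 32 bits become the same-width integer type and larger multiples of 32 bits become N x i32 vectors; equivalentMemType is a hypothetical name:

#include <cstdio>
#include <initializer_list>
#include <string>

// Rough model of getEquivalentMemType's mapping, under the assumption
// stated above: <= 32 bits -> iN, otherwise a vector of i32 elements.
static std::string equivalentMemType(unsigned SizeInBytes) {
  unsigned Bits = SizeInBytes * 8;
  if (Bits <= 32)
    return "i" + std::to_string(Bits);            // e.g. 2 bytes -> i16
  return "v" + std::to_string(Bits / 32) + "i32"; // e.g. 8 bytes -> v2i32
}

int main() {
  for (unsigned Size : {1u, 2u, 4u, 8u, 16u})
    std::printf("%2u bytes -> %s\n", Size, equivalentMemType(Size).c_str());
}
]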