summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjorn Pettersson <bjorn.a.pettersson@ericsson.com>2017-12-05 14:50:05 +0000
committerBjorn Pettersson <bjorn.a.pettersson@ericsson.com>2017-12-05 14:50:05 +0000
commit823b299fbce7baa6493052dd66e909201fba0f77 (patch)
tree3fb5a0d6bc3dfeb33e0eaf3eef8a388e138dfa54
parent71660c61e67ff511035365a2bea2cf5f24260f4c (diff)
downloadbcm5719-llvm-823b299fbce7baa6493052dd66e909201fba0f77.tar.gz
bcm5719-llvm-823b299fbce7baa6493052dd66e909201fba0f77.zip
[DAGCombine] Handle big endian correctly in CombineConsecutiveLoads
Summary: Found out, at code inspection, that there was a fault in DAGCombiner::CombineConsecutiveLoads for big-endian targets. A BUILD_PAIR is always having the least significant bits of the composite value in element 0. So when we are doing the checks for consecutive loads, for big endian targets, we should check if the load to elt 1 is at the lower address and the load to elt 0 is at the higher address. Normally this bug only resulted in missed oppurtunities for doing the load combine. I guess that in some rare situation it could lead to faulty combines, but I've not seen that happen. Note that this patch actually will trigger load combine for some big endian regression tests. One example is test/CodeGen/PowerPC/anon_aggr.ll where we now get t76: i64,ch = load<LD8[FixedStack-9] instead of t37: i32,ch = load<LD4[FixedStack-10]> t35: i32,ch = load<LD4[FixedStack-9]> t41: i64 = build_pair t37, t35 before legalization. Then the legalization will split the LD8 into two loads, so the end result is the same. That should verify that the transfomation is correct now. Reviewers: niravd, hfinkel Reviewed By: niravd Subscribers: nemanjai, llvm-commits Differential Revision: https://reviews.llvm.org/D40444 llvm-svn: 319771
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp7
-rw-r--r--llvm/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll19
3 files changed, 28 insertions, 1 deletions
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 9e4865ff2c2..d256849be9a 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -186,7 +186,8 @@ namespace ISD {
/// BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
/// Given two values of the same integer value type, this produces a value
/// twice as big. Like EXTRACT_ELEMENT, this can only be used before
- /// legalization.
+ /// legalization. The lower part of the composite value should be in
+ /// element 0 and the upper part should be in element 1.
BUILD_PAIR,
/// MERGE_VALUES - This node takes multiple discrete operands and returns
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 507ae5b5fb7..7f02a643c0a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8622,6 +8622,13 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+
+ // A BUILD_PAIR is always having the least significant part in elt 0 and the
+ // most significant part in elt 1. So when combining into one large load, we
+ // need to consider the endianness.
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LD1, LD2);
+
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
diff --git a/llvm/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll b/llvm/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll
new file mode 100644
index 00000000000..df687b67e1d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll
@@ -0,0 +1,19 @@
+; RUN: llc -verify-machineinstrs -O0 -mcpu=g4 -mtriple=powerpc-apple-darwin8 < %s -debug -stop-after=machineverifier 2>&1 | FileCheck %s
+
+define i64 @func1(i64 %p1, i64 %p2, i64 %p3, i64 %p4, { i64, i8* } %struct) {
+; Verify that we get a combine on the build_pair, creating a LD8 load somewhere
+; between "Initial selection DAG" and "Optimized lowered selection DAG".
+; The target is big-endian, and stack grows towards higher addresses,
+; so we expect the LD8 to load from the address used in the original HIBITS
+; load.
+; CHECK-LABEL: Initial selection DAG:
+; CHECK-DAG: [[LOBITS:t[0-9]+]]: i32,ch = load<LD4[FixedStack-2]>
+; CHECK-DAG: [[HIBITS:t[0-9]+]]: i32,ch = load<LD4[FixedStack-1]>
+; CHECK: Combining: t{{[0-9]+}}: i64 = build_pair [[LOBITS]], [[HIBITS]]
+; CHECK-NEXT: into
+; CHECK-SAME: load<LD8[FixedStack-1]
+; CHECK-LABEL: Optimized lowered selection DAG:
+ %result = extractvalue {i64, i8* } %struct, 0
+ ret i64 %result
+}
+
OpenPOWER on IntegriCloud