author    Bjorn Pettersson <bjorn.a.pettersson@ericsson.com> 2018-10-30 20:16:39 +0000
committer Bjorn Pettersson <bjorn.a.pettersson@ericsson.com> 2018-10-30 20:16:39 +0000
commit    fe09a20f09a290274ea81d1703d4106e06a11f94 (patch)
tree      dd011f65def4053346cb66eb78efb297ca17f32d
parent    fa03c690bd55e58fe8c8b13a9b4525e8b8cb2c9f (diff)
[DAGCombiner] Fix for big endian in ForwardStoreValueToDirectLoad
Summary:
Normalize the offset for endianness before checking whether the store
covers the load in ForwardStoreValueToDirectLoad. Without this we missed
out on some optimizations for big-endian targets. For example, given a
4-byte store followed by a 1-byte load that reads the least significant
byte of the stored value, the STCoversLD check would fail (see @test4 in
test/CodeGen/AArch64/load-store-forwarding.ll).

This patch also fixes a problem seen in an out-of-tree target. That
target has i40 as a legal type, it is big endian, and the StoreSize for
i40 is 48 bits. So when normalizing the offset for endianness we need to
take the StoreSize into account (assuming that any padding added when
storing into a larger StoreSize is always added at the most significant
end).

Reviewers: niravd

Reviewed By: niravd

Subscribers: javed.absar, kristof.beyls, llvm-commits, uabelho

Differential Revision: https://reviews.llvm.org/D53776

llvm-svn: 345636
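A worked sketch of the normalization arithmetic described above (the i40
byte layout is an assumption derived from the padding note, not verified
against any in-tree target):

    Big endian, 4-byte store, 1-byte load of the least significant byte
    (@test4): Offset = 3.
        normalized Offset = (32 - 8) / 8 - 3 = 0    -> store covers load
    Big endian i40 with a 48-bit StoreSize, 1-byte load of the least
    significant byte: Offset = 5, since the 6-byte slot carries one
    padding byte at the most significant end.
        normalized Offset = (48 - 8) / 8 - 5 = 0    -> store covers load
        (with getSizeInBits instead: (40 - 8) / 8 - 5 = -1, and the
        forwarding would be wrongly rejected)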
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp22
-rw-r--r--llvm/test/CodeGen/AArch64/load-store-forwarding.ll77
2 files changed, 90 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 742ca02a03d..fba2aa9cb52 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12854,20 +12854,24 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
int64_t Offset;
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ return SDValue();
+
+ // Normalize for endianness. After this, Offset=0 will denote that the least
+ // significant bit in the loaded value maps to the least significant bit in
+ // the stored value. With Offset=n (for n > 0) the loaded value starts at the
+ // n-th least significant byte of the stored value.
+ if (DAG.getDataLayout().isBigEndian())
+ Offset = (STMemType.getStoreSizeInBits() -
+ LDMemType.getStoreSizeInBits()) / 8 - Offset;
+ // Check that the stored value covers all bits that are loaded.
bool STCoversLD =
- BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset) && (Offset >= 0) &&
- (Offset * 8 <= LDMemType.getSizeInBits()) &&
+ (Offset >= 0) &&
(Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
-
if (!STCoversLD)
return SDValue();
- // Normalize for Endianness.
- if (DAG.getDataLayout().isBigEndian())
- Offset =
- (STMemType.getSizeInBits() - LDMemType.getSizeInBits()) / 8 - Offset;
-
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
// Simple case: Direct non-truncating forwarding
@@ -12899,7 +12903,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
continue;
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
- if (!STMemType.isVector() && !LDMemType.isVector() &&
+ if (!STMemType.isVector() && !LDMemType.isVector() &&
STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
else
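To make the reordered check concrete, here is a minimal standalone sketch
of the same logic in plain C++ (not the LLVM API; all names are
illustrative):

    #include <cassert>
    #include <cstdint>

    // Returns the load's byte offset from the least significant byte of
    // the stored value, or -1 if the store does not cover the load.
    int64_t normalizeAndCheck(int64_t Offset, bool BigEndian,
                              uint64_t StSizeBits, uint64_t LdSizeBits,
                              uint64_t StStoreSizeBits,
                              uint64_t LdStoreSizeBits) {
      // Normalize so Offset counts bytes from the least significant end.
      if (BigEndian)
        Offset = (int64_t)(StStoreSizeBits - LdStoreSizeBits) / 8 - Offset;
      // The store covers the load if every loaded bit lies inside the
      // stored value.
      bool STCoversLD =
          Offset >= 0 && (uint64_t)Offset * 8 + LdSizeBits <= StSizeBits;
      return STCoversLD ? Offset : -1;
    }

    int main() {
      // @test4: big endian, i32 store, i8 load of the LSB (byte offset 3).
      assert(normalizeAndCheck(3, true, 32, 8, 32, 8) == 0);
      // Out-of-tree i40 case: value is 40 bits, but StoreSize is 48 bits.
      assert(normalizeAndCheck(5, true, 40, 8, 48, 8) == 0);
      return 0;
    }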
diff --git a/llvm/test/CodeGen/AArch64/load-store-forwarding.ll b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
new file mode 100644
index 00000000000..e6124270169
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/load-store-forwarding.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefix CHECK-BE
+; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefix CHECK-LE
+
+define i8 @test1(i32 %a, i8* %pa) {
+; CHECK-BE-LABEL: test1:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str w0, [x1]
+; CHECK-BE-NEXT: ldrb w0, [x1]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LE-LABEL: test1:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: str w0, [x1]
+; CHECK-LE-NEXT: ret
+ %p32 = bitcast i8* %pa to i32*
+ %p8 = getelementptr i8, i8* %pa, i32 0
+ store i32 %a, i32* %p32
+ %res = load i8, i8* %p8
+ ret i8 %res
+}
+
+define i8 @test2(i32 %a, i8* %pa) {
+; CHECK-BE-LABEL: test2:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str w0, [x1]
+; CHECK-BE-NEXT: ldrb w0, [x1, #1]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LE-LABEL: test2:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: str w0, [x1]
+; CHECK-LE-NEXT: ubfx w0, w0, #8, #8
+; CHECK-LE-NEXT: ret
+ %p32 = bitcast i8* %pa to i32*
+ %p8 = getelementptr i8, i8* %pa, i32 1
+ store i32 %a, i32* %p32
+ %res = load i8, i8* %p8
+ ret i8 %res
+}
+
+define i8 @test3(i32 %a, i8* %pa) {
+; CHECK-BE-LABEL: test3:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str w0, [x1]
+; CHECK-BE-NEXT: ldrb w0, [x1, #2]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LE-LABEL: test3:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: str w0, [x1]
+; CHECK-LE-NEXT: ubfx w0, w0, #16, #8
+; CHECK-LE-NEXT: ret
+ %p32 = bitcast i8* %pa to i32*
+ %p8 = getelementptr i8, i8* %pa, i32 2
+ store i32 %a, i32* %p32
+ %res = load i8, i8* %p8
+ ret i8 %res
+}
+
+define i8 @test4(i32 %a, i8* %pa) {
+; CHECK-BE-LABEL: test4:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: str w0, [x1]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LE-LABEL: test4:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: str w0, [x1]
+; CHECK-LE-NEXT: lsr w0, w0, #24
+; CHECK-LE-NEXT: ret
+ %p32 = bitcast i8* %pa to i32*
+ %p8 = getelementptr i8, i8* %pa, i32 3
+ store i32 %a, i32* %p32
+ %res = load i8, i8* %p8
+ ret i8 %res
+}