author    Peter Collingbourne <peter@pcc.me.uk>    2018-04-23 19:09:34 +0000
committer Peter Collingbourne <peter@pcc.me.uk>    2018-04-23 19:09:34 +0000
commit    5ab4a4793efbe8805c3fedc8f6625ebad987ce7b (patch)
tree      f160a8e343181015aee8ce2b4d54dc4e1dee4794 /llvm
parent    cc45e923c5c1be906ad9b56b2665b954f78c4a49 (diff)
Reland r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses.", with a fix for the bot failure.
This reland includes a check to prevent the DAG combiner from folding an offset
that is smaller than the existing one. This can cause oscillations between two
possible DAGs, which was the cause of the hang and later assertion failure
observed on the lnt-ctmark-aarch64-O3-flto bot.

http://green.lab.llvm.org/green/job/lnt-ctmark-aarch64-O3-flto/2024/

Original commit message:

> This is a code size win in code that takes offseted addresses
> frequently, such as C++ constructors that typically need to compute
> an offseted address of a vtable. This reduces the size of Chromium
> for Android's .text section by 108KB.

Differential Revision: https://reviews.llvm.org/D45199

llvm-svn: 330630
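For illustration only (not part of the patch): a minimal sketch of the kind of
offsetted global access this combine targets, written in the style of the new
fold-global-offsets.ll test added below. The global @g and the function
@load_elt are assumed names, and the CHECK lines show the expected post-fold
output, where the +8 offset is folded into the adrp and :lo12: relocations
instead of being materialized with a separate add.

; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s

@g = external hidden global [4 x i64]

define i64 @load_elt() {
  ; The constant offset 8 is a multiple of the access size and stays within
  ; the object, so the combine folds it into the global address rather than
  ; emitting a separate add instruction.
  ; CHECK-LABEL: load_elt:
  ; CHECK: adrp [[BASE:x[0-9]+]], g+8
  ; CHECK: ldr x0, {{\[}}[[BASE]], :lo12:g+8]
  %l = load i64, i64* getelementptr ([4 x i64], [4 x i64]* @g, i64 0, i64 1)
  ret i64 %l
}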
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp                        18
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp                        71
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-addrmode.ll                            31
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll                        129
-rw-r--r--  llvm/test/CodeGen/AArch64/fold-global-offsets.ll                       69
-rw-r--r--  llvm/test/CodeGen/AArch64/global-merge-3.ll                             4
-rw-r--r--  llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll    11
-rw-r--r--  llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll             6
8 files changed, 256 insertions, 83 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index eee59f1e719..d44eee051aa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -743,14 +743,16 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
if (!GAN)
return true;
- const GlobalValue *GV = GAN->getGlobal();
- unsigned Alignment = GV->getAlignment();
- Type *Ty = GV->getValueType();
- if (Alignment == 0 && Ty->isSized())
- Alignment = DL.getABITypeAlignment(Ty);
-
- if (Alignment >= Size)
- return true;
+ if (GAN->getOffset() % Size == 0) {
+ const GlobalValue *GV = GAN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+ Type *Ty = GV->getValueType();
+ if (Alignment == 0 && Ty->isSized())
+ Alignment = DL.getABITypeAlignment(Ty);
+
+ if (Alignment >= Size)
+ return true;
+ }
}
if (CurDAG->isBaseWithConstantOffset(N)) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 19573e180ee..e12aeb46765 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -577,6 +577,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::GlobalAddress);
+
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -3677,7 +3679,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
- return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
+ N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3752,8 +3755,9 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
- assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
- "unexpected offset in global node");
+ if (OpFlags != AArch64II::MO_NO_FLAG)
+ assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+ "unexpected offset in global node");
// This also catches the large code model case for Darwin.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4991,10 +4995,8 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
- DEBUG(dbgs() << "Skipping offset folding global address: ");
- DEBUG(GA->dump());
- DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
- "addresses\n");
+ // Offsets are folded in the DAG combine rather than here so that we can
+ // intelligently choose an offset based on the uses.
return false;
}
@@ -10617,6 +10619,59 @@ static SDValue performNVCASTCombine(SDNode *N) {
return SDValue();
}
+// If all users of the globaladdr are of the form (globaladdr + constant), find
+// the smallest constant, fold it into the globaladdr's offset and rewrite the
+// globaladdr as (globaladdr + constant) - constant.
+static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget,
+ const TargetMachine &TM) {
+ auto *GN = dyn_cast<GlobalAddressSDNode>(N);
+ if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
+ AArch64II::MO_NO_FLAG)
+ return SDValue();
+
+ uint64_t MinOffset = -1ull;
+ for (SDNode *N : GN->uses()) {
+ if (N->getOpcode() != ISD::ADD)
+ return SDValue();
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
+ if (!C)
+ C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ MinOffset = std::min(MinOffset, C->getZExtValue());
+ }
+ uint64_t Offset = MinOffset + GN->getOffset();
+
+ // Require that the new offset is larger than the existing one. Otherwise, we
+ // can end up oscillating between two possible DAGs, for example,
+ // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
+ if (Offset <= uint64_t(GN->getOffset()))
+ return SDValue();
+
+ // Check whether folding this offset is legal. It must not go out of bounds of
+ // the referenced object to avoid violating the code model, and must be
+ // smaller than 2^21 because this is the largest offset expressible in all
+ // object formats.
+ //
+ // This check also prevents us from folding negative offsets, which will end
+ // up being treated in the same way as large positive ones. They could also
+ // cause code model violations, and aren't really common enough to matter.
+ if (Offset >= (1 << 21))
+ return SDValue();
+
+ const GlobalValue *GV = GN->getGlobal();
+ Type *T = GV->getValueType();
+ if (!T->isSized() ||
+ Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
+ return SDValue();
+
+ SDLoc DL(GN);
+ SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
+ return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
+ DAG.getConstant(MinOffset, DL, MVT::i64));
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -10704,6 +10759,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default:
break;
}
+ case ISD::GlobalAddress:
+ return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index 6da76792163..16f8d016063 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -5,32 +5,31 @@
; base + offset (imm9)
; CHECK: @t1
-; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
+; CHECK: ldr xzr, [x0, #8]
; CHECK: ret
-define void @t1() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
+define void @t1(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
; base + offset (> imm9)
; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
; CHECK: ret
-define void @t2() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
+define void @t2(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
; CHECK: @t3
-; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
+; CHECK: ldr xzr, [x0, #32760]
; CHECK: ret
-define void @t3() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
+define void @t3(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -38,10 +37,10 @@ define void @t3() {
; base + unsigned offset (> imm12 * size of type in bytes)
; CHECK: @t4
; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
+; CHECK: ldr xzr, [x0, x[[NUM]]]
; CHECK: ret
-define void @t4() {
- %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
+define void @t4(i64* %object) {
+ %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -58,12 +57,12 @@ define void @t5(i64 %a) {
; base + reg + imm
; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
; CHECK: ret
-define void @t6(i64 %a) {
- %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
+define void @t6(i64 %a, i64* %object) {
+ %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
%incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
%tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
index 938b3d1d059..6e530cb258b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -264,149 +264,196 @@ entry:
; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses
-@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
-define <1 x i64> @fct0() nounwind readonly ssp {
+define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <1 x i64>*
+ %0 = load <1 x i64>, <1 x i64>* %q, align 8
ret <1 x i64> %0
}
-define <2 x i32> @fct1() nounwind readonly ssp {
+define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i32>*
+ %0 = load <2 x i32>, <2 x i32>* %q, align 8
ret <2 x i32> %0
}
-define <4 x i16> @fct2() nounwind readonly ssp {
+define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i16>*
+ %0 = load <4 x i16>, <4 x i16>* %q, align 8
ret <4 x i16> %0
}
-define <8 x i8> @fct3() nounwind readonly ssp {
+define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i8>*
+ %0 = load <8 x i8>, <8 x i8>* %q, align 8
ret <8 x i8> %0
}
-define <2 x i64> @fct4() nounwind readonly ssp {
+define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i64>*
+ %0 = load <2 x i64>, <2 x i64>* %q, align 16
ret <2 x i64> %0
}
-define <4 x i32> @fct5() nounwind readonly ssp {
+define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i32>*
+ %0 = load <4 x i32>, <4 x i32>* %q, align 16
ret <4 x i32> %0
}
-define <8 x i16> @fct6() nounwind readonly ssp {
+define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i16>*
+ %0 = load <8 x i16>, <8 x i16>* %q, align 16
ret <8 x i16> %0
}
-define <16 x i8> @fct7() nounwind readonly ssp {
+define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <16 x i8>*
+ %0 = load <16 x i8>, <16 x i8>* %q, align 16
ret <16 x i8> %0
}
-define void @fct8() nounwind ssp {
+define void @fct8(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
- store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <1 x i64>*
+ %0 = load <1 x i64>, <1 x i64>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <1 x i64>*
+ store <1 x i64> %0, <1 x i64>* %q2, align 8
ret void
}
-define void @fct9() nounwind ssp {
+define void @fct9(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
- store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i32>*
+ %0 = load <2 x i32>, <2 x i32>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <2 x i32>*
+ store <2 x i32> %0, <2 x i32>* %q2, align 8
ret void
}
-define void @fct10() nounwind ssp {
+define void @fct10(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
- store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i16>*
+ %0 = load <4 x i16>, <4 x i16>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <4 x i16>*
+ store <4 x i16> %0, <4 x i16>* %q2, align 8
ret void
}
-define void @fct11() nounwind ssp {
+define void @fct11(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
- store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i8>*
+ %0 = load <8 x i8>, <8 x i8>* %q, align 8
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <8 x i8>*
+ store <8 x i8> %0, <8 x i8>* %q2, align 8
ret void
}
-define void @fct12() nounwind ssp {
+define void @fct12(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
- store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <2 x i64>*
+ %0 = load <2 x i64>, <2 x i64>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <2 x i64>*
+ store <2 x i64> %0, <2 x i64>* %q2, align 16
ret void
}
-define void @fct13() nounwind ssp {
+define void @fct13(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
- store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <4 x i32>*
+ %0 = load <4 x i32>, <4 x i32>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <4 x i32>*
+ store <4 x i32> %0, <4 x i32>* %q2, align 16
ret void
}
-define void @fct14() nounwind ssp {
+define void @fct14(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
- store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <8 x i16>*
+ %0 = load <8 x i16>, <8 x i16>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <8 x i16>*
+ store <8 x i16> %0, <8 x i16>* %q2, align 16
ret void
}
-define void @fct15() nounwind ssp {
+define void @fct15(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
- store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
+ %p = getelementptr inbounds i8, i8* %str, i64 3
+ %q = bitcast i8* %p to <16 x i8>*
+ %0 = load <16 x i8>, <16 x i8>* %q, align 16
+ %p2 = getelementptr inbounds i8, i8* %str, i64 4
+ %q2 = bitcast i8* %p2 to <16 x i8>*
+ store <16 x i8> %0, <16 x i8>* %q2, align 16
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
new file mode 100644
index 00000000000..ffcdc2bee5f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
+
+@x1 = external hidden global [2 x i64]
+@x2 = external hidden global [16777216 x i64]
+@x3 = external hidden global { [9 x i8*], [8 x i8*] }
+
+define i64 @f1() {
+ ; CHECK: f1:
+ ; CHECK: adrp x8, x1+16
+ ; CHECK: ldr x0, [x8, :lo12:x1+16]
+ %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
+ ret i64 %l
+}
+
+define i64 @f2() {
+ ; CHECK: f2:
+ ; CHECK: adrp x8, x1
+ ; CHECK: add x8, x8, :lo12:x1
+ ; CHECK: ldr x0, [x8, #24]
+ %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
+ ret i64 %l
+}
+
+define i64 @f3() {
+ ; CHECK: f3:
+ ; CHECK: adrp x8, x1+1
+ ; CHECK: add x8, x8, :lo12:x1+1
+ ; CHECK: ldr x0, [x8]
+ %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
+ ret i64 %l
+}
+
+define [2 x i64] @f4() {
+ ; CHECK: f4:
+ ; CHECK: adrp x8, x2+8
+ ; CHECK: add x8, x8, :lo12:x2+8
+ ; CHECK: ldp x0, x1, [x8]
+ %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
+ ret [2 x i64] %l
+}
+
+define i64 @f5() {
+ ; CHECK: f5:
+ ; CHECK: adrp x8, x2+2097144
+ ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
+ ; CHECK: ret
+ %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
+ ret i64 %l
+}
+
+define i64 @f6() {
+ ; CHECK: f6:
+ ; CHECK: adrp x8, x2
+ ; CHECK: add x8, x8, :lo12:x2
+ ; CHECK: orr w9, wzr, #0x200000
+ ; CHECK: ldr x0, [x8, x9]
+ ; CHECK: ret
+ %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
+ ret i64 %l
+}
+
+define i32 @f7() {
+entry:
+ ; CHECK: f7
+ ; CHECK: adrp x8, x3+108
+ ; CHECK: ldr w0, [x8, :lo12:x3+108]
+ %l = load i32, i32* getelementptr (i32, i32* inttoptr (i64 trunc (i128 lshr (i128 bitcast (<2 x i64> <i64 undef, i64 ptrtoint (i8** getelementptr inbounds ({ [9 x i8*], [8 x i8*] }, { [9 x i8*], [8 x i8*] }* @x3, i64 0, inrange i32 1, i64 2) to i64)> to i128), i128 64) to i64) to i32*), i64 5)
+ ret i32 %l
+}
diff --git a/llvm/test/CodeGen/AArch64/global-merge-3.ll b/llvm/test/CodeGen/AArch64/global-merge-3.ll
index 106d6da4a4a..4844d9622fe 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-3.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-3.ll
@@ -10,8 +10,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
;CHECK-APPLE-IOS-NOT: adrp
;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
-;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF
+;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE+12
+;CHECK-APPLE-IOS: str w1, [x9, __MergedGlobals_y@PAGEOFF+12]
%x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
%y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
store i32 %a1, i32* %x3, align 4
diff --git a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
index 1c1b4f6b045..8207f8cbef0 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
@@ -44,9 +44,9 @@ define void @f2(i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: f3:
define void @f3(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8
+; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: ret
store i32 %a1, i32* @m3, align 4
store i32 %a2, i32* @n3, align 4
@@ -57,10 +57,9 @@ define void @f3(i32 %a1, i32 %a2) minsize nounwind {
; CHECK-LABEL: f4:
define void @f4(i32 %a1, i32 %a2) nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
; CHECK-NEXT: adrp x9, _n4@PAGE
-; CHECK-NEXT: str w0, [x8, #8]
+; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8]
; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
; CHECK-NEXT: ret
store i32 %a1, i32* @m3, align 4
diff --git a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
index 97e283c972a..b3b8406b763 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
@@ -38,9 +38,9 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: f3:
define void @f3(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
-; CHECK-NEXT: stp w0, w1, [x8, #12]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12
+; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: ret
store i32 %a1, i32* @m2, align 4
store i32 %a2, i32* @n2, align 4