| author | Peter Collingbourne <peter@pcc.me.uk> | 2018-04-09 19:59:57 +0000 |
|---|---|---|
| committer | Peter Collingbourne <peter@pcc.me.uk> | 2018-04-09 19:59:57 +0000 |
| commit | 5cff2409aef90dd85331735e65b87f3118766c7c | |
| tree | 19a5a09ed4848c0d98fd77b3efab37268b4be5c2 | |
| parent | 79f2c720b57833e2046eb3d8dd30899aee007f78 | |
AArch64: Allow offsets to be folded into addresses with ELF.
This is a code size win in code that frequently takes addresses at an
offset from a global, such as C++ constructors, which typically need to
compute the address of a vtable at a fixed offset. It reduces the size
of Chromium for Android's .text section by 46KB, or 56KB with ThinLTO
(which exposes more opportunities to use a direct access rather than a
GOT access).
Because the addend range is limited in COFF and Mach-O, this is
enabled for ELF only.
Differential Revision: https://reviews.llvm.org/D45199
llvm-svn: 329611
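To make the codegen effect concrete, here is a minimal sketch (illustrative only, not taken from the commit; the global `@g` and function `@f` are made-up names), assuming a small-code-model ELF target such as aarch64-unknown-linux-gnu:

```llvm
; A load at a constant offset from a local (non-GOT) global.
@g = global [4 x i32] zeroinitializer

define i32 @f() {
  ; Before this change, the offset is applied to the materialized address:
  ;   adrp x8, g
  ;   add  x8, x8, :lo12:g
  ;   ldr  w0, [x8, #8]
  ; After it, the addend is folded into both relocations:
  ;   adrp x8, g+8
  ;   ldr  w0, [x8, :lo12:g+8]
  %p = getelementptr inbounds [4 x i32], [4 x i32]* @g, i64 0, i64 2
  %v = load i32, i32* %p, align 4
  ret i32 %v
}
```

Dropping the intermediate `add` in each such access is where the .text savings in vtable-heavy code come from.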
| file | lines changed |
|---|---|
| llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 18 |
| llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 23 |
| llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll | 13 |
| llvm/test/CodeGen/AArch64/arm64-addrmode.ll | 31 |
| llvm/test/CodeGen/AArch64/arm64-atomic-128.ll | 24 |
| llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll | 28 |
| llvm/test/CodeGen/AArch64/arm64-misched-multimmo.ll | 12 |
| llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll | 44 |
| llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll | 129 |
| llvm/test/CodeGen/AArch64/cmpxchg-O0.ll | 9 |
| llvm/test/CodeGen/AArch64/func-argpassing.ll | 8 |
| llvm/test/CodeGen/AArch64/func-calls.ll | 14 |
| llvm/test/CodeGen/AArch64/misched-fusion-addr.ll | 6 |
13 files changed, 215 insertions, 144 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index eee59f1e719..d44eee051aa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -743,14 +743,16 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     if (!GAN)
       return true;
 
-    const GlobalValue *GV = GAN->getGlobal();
-    unsigned Alignment = GV->getAlignment();
-    Type *Ty = GV->getValueType();
-    if (Alignment == 0 && Ty->isSized())
-      Alignment = DL.getABITypeAlignment(Ty);
-
-    if (Alignment >= Size)
-      return true;
+    if (GAN->getOffset() % Size == 0) {
+      const GlobalValue *GV = GAN->getGlobal();
+      unsigned Alignment = GV->getAlignment();
+      Type *Ty = GV->getValueType();
+      if (Alignment == 0 && Ty->isSized())
+        Alignment = DL.getABITypeAlignment(Ty);
+
+      if (Alignment >= Size)
+        return true;
+    }
   }
 
   if (CurDAG->isBaseWithConstantOffset(N)) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 28edcc2e47b..546da27cc41 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3675,7 +3675,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
                                              SelectionDAG &DAG,
                                              unsigned Flag) const {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
+                                    N->getOffset(), Flag);
 }
 
 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3749,9 +3750,9 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
                                       : AArch64II::MO_NO_FLAG);
   unsigned char OpFlags =
       Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
-
-  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-         "unexpected offset in global node");
+  if (OpFlags != AArch64II::MO_NO_FLAG)
+    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+           "unexpected offset in global node");
 
   // This also catches the large code model case for Darwin.
   if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4989,11 +4990,15 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
 
 bool AArch64TargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
-  DEBUG(dbgs() << "Skipping offset folding global address: ");
-  DEBUG(GA->dump());
-  DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
-        "addresses\n");
-  return false;
+  // FIXME: Only ELF can represent the full range of possible addends here, as
+  // the format stores the addend in a 64-bit field. With Mach-O the equivalent
+  // field is 24 bits, and with COFF it is 21 bits. To make this work with the
+  // other object formats we will need to arrange to prevent the addend from
+  // going out of bounds.
+  if (!getTargetMachine().getTargetTriple().isOSBinFormatELF())
+    return false;
+  return Subtarget->ClassifyGlobalReference(
+             GA->getGlobal(), getTargetMachine()) == AArch64II::MO_NO_FLAG;
 }
 
 bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
diff --git a/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll b/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
index 2b6cd7c2d28..b00425a6d02 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll
@@ -6,12 +6,13 @@
 ; The important thing for this test is that we need an unaligned load of `l_b'
 ; ("ldr w2, [x1, #8]" in this case).
 
-; CHECK:      adrp x[[PAGE:[0-9]+]], {{l_b@PAGE|.Lb}}
-; CHECK: add  x[[ADDR:[0-9]+]], x[[PAGE]], {{l_b@PAGEOFF|:lo12:.Lb}}
-; CHECK-NEXT: ldr  [[VAL:w[0-9]+]], [x[[ADDR]], #8]
-; CHECK-NEXT: str  [[VAL]], [x0, #8]
-; CHECK-NEXT: ldr  [[VAL2:x[0-9]+]], [x[[ADDR]]]
-; CHECK-NEXT: str  [[VAL2]], [x0]
+; CHECK:      adrp    x[[PAGE:[0-9]+]], .Lb+8
+; CHECK-NEXT: ldr     [[VAL:w[0-9]+]], [x[[PAGE]], :lo12:.Lb+8]
+; CHECK-NEXT: str     [[VAL]], [x0, #8]
+; CHECK-NEXT: adrp    x[[ADDR:[0-9]+]], .Lb
+; CHECK-NEXT: add     x[[ADDR]], x[[ADDR]], :lo12:.Lb
+; CHECK-NEXT: ldr     [[VAL2:x[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: str     [[VAL2]], [x0]
 
 define void @foo(i8* %a) {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast ([3 x i32]* @b to i8*), i64 12, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index 6da76792163..16f8d016063 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -5,32 +5,31 @@
 
 ; base + offset (imm9)
 ; CHECK: @t1
-; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
+; CHECK: ldr xzr, [x0, #8]
 ; CHECK: ret
-define void @t1() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
+define void @t1(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + offset (> imm9)
 ; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
 ; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
 ; CHECK: ret
-define void @t2() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
+define void @t2(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
 ; CHECK: @t3
-; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
+; CHECK: ldr xzr, [x0, #32760]
 ; CHECK: ret
-define void @t3() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
+define void @t3(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -38,10 +37,10 @@ define void @t3() {
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
 ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
+; CHECK: ldr xzr, [x0, x[[NUM]]]
 ; CHECK: ret
-define void @t4() {
-  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
+define void @t4(i64* %object) {
+  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -58,12 +57,12 @@ define void @t5(i64 %a) {
 
 ; base + reg + imm
 ; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
 ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
 ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t6(i64 %a) {
-  %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
+define void @t6(i64 %a, i64* %object) {
+  %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index 21e3c768ee6..622d5172a2c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -29,7 +29,8 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw nand i128* %p, i128 %bits release
   store i128 %val, i128* @var, align 16
   ret void
@@ -44,7 +45,8 @@ define void @fetch_and_or(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw or i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -59,7 +61,8 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw add i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -74,7 +77,8 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw sub i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -95,7 +99,8 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw min i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -116,7 +121,8 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw max i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -137,7 +143,8 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw umin i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -158,7 +165,8 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]], [{{.*}}, :lo12:var]
+; CHECK-DAG: str    [[DEST_REGHI]], [{{.*}}, :lo12:var+8]
   %val = atomicrmw umax i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
index 4f8f3a227bb..f6807711190 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -16,10 +16,12 @@
 define i32 @t0() {
 entry:
 ; CHECK-LABEL: t0:
-; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10]
-; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10]
-; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8]
-; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
+; CHECK: ldrb [[REG0:w[0-9]+]], [{{x[0-9]+}}, :lo12:src+10]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:src+8
+; CHECK: strb [[REG0]], [{{x[0-9]+}}, :lo12:dst+10]
+; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]]]
+; CHECK: add x[[BASEREG2:[0-9]+]], {{x[0-9]+}}, :lo12:dst+8
+; CHECK: strh [[REG1]], [x[[BASEREG2]]]
 ; CHECK: ldr [[REG2:x[0-9]+]],
 ; CHECK: str [[REG2]],
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)
@@ -29,7 +31,8 @@ entry:
 define void @t1(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t1:
-; CHECK: ldur [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]], #15]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:.L.str1+15
+; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG:[0-9]+]]]
 ; CHECK: stur [[DEST]], [x0, #15]
 ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
 ; CHECK: str [[DEST]], [x0]
@@ -43,7 +46,10 @@ entry:
 ; CHECK: mov [[REG3:w[0-9]+]]
 ; CHECK: movk [[REG3]],
 ; CHECK: str [[REG3]], [x0, #32]
-; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:.L.str2+16
+; CHECK: ldr [[DEST2:q[0-9]+]], [x[[BASEREG]]]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:.L.str2
+; CHECK: ldr [[DEST1:q[0-9]+]], [x[[BASEREG]]]
 ; CHECK: stp [[DEST1]], [[DEST2]], [x0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false)
   ret void
@@ -52,8 +58,10 @@ entry:
 define void @t3(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t3:
-; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG:[0-9]+]], #16]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:.L.str3+16
+; CHECK: ldr [[REG4:x[0-9]+]], [x[[BASEREG]]]
 ; CHECK: str [[REG4]], [x0, #16]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:.L.str3
 ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
 ; CHECK: str [[DEST]], [x0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false)
@@ -87,8 +95,10 @@ entry:
 define void @t6() nounwind {
 entry:
 ; CHECK-LABEL: t6:
-; CHECK: ldur [[REG9:x[0-9]+]], [x{{[0-9]+}}, #6]
-; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:.L.str6+6
+; CHECK: ldr [[REG9:x[0-9]+]], [x[[BASEREG]]]
+; CHECK: add x[[BASEREG:[0-9]+]], {{x[0-9]+}}, :lo12:spool.splbuf+6
+; CHECK: str [[REG9]], [x[[BASEREG]]]
 ; CHECK: ldr
 ; CHECK: str
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-multimmo.ll b/llvm/test/CodeGen/AArch64/arm64-misched-multimmo.ll
index c35c72f24b0..642c6226224 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misched-multimmo.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misched-multimmo.ll
@@ -8,11 +8,15 @@
 ; Check that no scheduling dependencies are created between the paired loads and the store during post-RA MI scheduling.
 ;
 ; CHECK-LABEL: # Machine code for function foo:
-; CHECK: SU(2):   renamable $w{{[0-9]+}}, renamable $w{{[0-9]+}} = LDPWi
+; CHECK: SU(1):   renamable $w{{[0-9]+}} = LDRWui
 ; CHECK: Successors:
-; CHECK-NOT: ch SU(4)
-; CHECK: SU(3)
-; CHECK: SU(4):   STRWui $wzr, renamable $x{{[0-9]+}}
+; CHECK-NOT: SU(5)
+; CHECK: SU(2)
+; CHECK: SU(3):   renamable $w{{[0-9]+}} = LDRWui
+; CHECK: Successors:
+; CHECK-NOT: SU(5)
+; CHECK: SU(4)
+; CHECK: SU(5):   STRWui $wzr, renamable $x{{[0-9]+}}
 define i32 @foo() {
 entry:
   %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @G2, i64 0, i64 0), align 4
diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 0f8f4c5d4a4..502ae891122 100644
--- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -11,9 +11,6 @@ define void @test_simple(i32 %n, ...) {
 ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
-; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
-
 ; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
 ; CHECK: str x7, [sp, #
@@ -22,19 +19,20 @@ define void @test_simple(i32 %n, ...) {
 ; ... omit middle ones ...
 ; CHECK: stp q6, q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
+; CHECK: str [[STACK_TOP]], [{{[x[0-9]+}}, :lo12:var]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: str [[GR_TOP]], [{{[x[0-9]+}}, :lo12:var+8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
 ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: str [[VR_TOP]], [{{[x[0-9]+}}, :lo12:var+16]
 
-; CHECK: mov     [[GRVR:x[0-9]+]], #-545460846720
-; CHECK: movk    [[GRVR]], #65480
-; CHECK: str     [[GRVR]], [x[[VA_LIST]], #24]
+; CHECK: mov     [[GRVR1:w[0-9]+]], #-56
+; CHECK: str     [[GRVR1]], [{{[x[0-9]+}}, :lo12:var+24]
+; CHECK: orr     [[GRVR2:w[0-9]+]], wzr, #0xffffff80
+; CHECK: str     [[GRVR2]], [{{[x[0-9]+}}, :lo12:var+28]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
@@ -47,9 +45,6 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
-; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
-
 ; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
 ; CHECK: str x7, [sp, #
@@ -58,19 +53,20 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; ... omit middle ones ...
 ; CHECK: str q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
+; CHECK: str [[STACK_TOP]], [{{[x[0-9]+}}, :lo12:var]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
-; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: str [[GR_TOP]], [{{[x[0-9]+}}, :lo12:var+8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
 ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
-; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: str [[VR_TOP]], [{{[x[0-9]+}}, :lo12:var+16]
 
-; CHECK: mov  [[GRVR_OFFS:x[0-9]+]], #-40
-; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32
-; CHECK: str  [[GRVR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK: mov     [[GRVR1:w[0-9]+]], #-40
+; CHECK: str     [[GRVR1]], [{{[x[0-9]+}}, :lo12:var+24]
+; CHECK: mov     [[GRVR2:w[0-9]+]], #-112
+; CHECK: str     [[GRVR2]], [{{[x[0-9]+}}, :lo12:var+28]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
@@ -85,8 +81,7 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
   call void @llvm.va_start(i8* %addr)
 ; CHECK-NOT: sub sp, sp
 ; CHECK: mov [[STACK:x[0-9]+]], sp
-; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
-; CHECK: str [[STACK]], [x[[VAR]]]
+; CHECK: str [[STACK]], [{{[x[0-9]+}}, :lo12:var]
   ret void
 }
 
@@ -97,8 +92,7 @@ define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
 ; CHECK-LABEL: test_offsetstack:
 ; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]!
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
-; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
-; CHECK: str [[STACK_TOP]], [x[[VAR]]]
+; CHECK: str [[STACK_TOP]], [{{[x[0-9]+}}, :lo12:var]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
@@ -129,13 +123,13 @@ define void @test_va_copy() {
   call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
 ; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var
-
 ; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]]
 ; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
 ; CHECK: str [[BLOCK]], [x[[DST]]]
 
-; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]], #16]
-; CHECK: str [[BLOCK]], [x[[DST]], #16]
+; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var+16
+; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]]
+; CHECK: str [[BLOCK]], [x[[DST]]]
 
   ret void
 ; CHECK: ret
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
index 938b3d1d059..6e530cb258b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -264,149 +264,196 @@ entry:
 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
 ; registers for unscaled vector accesses
 
-@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
-
-define <1 x i64> @fct0() nounwind readonly ssp {
+define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct0:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <1 x i64>*
+  %0 = load <1 x i64>, <1 x i64>* %q, align 8
   ret <1 x i64> %0
 }
 
-define <2 x i32> @fct1() nounwind readonly ssp {
+define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct1:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i32>*
+  %0 = load <2 x i32>, <2 x i32>* %q, align 8
   ret <2 x i32> %0
 }
 
-define <4 x i16> @fct2() nounwind readonly ssp {
+define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct2:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i16>*
+  %0 = load <4 x i16>, <4 x i16>* %q, align 8
   ret <4 x i16> %0
 }
 
-define <8 x i8> @fct3() nounwind readonly ssp {
+define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct3:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i8>*
+  %0 = load <8 x i8>, <8 x i8>* %q, align 8
   ret <8 x i8> %0
 }
 
-define <2 x i64> @fct4() nounwind readonly ssp {
+define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct4:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i64>*
+  %0 = load <2 x i64>, <2 x i64>* %q, align 16
   ret <2 x i64> %0
 }
 
-define <4 x i32> @fct5() nounwind readonly ssp {
+define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct5:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i32>*
+  %0 = load <4 x i32>, <4 x i32>* %q, align 16
   ret <4 x i32> %0
 }
 
-define <8 x i16> @fct6() nounwind readonly ssp {
+define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct6:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i16>*
+  %0 = load <8 x i16>, <8 x i16>* %q, align 16
   ret <8 x i16> %0
 }
 
-define <16 x i8> @fct7() nounwind readonly ssp {
+define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct7:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <16 x i8>*
+  %0 = load <16 x i8>, <16 x i8>* %q, align 16
   ret <16 x i8> %0
 }
 
-define void @fct8() nounwind ssp {
+define void @fct8(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct8:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
-  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <1 x i64>*
+  %0 = load <1 x i64>, <1 x i64>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <1 x i64>*
+  store <1 x i64> %0, <1 x i64>* %q2, align 8
   ret void
 }
 
-define void @fct9() nounwind ssp {
+define void @fct9(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct9:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
-  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i32>*
+  %0 = load <2 x i32>, <2 x i32>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <2 x i32>*
+  store <2 x i32> %0, <2 x i32>* %q2, align 8
   ret void
 }
 
-define void @fct10() nounwind ssp {
+define void @fct10(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct10:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
-  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i16>*
+  %0 = load <4 x i16>, <4 x i16>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <4 x i16>*
+  store <4 x i16> %0, <4 x i16>* %q2, align 8
   ret void
 }
 
-define void @fct11() nounwind ssp {
+define void @fct11(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct11:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
-  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i8>*
+  %0 = load <8 x i8>, <8 x i8>* %q, align 8
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <8 x i8>*
+  store <8 x i8> %0, <8 x i8>* %q2, align 8
   ret void
 }
 
-define void @fct12() nounwind ssp {
+define void @fct12(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct12:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
-  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <2 x i64>*
+  %0 = load <2 x i64>, <2 x i64>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <2 x i64>*
+  store <2 x i64> %0, <2 x i64>* %q2, align 16
   ret void
 }
 
-define void @fct13() nounwind ssp {
+define void @fct13(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct13:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
-  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <4 x i32>*
+  %0 = load <4 x i32>, <4 x i32>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <4 x i32>*
+  store <4 x i32> %0, <4 x i32>* %q2, align 16
   ret void
 }
 
-define void @fct14() nounwind ssp {
+define void @fct14(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct14:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
-  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <8 x i16>*
+  %0 = load <8 x i16>, <8 x i16>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <8 x i16>*
+  store <8 x i16> %0, <8 x i16>* %q2, align 16
   ret void
 }
 
-define void @fct15() nounwind ssp {
+define void @fct15(i8* %str) nounwind ssp {
 entry:
 ; CHECK-LABEL: fct15:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
-  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
+  %p = getelementptr inbounds i8, i8* %str, i64 3
+  %q = bitcast i8* %p to <16 x i8>*
+  %0 = load <16 x i8>, <16 x i8>* %q, align 16
+  %p2 = getelementptr inbounds i8, i8* %str, i64 4
+  %q2 = bitcast i8* %p2 to <16 x i8>*
+  store <16 x i8> %0, <16 x i8>* %q2, align 16
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
index bd3d328ec11..f4bf56ef1f9 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -86,11 +86,10 @@ define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nou
 @var128 = global i128 0
 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) {
 ; CHECK-LABEL: test_cmpxchg_128_unsplit:
-; CHECK:     add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
-; CHECK:     ldr [[DESIRED_HI:x[0-9]+]], [x[[VAR128]], #8]
-; CHECK:     ldr [[DESIRED_LO:x[0-9]+]], [x[[VAR128]]]
-; CHECK:     ldr [[NEW_HI:x[0-9]+]], [x[[VAR128]], #8]
-; CHECK:     ldr [[NEW_LO:x[0-9]+]], [x[[VAR128]]]
+; CHECK:     ldr [[DESIRED_HI:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128+8]
+; CHECK:     ldr [[DESIRED_LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
+; CHECK:     ldr [[NEW_HI:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128+8]
+; CHECK:     ldr [[NEW_LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
 ; CHECK:     ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
 ; CHECK:     cmp [[OLD_LO]], [[DESIRED_LO]]
diff --git a/llvm/test/CodeGen/AArch64/func-argpassing.ll b/llvm/test/CodeGen/AArch64/func-argpassing.ll
index 3e6a8bb2c8c..61bfce90ae5 100644
--- a/llvm/test/CodeGen/AArch64/func-argpassing.ll
+++ b/llvm/test/CodeGen/AArch64/func-argpassing.ll
@@ -96,8 +96,8 @@ define [2 x i64] @return_struct() {
     %addr = bitcast %myStruct* @varstruct to [2 x i64]*
     %val = load [2 x i64], [2 x i64]* %addr
     ret [2 x i64] %val
-; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct
-; CHECK: ldp x0, x1, [x[[VARSTRUCT]]]
+; CHECK: ldr x0, [{{x[0-9]+}}, :lo12:varstruct]
+; CHECK: ldr x1, [{{x[0-9]+}}, :lo12:varstruct+8]
     ; Make sure epilogue immediately follows
 ; CHECK-NEXT: ret
 }
@@ -164,8 +164,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
 define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
 ; CHECK-LABEL: check_i128_regalign
     store i128 %val1, i128* @var128
-; CHECK-DAG: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
-; CHECK-DAG: stp x2, x3, [x[[VAR128]]]
+; CHECK-DAG: str x3, [{{x[0-9]+}}, :lo12:var128+8]
+; CHECK-DAG: str x2, [{{x[0-9]+}}, :lo12:var128]
 
     ret i64 %val2
 ; CHECK-DAG: mov x0, x4
diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll
index 54d38a91c38..5151e8cac6c 100644
--- a/llvm/test/CodeGen/AArch64/func-calls.ll
+++ b/llvm/test/CodeGen/AArch64/func-calls.ll
@@ -62,11 +62,11 @@ define void @simple_rets() {
   %arr = call [2 x i64] @return_smallstruct()
   store [2 x i64] %arr, [2 x i64]* @varsmallstruct
 ; CHECK: bl return_smallstruct
-; CHECK: add x[[VARSMALLSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varsmallstruct
-; CHECK: stp x0, x1, [x[[VARSMALLSTRUCT]]]
+; CHECK: str x1, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct+8]
+; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct
+; CHECK: str x0, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct]
 
   call void @return_large_struct(%myStruct* sret @varstruct)
-; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct
 ; CHECK: bl return_large_struct
 
   ret void
@@ -128,12 +128,12 @@ define void @check_i128_align() {
   call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7,
                                    i32 42, i128 %val)
-; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
-; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
+; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
+; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128+8]
 ; CHECK: stp [[I128HI]], {{x[0-9]+}}, [sp, #24]
 
-; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
-; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
+; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
+; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128+8]
 ; CHECK-NONEON: stp [[I128HI]], {{x[0-9]+}}, [sp, #24]
 
 ; CHECK: bl check_i128_stackalign
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
index f851148cca4..9366c154a6e 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll
@@ -67,8 +67,10 @@ define void @ldst_64bit() {
 ; CHECK-LABEL: ldst_64bit:
 ; CHECK: adrp [[RL:x[0-9]+]], var_64bit
 ; CHECK-NEXT: ldr {{x[0-9]+}}, {{\[}}[[RL]], {{#?}}:lo12:var_64bit{{\]}}
-; CHECK: adrp [[RQ:x[0-9]+]], var_128bit
-; CHECK-NEXT: add {{x[0-9]+}}, [[RQ]], {{#?}}:lo12:var_128bit
+; CHECK: adrp [[RQ1:x[0-9]+]], var_128bit
+; CHECK-NEXT: str {{x[0-9]+}}, {{\[}}[[RQ1]], {{#?}}:lo12:var_128bit{{\]}}
+; CHECK: adrp [[RQ2:x[0-9]+]], var_128bit+8
+; CHECK-NEXT: str {{x[0-9]+}}, {{\[}}[[RQ2]], {{#?}}:lo12:var_128bit+8{{\]}}
 }
 
 define void @ldst_half() {
```
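For context on the `ClassifyGlobalReference` check in the new `isOffsetFoldingLegal` above, here is a hedged sketch of the case it excludes (a hypothetical example, not part of the commit; `@ext` and `@load_ext` are made-up names): a global reached through the GOT keeps a zero offset on its `GlobalAddressSDNode`, since a GOT slot holds only the symbol's own address and cannot carry a per-use addend, so the offset has to be applied after the GOT load:

```llvm
; Compiled with -relocation-model=pic, @ext is classified as MO_GOT
; rather than MO_NO_FLAG, so the +8 stays outside the relocations:
;   adrp x8, :got:ext
;   ldr  x8, [x8, :got_lo12:ext]
;   ldr  w0, [x8, #8]
@ext = external global [4 x i32]

define i32 @load_ext() {
  %p = getelementptr inbounds [4 x i32], [4 x i32]* @ext, i64 0, i64 2
  %v = load i32, i32* %p, align 4
  ret i32 %v
}
```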

