diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAArch64.td | 17 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 63 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrFormats.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-mte.ll | 497 | 
6 files changed, 592 insertions, 22 deletions
| diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b542faaed53..e2a53c28257 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -684,3 +684,20 @@ def int_aarch64_crc32x  : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],  def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],      [IntrNoMem]>;  } + +//===----------------------------------------------------------------------===// +// Memory Tagging Extensions (MTE) Intrinsics +let TargetPrefix = "aarch64" in { +def int_aarch64_irg   : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], +    [IntrInaccessibleMemOnly]>; +def int_aarch64_addg  : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], +    [IntrNoMem]>; +def int_aarch64_gmi   : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], +    [IntrNoMem]>; +def int_aarch64_ldg   : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty], +    [IntrReadMem]>; +def int_aarch64_stg   : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], +    [IntrWriteMem]>; +def int_aarch64_subp :  Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], +    [IntrNoMem]>; +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 8c794b9a3d4..23d8adcde39 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -91,6 +91,12 @@ public:    bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {      return SelectAddrModeIndexed7S(N, 16, Base, OffImm);    } +  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) { +    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm); +  } +  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) { +    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm); +  }    bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {      return SelectAddrModeIndexed(N, 1, Base, OffImm);    } @@ -179,7 +185,12 @@ private:    bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,                               SDValue &Shift);    bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, -                               SDValue &OffImm); +                               SDValue &OffImm) { +    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm); +  } +  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW, +                                     unsigned Size, SDValue &Base, +                                     SDValue &OffImm);    bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,                               SDValue &OffImm);    bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, @@ -675,12 +686,13 @@ static bool isWorthFoldingADDlow(SDValue N) {    return true;  } -/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit +/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit  /// immediate" address.  The "Size" argument is the size in bytes of the memory  /// reference, which determines the scale. -bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, -                                                  SDValue &Base, -                                                  SDValue &OffImm) { +bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, +                                                        unsigned BW, unsigned Size, +                                                        SDValue &Base, +                                                        SDValue &OffImm) {    SDLoc dl(N);    const DataLayout &DL = CurDAG->getDataLayout();    const TargetLowering *TLI = getTargetLowering(); @@ -693,24 +705,41 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,    // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed    // selected here doesn't support labels/immediates, only base+offset. -    if (CurDAG->isBaseWithConstantOffset(N)) {      if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { -      int64_t RHSC = RHS->getSExtValue(); -      unsigned Scale = Log2_32(Size); -      if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) && -          RHSC < (0x40 << Scale)) { -        Base = N.getOperand(0); -        if (Base.getOpcode() == ISD::FrameIndex) { -          int FI = cast<FrameIndexSDNode>(Base)->getIndex(); -          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); +      if (IsSignedImm) { +        int64_t RHSC = RHS->getSExtValue(); +        unsigned Scale = Log2_32(Size); +        int64_t Range = 0x1 << (BW-1); + +        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && +            RHSC < (Range << Scale)) { +          Base = N.getOperand(0); +          if (Base.getOpcode() == ISD::FrameIndex) { +            int FI = cast<FrameIndexSDNode>(Base)->getIndex(); +            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); +          } +          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); +          return true; +        } +      } else { +        // unsigned Immediate +        uint64_t RHSC = RHS->getZExtValue(); +        unsigned Scale = Log2_32(Size); +        uint64_t Range = 0x1 << BW; + +        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { +          Base = N.getOperand(0); +          if (Base.getOpcode() == ISD::FrameIndex) { +            int FI = cast<FrameIndexSDNode>(Base)->getIndex(); +            Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); +          } +          OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); +          return true;          } -        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); -        return true;        }      }    } -    // Base only. The address will be materialized into a register before    // the memory is accessed.    //    add x0, Xbase, #offset diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 09bed434ba1..74fa5ef713d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -355,6 +355,9 @@ def am_indexed7s32  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;  def am_indexed7s64  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;  def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>; +def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>; +def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>; +  // uimm5sN predicate - True if the immediate is a multiple of N in the range  // [0 * N, 32 * N].  def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>; @@ -2339,7 +2342,7 @@ class AddSubG<bit isSub, string asm_inst, SDPatternOperator OpNode>  }  class SUBP<bit setsFlags, string asm_instr, SDPatternOperator OpNode> -      : BaseTwoOperand<0b0000, GPR64, asm_instr, null_frag, GPR64sp, GPR64sp> { +      : BaseTwoOperand<0b0000, GPR64, asm_instr, OpNode, GPR64sp, GPR64sp> {    let Inst{31} = 1;    let Inst{29} = setsFlags;  } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 2819c50c8aa..0b5c7bb811d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1768,7 +1768,11 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {    case AArch64::LDNPSi:    case AArch64::STNPWi:    case AArch64::STNPSi: +  case AArch64::LDG:      return 3; +  case AArch64::ADDG: +  case AArch64::STGOffset: +    return 2;    }  } @@ -2143,6 +2147,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,      MinOffset = 0;      MaxOffset = 4095;      break; +  case AArch64::ADDG: +    Scale = 16; +    Width = 0; +    MinOffset = 0; +    MaxOffset = 63; +    break; +  case AArch64::LDG: +  case AArch64::STGOffset: +    Scale = Width = 16; +    MinOffset = -256; +    MaxOffset = 255; +    break;    }    return true; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 7dad458d80e..fdf882c89ad 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1233,11 +1233,11 @@ defm : STOPregister<"stumin","LDUMIN">;// STUMINx  // v8.5 Memory Tagging Extension  let Predicates = [HasMTE] in { -def IRG   : BaseTwoOperand<0b0100, GPR64sp, "irg", null_frag, GPR64sp, GPR64>, +def IRG   : BaseTwoOperand<0b0100, GPR64sp, "irg", int_aarch64_irg, GPR64sp, GPR64>,              Sched<[]>{    let Inst{31} = 1;  } -def GMI   : BaseTwoOperand<0b0101, GPR64, "gmi", null_frag, GPR64sp>, Sched<[]>{ +def GMI   : BaseTwoOperand<0b0101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>, Sched<[]>{    let Inst{31} = 1;    let isNotDuplicable = 1;  } @@ -1246,7 +1246,7 @@ def SUBG  : AddSubG<1, "subg", null_frag>;  def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; -def SUBP : SUBP<0, "subp", null_frag>, Sched<[]>; +def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;  def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{    let Defs = [NZCV];  } @@ -1254,13 +1254,18 @@ def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{  def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;  def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; + +def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), +          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; +def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)), +          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>; +  def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;  def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",                     (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;  def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",                     (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; -  def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",                     (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {    let Inst{23} = 0; @@ -1275,6 +1280,9 @@ defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;  def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;  def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; +def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), +          (STGOffset GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>; +  } // Predicates = [HasMTE]  //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/arm64-mte.ll b/llvm/test/CodeGen/AArch64/arm64-mte.ll new file mode 100644 index 00000000000..c70bef5706d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-mte.ll @@ -0,0 +1,497 @@ +; RUN: llc < %s -mtriple=arm64-eabi -mattr=+mte | FileCheck %s + +; test create_tag +define i32* @create_tag(i32* %ptr, i32 %m) { +entry: +; CHECK-LABEL: create_tag: +  %0 = bitcast i32* %ptr to i8* +  %1 = zext i32 %m to i64 +  %2 = tail call i8* @llvm.aarch64.irg(i8* %0, i64 %1) +  %3 = bitcast i8* %2 to i32* +  ret i32* %3 +;CHECK: irg x0, x0, {{x[0-9]+}} +} + +; *********** __arm_mte_increment_tag  ************* +; test increment_tag1 +define i32* @increment_tag1(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag1: +  %0 = bitcast i32* %ptr to i8* +  %1 = tail call i8* @llvm.aarch64.addg(i8* %0, i64 7) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: addg x0, x0, #0, #7 +} + +%struct.S2K = type { [512 x i32] } +define i32* @increment_tag1stack(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag1stack: +  %s = alloca %struct.S2K, align 4 +  %0 = bitcast %struct.S2K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 2048, i8* nonnull %0) +  %1 = call i8* @llvm.aarch64.addg(i8* nonnull %0, i64 7) +  %2 = bitcast i8* %1 to i32* +  call void @llvm.lifetime.end.p0i8(i64 2048, i8* nonnull %0) +  ret i32* %2 +; CHECK: addg x0, sp, #0, #7 +} + + +define i32* @increment_tag2(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag2: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 4 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.addg(i8* nonnull %0, i64 7) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: addg x0, x0, #16, #7 +} + +define i32* @increment_tag2stack(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag2stack: +  %s = alloca %struct.S2K, align 4 +  %0 = bitcast %struct.S2K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 2048, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S2K, %struct.S2K* %s, i64 0, i32 0, i64 4 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.addg(i8* nonnull %1, i64 7) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 2048, i8* nonnull %0) +  ret i32* %3 +; CHECK: addg x0, sp, #16, #7 +} + +define i32* @increment_tag3(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag3: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 252 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.addg(i8* nonnull %0, i64 7) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: addg x0, x0, #1008, #7 +} + +define i32* @increment_tag3stack(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag3stack: +  %s = alloca %struct.S2K, align 4 +  %0 = bitcast %struct.S2K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 2048, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S2K, %struct.S2K* %s, i64 0, i32 0, i64 252 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.addg(i8* nonnull %1, i64 7) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 2048, i8* nonnull %0) +  ret i32* %3 +; CHECK: addg x0, sp, #1008, #7 +} + + +define i32* @increment_tag4(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag4: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 256 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.addg(i8* nonnull %0, i64 7) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: add [[T0:x[0-9]+]], x0, #1024 +; CHECK-NEXT: addg x0, [[T0]], #0, #7 +} + +define i32* @increment_tag4stack(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag4stack: +  %s = alloca %struct.S2K, align 4 +  %0 = bitcast %struct.S2K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 2048, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S2K, %struct.S2K* %s, i64 0, i32 0, i64 256 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.addg(i8* nonnull %1, i64 7) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 2048, i8* nonnull %0) +  ret i32* %3 +; CHECK: add [[T0:x[0-9]+]], {{.*}}, #1024 +; CHECK-NEXT: addg x0, [[T0]], #0, #7 +} + + +define i32* @increment_tag5(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag5: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 5 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.addg(i8* nonnull %0, i64 7) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: add [[T0:x[0-9]+]], x0, #20 +; CHECK-NEXT: addg x0, [[T0]], #0, #7 +} + +define i32* @increment_tag5stack(i32* %ptr) { +entry: +; CHECK-LABEL: increment_tag5stack: +  %s = alloca %struct.S2K, align 4 +  %0 = bitcast %struct.S2K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 2048, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S2K, %struct.S2K* %s, i64 0, i32 0, i64 5 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.addg(i8* nonnull %1, i64 7) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 2048, i8* nonnull %0) +  ret i32* %3 +; CHECK: add [[T0:x[0-9]+]], {{.*}}, #20 +; CHECK-NEXT: addg x0, [[T0]], #0, #7 +} + + +; *********** __arm_mte_exclude_tag  ************* +; test exclude_tag +define i32 @exclude_tag(i32* %ptr, i32 %m) local_unnamed_addr #0 { +entry: +;CHECK-LABEL: exclude_tag: +  %0 = zext i32 %m to i64 +  %1 = bitcast i32* %ptr to i8* +  %2 = tail call i64 @llvm.aarch64.gmi(i8* %1, i64 %0) +  %conv = trunc i64 %2 to i32 +  ret i32 %conv +; CHECK: gmi	x0, x0, {{x[0-9]+}} +} + + +; *********** __arm_mte_get_tag ************* +%struct.S8K = type { [2048 x i32] } +define i32* @get_tag1(i32* %ptr) { +entry: +; CHECK-LABEL: get_tag1: +  %0 = bitcast i32* %ptr to i8* +  %1 = tail call i8* @llvm.aarch64.ldg(i8* %0, i8* %0) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK ldg x0, [x0] +} + +define i32* @get_tag1_two_parm(i32* %ret_ptr, i32* %ptr) { +entry: +; CHECK-LABEL: get_tag1_two_parm: +  %0 = bitcast i32* %ret_ptr to i8* +  %1 = bitcast i32* %ptr to i8* +  %2 = tail call i8* @llvm.aarch64.ldg(i8* %0, i8* %1) +  %3 = bitcast i8* %2 to i32* +  ret i32* %3 +; CHECK ldg x0, [x1] +} + +define i32* @get_tag1stack() { +entry: +; CHECK-LABEL: get_tag1stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %1 = call i8* @llvm.aarch64.ldg(i8* nonnull %0, i8* nonnull %0) +  %2 = bitcast i8* %1 to i32* +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret i32* %2 +; CHECK: mov [[T0:x[0-9]+]], sp +; CHECK: ldg [[T0]], [sp] +} + +define i32* @get_tag1stack_two_param(i32* %ret_ptr) { +entry: +; CHECK-LABEL: get_tag1stack_two_param: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  %1 = bitcast i32*  %ret_ptr to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %2 = call i8* @llvm.aarch64.ldg(i8* nonnull %1, i8* nonnull %0) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret i32* %3 +; CHECK-NOT: mov {{.*}}, sp +; CHECK: ldg x0, [sp] +} + + +define i32* @get_tag2(i32* %ptr) { +entry: +; CHECK-LABEL: get_tag2: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 4 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.ldg(i8* nonnull %0, i8* nonnull %0) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: add  [[T0:x[0-9]+]], x0, #16 +; CHECK: ldg  [[T0]], [x0, #16] +} + +define i32* @get_tag2stack() { +entry: +; CHECK-LABEL: get_tag2stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 4 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.ldg(i8* nonnull %1, i8* nonnull %1) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret i32* %3 +; CHECK: mov [[T0:x[0-9]+]], sp +; CHECK: add x0, [[T0]], #16 +; CHECK: ldg x0, [sp, #16] +} + + +define i32* @get_tag3(i32* %ptr) { +entry: +; CHECK-LABEL: get_tag3: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 1020 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.ldg(i8* nonnull %0, i8* nonnull %0) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: add [[T0:x[0-8]+]], x0, #4080 +; CHECK: ldg [[T0]], [x0, #4080] +} + +define i32* @get_tag3stack() { +entry: +; CHECK-LABEL: get_tag3stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 1020 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.ldg(i8* nonnull %1, i8* nonnull %1) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret i32* %3 +; CHECK: mov [[T0:x[0-9]+]], sp +; CHECK: add x0, [[T0]], #4080 +; CHECK: ldg x0, [sp, #4080] +} + + +define i32* @get_tag4(i32* %ptr) { +entry: +; CHECK-LABEL: get_tag4: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 1024 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.ldg(i8* nonnull %0, i8* nonnull %0) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: add x0, x0, #1, lsl #12 +; CHECK-NEXT: ldg x0, [x0] +} + +define i32* @get_tag4stack() { +entry: +; CHECK-LABEL: get_tag4stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 1024 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.ldg(i8* nonnull %1, i8* nonnull %1) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret i32* %3 +; CHECK: mov [[T0:x[0-9]+]], sp +; CHECK-NEXT: add x[[T1:[0-9]+]], [[T0]], #1, lsl #12 +; CHECK-NEXT: ldg x[[T1]], [x[[T1]]] +} + +define i32* @get_tag5(i32* %ptr) { +entry: +; CHECK-LABEL: get_tag5: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 5 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = tail call i8* @llvm.aarch64.ldg(i8* nonnull %0, i8* nonnull %0) +  %2 = bitcast i8* %1 to i32* +  ret i32* %2 +; CHECK: add x0, x0, #20 +; CHECK-NEXT: ldg x0, [x0] +} + +define i32* @get_tag5stack() { +entry: +; CHECK-LABEL: get_tag5stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 5 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = call i8* @llvm.aarch64.ldg(i8* nonnull %1, i8* nonnull %1) +  %3 = bitcast i8* %2 to i32* +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret i32* %3 +; CHECK: mov [[T0:x[0-9]+]], sp +; CHECK: add x[[T1:[0-9]+]], [[T0]], #20 +; CHECK-NEXT: ldg x[[T1]], [x[[T1]]] +} + + +; *********** __arm_mte_set_tag  ************* +define void @set_tag1(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag1: +  %0 = bitcast i32* %tag to i8* +  %1 = bitcast i32* %ptr to i8* +  tail call void @llvm.aarch64.stg(i8* %0, i8* %1) +  ret void +; CHECK: stg x0, [x1] +} + +define void @set_tag1stack(i32* %tag) { +entry: +; CHECK-LABEL: set_tag1stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast i32* %tag to i8* +  %1 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %1) +  call void @llvm.aarch64.stg(i8* %0, i8* nonnull %1) +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret void +; CHECK: stg x0, [sp] +} + + +define void @set_tag2(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag2: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 4 +  %0 = bitcast i32* %tag to i8* +  %1 = bitcast i32* %add.ptr to i8* +  tail call void @llvm.aarch64.stg(i8* %0, i8* %1) +  ret void +; CHECK: stg x0, [x1, #16] +} + +define void @set_tag2stack(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag2stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 4 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = bitcast i32* %tag to i8* +  call void @llvm.aarch64.stg(i8* %2, i8* nonnull %1) +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret void +; CHECK: stg x0, [sp, #16] +} + + + +define void @set_tag3(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag3: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 1020 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = bitcast i32* %tag to i8* +  tail call void @llvm.aarch64.stg(i8* %1, i8* %0) +  ret void +; CHECK: stg x0, [x1, #4080] +} + +define void @set_tag3stack(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag3stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 1020 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = bitcast i32* %tag to i8* +  call void @llvm.aarch64.stg(i8* %2, i8* nonnull %1) +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret void +; CHECK: stg x0, [sp, #4080] +} + + + +define void @set_tag4(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag4: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 1024 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = bitcast i32* %tag to i8* +  tail call void @llvm.aarch64.stg(i8* %1, i8* %0) +  ret void +; CHECK: add x[[T0:[0-9]+]], x1, #1, lsl #12 +; CHECK-NEXT: stg x0, [x[[T0]]] +} + +define void @set_tag4stack(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag4stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 1024 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = bitcast i32* %tag to i8* +  call void @llvm.aarch64.stg(i8* %2, i8* nonnull %1) +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret void +; CHECK: add x[[T0:[0-9]+]], {{.*}}, #1, lsl #12 +; CHECK-NEXT: stg x0, [x[[T0]]] +} + + +define void @set_tag5(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag5: +  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 5 +  %0 = bitcast i32* %add.ptr to i8* +  %1 = bitcast i32* %tag to i8* +  tail call void @llvm.aarch64.stg(i8* %1, i8* %0) +  ret void +; CHECK: add x[[T0:[0-9]+]], x1, #20 +; CHECK-NEXT: stg x0, [x[[T0]]] +} + +define void @set_tag5stack(i32* %tag, i32* %ptr) { +entry: +; CHECK-LABEL: set_tag5stack: +  %s = alloca %struct.S8K, align 4 +  %0 = bitcast %struct.S8K* %s to i8* +  call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %0) +  %arrayidx = getelementptr inbounds %struct.S8K, %struct.S8K* %s, i64 0, i32 0, i64 5 +  %1 = bitcast i32* %arrayidx to i8* +  %2 = bitcast i32* %tag to i8* +  call void @llvm.aarch64.stg(i8* %2, i8* nonnull %1) +  call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %0) +  ret void +; CHECK: add x[[T0:[0-9]+]], {{.*}}, #20 +; CHECK-NEXT: stg x0, [x[[T0]]] +} + + +; *********** __arm_mte_ptrdiff  ************* +define i64 @subtract_pointers(i32* %ptra, i32* %ptrb) { +entry: +; CHECK-LABEL: subtract_pointers: +  %0 = bitcast i32* %ptra to i8* +  %1 = bitcast i32* %ptrb to i8* +  %2 = tail call i64 @llvm.aarch64.subp(i8* %0, i8* %1) +  ret i64 %2 +; CHECK: subp x0, x0, x1 +} + +declare i8* @llvm.aarch64.irg(i8*, i64) +declare i8* @llvm.aarch64.addg(i8*, i64) +declare i64 @llvm.aarch64.gmi(i8*, i64) +declare i8* @llvm.aarch64.ldg(i8*, i8*) +declare void @llvm.aarch64.stg(i8*, i8*) +declare i64 @llvm.aarch64.subp(i8*, i8*) + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) | 

