| field | value | date |
|---|---|---|
| author | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2019-12-11 13:48:10 +0000 |
| committer | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2019-12-11 13:58:39 +0000 |
| commit | c0a3ab365514e126b694e009503d537d0e67eb01 (patch) | |
| tree | 11929ad995f3c44d4521d4517a56bdab580e758a | |
| parent | 17554b89617e084848784dfd9ac58e2718d8f8f7 (diff) | |
Revert "[AArch64][SVE] Implement intrinsics for non-temporal loads & stores"
This reverts commit 3f5bf35f868d1e33cd02a5825d33ed4675be8cb1 as it was
causing build failures in llvm-clang-x86_64-expensive-checks:
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-debian/builds/392
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-ubuntu/builds/1045
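
For context, the reverted change had exposed SVE predicated non-temporal loads and stores as target intrinsics. The sketch below is distilled from the regression tests deleted further down in this diff; the function names are illustrative placeholders, while the intrinsic declarations match the removed sve-intrinsics-loads.ll and sve-intrinsics-stores.ll:

```llvm
; Illustrative only: one predicated non-temporal load and one store via the
; reverted intrinsics (declarations taken from the deleted tests below).
define <vscale x 4 x i32> @ldnt1w_example(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
  ; Predicated non-temporal load of an nxv4i32 vector.
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred,
                                                                 <vscale x 4 x i32>* %addr)
  ret <vscale x 4 x i32> %res
}

define void @stnt1w_example(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
  ; Predicated non-temporal store of the active lanes of %data.
  call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                            <vscale x 4 x i1> %pred,
                                            <vscale x 4 x i32>* %addr)
  ret void
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
```

In the deleted tests these calls were checked to lower to the corresponding SVE instructions, e.g. `ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]` and `stnt1w { z0.s }, p0, [x0, #0, lsl #2]`.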
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAArch64.td | 26 |
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 66 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 22 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll | 88 |
| -rw-r--r-- | llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll | 95 |

7 files changed, 3 insertions, 311 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index f7db41e030d..1cd39d6e351 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -768,20 +768,6 @@ def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
 def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
 
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
-
-  class AdvSIMD_1Vec_PredLoad_Intrinsic
-    : Intrinsic<[llvm_anyvector_ty],
-                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                 LLVMPointerTo<0>],
-                [IntrReadMem, IntrArgMemOnly]>;
-
-  class AdvSIMD_1Vec_PredStore_Intrinsic
-    : Intrinsic<[],
-                [llvm_anyvector_ty,
-                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-                 LLVMPointerTo<0>],
-                [IntrArgMemOnly, NoCapture<2>]>;
-
   class AdvSIMD_Merged1VectorArg_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
@@ -1048,18 +1034,6 @@ class AdvSIMD_GatherLoad_VecTorBase_Intrinsic
                 [IntrReadMem, IntrArgMemOnly]>;
 
 //
-// Loads
-//
-
-def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
-
-//
-// Stores
-//
-
-def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
-
-//
 // Integer arithmetic
 //
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a426e32d418..9ca51e72ec7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -25,7 +25,6 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -6590,9 +6589,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
   if (Align == 0)  // Ensure that codegen never sees alignment 0
     Align = getEVTAlignment(MemVT);
 
-  if (!Size && MemVT.isScalableVector())
-    Size = MemoryLocation::UnknownSize;
-  else if (!Size)
+  if (!Size)
     Size = MemVT.getStoreSize();
 
   MachineFunction &MF = getMachineFunction();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1aa0a9b2bf1..2033c102386 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8514,26 +8514,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.align = Align(16);
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
-  case Intrinsic::aarch64_sve_ldnt1: {
-    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
-    Info.ptrVal = I.getArgOperand(1);
-    Info.offset = 0;
-    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
-    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
-    return true;
-  }
-  case Intrinsic::aarch64_sve_stnt1: {
-    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
-    Info.ptrVal = I.getArgOperand(2);
-    Info.offset = 0;
-    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
-    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
-    return true;
-  }
   default:
     break;
   }
@@ -10963,48 +10943,6 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
   return NewST1;
 }
 
-static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
-  EVT PtrTy = N->getOperand(3).getValueType();
-
-  EVT LoadVT = VT;
-  if (VT.isFloatingPoint())
-    LoadVT = VT.changeTypeToInteger();
-
-  auto *MINode = cast<MemIntrinsicSDNode>(N);
-  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
-  SDValue L = DAG.getMaskedLoad(VT, DL, MINode->getChain(),
-                                MINode->getOperand(3), DAG.getUNDEF(PtrTy),
-                                MINode->getOperand(2), PassThru,
-                                MINode->getMemoryVT(), MINode->getMemOperand(),
-                                ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
-
-   if (VT.isFloatingPoint()) {
-     SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
-     return DAG.getMergeValues(Ops, DL);
-   }
-
-  return L;
-}
-
-static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
-  SDLoc DL(N);
-
-  SDValue Data = N->getOperand(2);
-  EVT DataVT = Data.getValueType();
-  EVT PtrTy = N->getOperand(4).getValueType();
-
-  if (DataVT.isFloatingPoint())
-    Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
-
-  auto *MINode = cast<MemIntrinsicSDNode>(N);
-  return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
-                            DAG.getUNDEF(PtrTy), MINode->getOperand(3),
-                            MINode->getMemoryVT(), MINode->getMemOperand(),
-                            ISD::UNINDEXED, false, false);
-}
-
 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.  The
 /// load store optimizer pass will merge them to store pair stores.  This should
 /// be better than a movi to create the vector zero followed by a vector store
@@ -12281,10 +12219,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     case Intrinsic::aarch64_neon_st3lane:
     case Intrinsic::aarch64_neon_st4lane:
       return performNEONPostLDSTCombine(N, DCI, DAG);
-    case Intrinsic::aarch64_sve_ldnt1:
-      return performLDNT1Combine(N, DAG);
-    case Intrinsic::aarch64_sve_stnt1:
-      return performSTNT1Combine(N, DAG);
     case Intrinsic::aarch64_sve_ld1_gather:
       return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
     case Intrinsic::aarch64_sve_ld1_gather_index:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9eef93cb9ce..48872dc09cd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -265,8 +265,7 @@ def nonext_masked_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
-         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
-         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
+         cast<MaskedLoadSDNode>(N)->isUnindexed();
 }]>;
 // sign extending masked load fragments.
 def asext_masked_load :
@@ -314,21 +313,12 @@
   return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
-def non_temporal_load :
-   PatFrag<(ops node:$ptr, node:$pred, node:$def),
-           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
-   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
-          cast<MaskedLoadSDNode>(N)->isUnindexed() &&
-          cast<MaskedLoadSDNode>(N)->isNonTemporal();
-}]>;
-
 // non-truncating masked store fragment.
 def nontrunc_masked_store :
   PatFrag<(ops node:$val, node:$ptr, node:$pred),
           (masked_st node:$val, node:$ptr, undef, node:$pred), [{
   return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
-         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
-         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
+         cast<MaskedStoreSDNode>(N)->isUnindexed();
 }]>;
 // truncating masked store fragments.
 def trunc_masked_store :
@@ -353,14 +343,6 @@
   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
-def non_temporal_store :
-  PatFrag<(ops node:$val, node:$ptr, node:$pred),
-          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
-  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
-         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
-         cast<MaskedStoreSDNode>(N)->isNonTemporal();
-}]>;
-
 // Node definitions.
 def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
 def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index f5818cccb15..4321c57705f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1179,18 +1179,6 @@ let Predicates = [HasSVE] in {
 
   // 16-element contiguous stores
   defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
-  defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRR>;
-  defm : pred_load<nxv8i16, nxv8i1,  non_temporal_load, LDNT1H_ZRR>;
-  defm : pred_load<nxv4i32, nxv4i1,  non_temporal_load, LDNT1W_ZRR>;
-  defm : pred_load<nxv2i64, nxv2i1,  non_temporal_load, LDNT1D_ZRR>;
-  defm : pred_load<nxv8f16, nxv8i1,  non_temporal_load, LDNT1H_ZRR>;
-  defm : pred_load<nxv4f32, nxv4i1,  non_temporal_load, LDNT1W_ZRR>;
-  defm : pred_load<nxv2f64, nxv2i1,  non_temporal_load, LDNT1D_ZRR>;
-
-  defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRR>;
-  defm : pred_store<nxv8i16, nxv8i1,  non_temporal_store, STNT1H_ZRR>;
-  defm : pred_store<nxv4i32, nxv4i1,  non_temporal_store, STNT1W_ZRR>;
-  defm : pred_store<nxv2i64, nxv2i1,  non_temporal_store, STNT1D_ZRR>;
 }
 
 let Predicates = [HasSVE2] in {
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
deleted file mode 100644
index 00408a959f6..00000000000
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-
-;
-; LDNT1B
-;
-
-define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
-; CHECK-LABEL: ldnt1b_i8:
-; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
-; CHECK-NEXT: ret
-  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred,
-                                                                 <vscale x 16 x i8>* %addr)
-  ret <vscale x 16 x i8> %res
-}
-
-;
-; LDNT1H
-;
-
-define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
-; CHECK-LABEL: ldnt1h_i16:
-; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
-; CHECK-NEXT: ret
-  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %pred,
-                                                                 <vscale x 8 x i16>* %addr)
-  ret <vscale x 8 x i16> %res
-}
-
-define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
-; CHECK-LABEL: ldnt1h_f16:
-; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
-; CHECK-NEXT: ret
-  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %pred,
-                                                                  <vscale x 8 x half>* %addr)
-  ret <vscale x 8 x half> %res
-}
-
-;
-; LDNT1W
-;
-
-define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
-; CHECK-LABEL: ldnt1w_i32:
-; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
-; CHECK-NEXT: ret
-  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred,
-                                                                 <vscale x 4 x i32>* %addr)
-  ret <vscale x 4 x i32> %res
-}
-
-define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
-; CHECK-LABEL: ldnt1w_f32:
-; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
-; CHECK-NEXT: ret
-  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %pred,
-                                                                   <vscale x 4 x float>* %addr)
-  ret <vscale x 4 x float> %res
-}
-
-;
-; LDNT1D
-;
-
-define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
-; CHECK-LABEL: ldnt1d_i64:
-; CHECK: ldnt1d { z0.d }, p0/z, [x0, #0, lsl #3]
-; CHECK-NEXT: ret
-  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pred,
-                                                                 <vscale x 2 x i64>* %addr)
-  ret <vscale x 2 x i64> %res
-}
-
-define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
-; CHECK-LABEL: ldnt1d_f64:
-; CHECK: ldnt1d { z0.d }, p0/z, [x0, #0, lsl #3]
-; CHECK-NEXT: ret
-  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pred,
-                                                                    <vscale x 2 x double>* %addr)
-  ret <vscale x 2 x double> %res
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
-declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
-declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
deleted file mode 100644
index 2e409366f16..00000000000
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-
-;
-; STNT1B
-;
-
-define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
-; CHECK-LABEL: stnt1b_i8:
-; CHECK: stnt1b { z0.b }, p0, [x0, #0]
-; CHECK-NEXT: ret
-  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
-                                            <vscale x 16 x i1> %pred,
-                                            <vscale x 16 x i8>* %addr)
-  ret void
-}
-
-;
-; STNT1H
-;
-
-define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
-; CHECK-LABEL: stnt1h_i16:
-; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
-; CHECK-NEXT: ret
-  call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
-                                            <vscale x 8 x i1> %pred,
-                                            <vscale x 8 x i16>* %addr)
-  ret void
-}
-
-define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
-; CHECK-LABEL: stnt1h_f16:
-; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
-; CHECK-NEXT: ret
-  call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
-                                            <vscale x 8 x i1> %pred,
-                                            <vscale x 8 x half>* %addr)
-  ret void
-}
-
-;
-; STNT1W
-;
-
-define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
-; CHECK-LABEL: stnt1w_i32:
-; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
-; CHECK-NEXT: ret
-  call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
-                                            <vscale x 4 x i1> %pred,
-                                            <vscale x 4 x i32>* %addr)
-  ret void
-}
-
-define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
-; CHECK-LABEL: stnt1w_f32:
-; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
-; CHECK-NEXT: ret
-  call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
-                                            <vscale x 4 x i1> %pred,
-                                            <vscale x 4 x float>* %addr)
-  ret void
-}
-
-;
-; STNT1D
-;
-
-define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
-; CHECK-LABEL: stnt1d_i64:
-; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
-; CHECK-NEXT: ret
-  call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
-                                            <vscale x 2 x i1> %pred,
-                                            <vscale x 2 x i64>* %addr)
-  ret void
-}
-
-define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
-; CHECK-LABEL: stnt1d_f64:
-; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
-; CHECK-NEXT: ret
-  call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
-                                            <vscale x 2 x i1> %pred,
-                                            <vscale x 2 x double>* %addr)
-  ret void
-}
-
-declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)

