Diffstat (limited to 'llvm/lib/Target/ARM')
 llvm/lib/Target/ARM/ARMISelLowering.cpp        |  31
 llvm/lib/Target/ARM/ARMInstrMVE.td             |  83
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp |  20
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h   |   3
4 files changed, 137 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d341ee820d3..0ffb931bc4f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -259,6 +259,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::UMAX, VT, Legal);
     setOperationAction(ISD::ABS, VT, Legal);
     setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::MLOAD, VT, Custom);
+    setOperationAction(ISD::MSTORE, VT, Legal);
 
     // No native support for these.
     setOperationAction(ISD::UDIV, VT, Expand);
@@ -304,6 +306,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::MLOAD, VT, Custom);
+    setOperationAction(ISD::MSTORE, VT, Legal);
 
     // Pre and Post inc are supported on loads and stores
     for (unsigned im = (unsigned)ISD::PRE_INC;
@@ -8848,6 +8852,31 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
                       ST->getMemOperand());
 }
 
+static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
+  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
+  MVT VT = Op.getSimpleValueType();
+  SDValue Mask = N->getMask();
+  SDValue PassThru = N->getPassThru();
+  SDLoc dl(Op);
+
+  if (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
+      (PassThru->getOpcode() == ARMISD::VMOVIMM &&
+       isNullConstant(PassThru->getOperand(0))))
+    return Op;
+
+  // MVE Masked loads use zero as the passthru value. Here we convert undef to
+  // zero too, and other values are lowered to a select.
+  SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+                                DAG.getTargetConstant(0, dl, MVT::i32));
+  SDValue NewLoad = DAG.getMaskedLoad(
+      VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(),
+      N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad());
+  SDValue Combo = NewLoad;
+  if (!PassThru.isUndef())
+    Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
+  return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
+}
+
 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
   if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
     // Acquire/Release load/store is not legal for targets without a dmb or
@@ -9051,6 +9080,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerPredicateLoad(Op, DAG);
   case ISD::STORE:
     return LowerPredicateStore(Op, DAG);
+  case ISD::MLOAD:
+    return LowerMLOAD(Op, DAG);
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
   case ISD::FSINCOS:      return LowerFSINCOS(Op, DAG);
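Note (added for illustration, not part of the patch): LowerMLOAD above relies on the MVE predicated loads writing zero to the inactive lanes, so a masked load whose passthru is neither undef nor zero is rebuilt as a zero-passthru load followed by a select on the same mask. A minimal C++ sketch of the lane-wise semantics being preserved, assuming a 4 x i32 access:

// Illustrative sketch only: the scalar semantics that LowerMLOAD preserves.
// The hardware load writes 0 to inactive lanes; the extra VSELECT restores
// the requested passthru value in those lanes.
#include <array>
#include <cstdint>

std::array<int32_t, 4> maskedLoadWithPassThru(const int32_t *Ptr,
                                              const std::array<bool, 4> &Mask,
                                              const std::array<int32_t, 4> &PassThru) {
  std::array<int32_t, 4> Loaded{};  // zero-passthru masked load: inactive lanes stay 0
  for (unsigned I = 0; I < 4; ++I)
    if (Mask[I])
      Loaded[I] = Ptr[I];

  std::array<int32_t, 4> Result;    // the select emitted when PassThru is not undef/zero
  for (unsigned I = 0; I < 4; ++I)
    Result[I] = Mask[I] ? Loaded[I] : PassThru[I];
  return Result;
}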
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 93c976a85e1..5b4b65ada4e 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -4892,6 +4892,10 @@ class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
                              PatFrag StoreKind, int shift>
   : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
         (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
+                                   PatFrag StoreKind, int shift>
+  : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
+        (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred)>;
 
 multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
                             int shift> {
@@ -4908,6 +4912,10 @@ class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
                             PatFrag LoadKind, int shift>
   : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
         (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
+                                  PatFrag LoadKind, int shift>
+  : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
+        (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred))>;
 
 multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
                            int shift> {
@@ -4953,6 +4961,28 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
   return cast<StoreSDNode>(N)->getAlignment() >= 2;
 }]>;
 
+def alignedmaskedload32 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                                  (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def alignedmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                                  (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 2;
+}]>;
+def maskedload : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                         (masked_ld node:$ptr, node:$pred, node:$passthru)>;
+
+def alignedmaskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+                                   (masked_st node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def alignedmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+                                   (masked_st node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+def maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+                          (masked_st node:$val, node:$ptr, node:$pred)>;
+
 let Predicates = [HasMVEInt, IsLE] in {
   // Stores
   defm : MVE_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
@@ -4971,6 +5001,26 @@ let Predicates = [HasMVEInt, IsLE] in {
   defm : MVE_vector_offset_store<MVE_VSTRHU16_post, aligned16_post_store, 1>;
   defm : MVE_vector_offset_store<MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
   defm : MVE_vector_offset_store<MVE_VSTRWU32_post, aligned32_post_store, 2>;
+
+  // Unaligned masked stores (aligned are below)
+  def : Pat<(maskedstore (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  def : Pat<(maskedstore (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  def : Pat<(maskedstore (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+
+  // Unaligned masked loads
+  def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))),
+            (v4f32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))),
+            (v8i16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))),
+            (v8f16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
 }
 
 let Predicates = [HasMVEInt, IsBE] in {
@@ -5025,8 +5075,41 @@ let Predicates = [HasMVEInt, IsBE] in {
   def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
   def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
   def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
+
+  // Unaligned masked stores (aligned are below)
+  def : Pat<(maskedstore (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  def : Pat<(maskedstore (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  def : Pat<(maskedstore (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  // Unaligned masked loads
+  def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
+  def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))),
+            (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
+  def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))),
+            (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
+  def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))),
+            (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
 }
 
+let Predicates = [HasMVEInt] in {
+  // Aligned masked store, shared between LE and BE
+  def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, maskedstore, 0>;
+  def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, alignedmaskedstore16, 1>;
+  def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, alignedmaskedstore16, 1>;
+  def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, alignedmaskedstore32, 2>;
+  def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, alignedmaskedstore32, 2>;
+  // Aligned masked loads
+  def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload, 0>;
+  def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>;
+  def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>;
+  def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>;
+  def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>;
+}
 // Widening/Narrowing Loads/Stores
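Note (illustrative, not part of the patch): the TableGen patterns above encode a simple policy. Sufficiently aligned masked accesses select the element-sized VLDR/VSTR forms shared by both endiannesses, while under-aligned 16- and 32-bit accesses fall back to the byte-wise MVE_VLDRBU8/MVE_VSTRBU8 forms, with an extra MVE_VREV16_8/MVE_VREV32_8 on big-endian to restore lane order. A small C++ sketch of that choice for a 128-bit masked store, using the instruction names from the patterns:

// Illustrative sketch of the instruction choice encoded by the patterns above,
// for a 128-bit masked store. This is not LLVM code; the returned strings are
// the MVE instruction names used in the patterns.
#include <string>

std::string pickMaskedStore(unsigned EltBits, unsigned AlignBytes, bool BigEndian) {
  if (EltBits == 32 && AlignBytes >= 4)
    return "MVE_VSTRWU32";                 // alignedmaskedstore32 patterns
  if (EltBits == 16 && AlignBytes >= 2)
    return "MVE_VSTRHU16";                 // alignedmaskedstore16 patterns
  if (EltBits == 8)
    return "MVE_VSTRBU8";                  // v16i8 always uses the byte form
  // Under-aligned 16/32-bit elements: byte-wise store; big-endian must first
  // byte-reverse the lanes (MVE_VREV16_8 / MVE_VREV32_8).
  return BigEndian ? "MVE_VREV + MVE_VSTRBU8" : "MVE_VSTRBU8";
}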
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index e74b2b1a2ba..e8ac760786f 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -36,6 +36,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "armtti"
 
+static cl::opt<bool> EnableMaskedLoadStores(
+    "enable-arm-maskedldst", cl::Hidden, cl::init(false),
+    cl::desc("Enable the generation of masked loads and stores"));
+
 static cl::opt<bool> DisableLowOverheadLoops(
     "disable-arm-loloops", cl::Hidden, cl::init(false),
     cl::desc("Disable the generation of low-overhead loops"));
@@ -487,6 +491,22 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
   return BaseT::getAddressComputationCost(Ty, SE, Ptr);
 }
 
+bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy) {
+  if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
+    return false;
+
+  if (DataTy->isVectorTy()) {
+    // We don't yet support narrowing or widening masked loads/stores. Expand
+    // them for the moment.
+    unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
+    if (VecWidth != 128)
+      return false;
+  }
+
+  unsigned EltWidth = DataTy->getScalarSizeInBits();
+  return EltWidth == 32 || EltWidth == 16 || EltWidth == 8;
+}
+
 int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
   const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
   assert(MI && "MemcpyInst expected");
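Note (illustrative, not part of the patch): ARMTTIImpl::isLegalMaskedLoad above is the hook the vectorizer consults. It only reports full 128-bit vectors with 8-, 16- or 32-bit elements as legal, and only when MVE integer ops are available and the default-off -enable-arm-maskedldst flag is set. A hedged restatement as a free function (the function name and boolean/width parameters are invented for this sketch and stand in for the Type and Subtarget queries):

// Illustrative restatement of the legality rule in ARMTTIImpl::isLegalMaskedLoad.
// Not the LLVM API; plain values replace the Type*/Subtarget queries.
bool isLegalMVEMaskedAccess(bool EnableMaskedLdSt, bool HasMVEInt,
                            bool IsVector, unsigned VecBits, unsigned EltBits) {
  if (!EnableMaskedLdSt || !HasMVEInt)
    return false;                 // gated behind -enable-arm-maskedldst and MVE
  if (IsVector && VecBits != 128)
    return false;                 // no narrowing/widening masked ops yet
  return EltBits == 8 || EltBits == 16 || EltBits == 32;
}

isLegalMaskedStore forwards to the same check, as the header change below shows.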
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 507e0188549..47e98dac9f6 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -152,6 +152,9 @@ public:
     return ST->getMaxInterleaveFactor();
   }
 
+  bool isLegalMaskedLoad(Type *DataTy);
+  bool isLegalMaskedStore(Type *DataTy) { return isLegalMaskedLoad(DataTy); }
+
   int getMemcpyCost(const Instruction *I);
 
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);

