| author | Sam Parker <sam.parker@arm.com> | 2019-10-17 07:55:55 +0000 |
|---|---|---|
| committer | Sam Parker <sam.parker@arm.com> | 2019-10-17 07:55:55 +0000 |
| commit | 39af8a3a3b666929752e6bdff0bd65fedbbc34e8 (patch) | |
| tree | 064a3f3e4404889dfb732aa7c0259bd1a138fba4 /llvm/lib/Target/ARM | |
| parent | 882c43d703cd63889a5541bf8f2c014733cbbbee (diff) | |
| download | bcm5719-llvm-39af8a3a3b666929752e6bdff0bd65fedbbc34e8.tar.gz bcm5719-llvm-39af8a3a3b666929752e6bdff0bd65fedbbc34e8.zip | |
[DAGCombine][ARM] Enable extending masked loads
Add a generic DAG combine for extending masked loads. This allows us
to generate sext/zext masked loads that read v4i8, v8i8 and v4i16
memory and produce v4i32, v8i16 and v4i32 results respectively.
Differential Revision: https://reviews.llvm.org/D68337
llvm-svn: 375085
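
For intuition, here is a minimal scalar model of what one of the newly supported operations computes. It is a sketch only: the function name, the fixed v8i8-to-v8i16 shape, and the plain-array memory are illustrative, not LLVM code. With this patch, MVE can select the whole operation as a single predicated VLDRBS16 instead of an unextended masked load plus a separate sign extension.

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

// Minimal scalar model of a sign-extending masked load (illustrative only,
// not LLVM code): each active lane reads an i8 from memory and sign-extends
// it to i16; inactive lanes take the zero passthru that MVE requires.
static std::array<int16_t, 8> maskedSextLoadV8i8(const int8_t *Ptr,
                                                 const std::array<bool, 8> &Mask) {
  std::array<int16_t, 8> Result{}; // zero passthru
  for (unsigned Lane = 0; Lane < 8; ++Lane)
    if (Mask[Lane])
      Result[Lane] = static_cast<int16_t>(Ptr[Lane]); // sext i8 -> i16
  return Result;
}

int main() {
  const int8_t Mem[8] = {-1, 2, -3, 4, -5, 6, -7, 8};
  const std::array<bool, 8> Mask = {true, false, true, true,
                                    false, true, false, true};
  for (int16_t Lane : maskedSextLoadV8i8(Mem, Mask))
    std::printf("%d ", Lane); // prints: -1 0 -3 4 0 6 0 8
  std::printf("\n");
}
```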
Diffstat (limited to 'llvm/lib/Target/ARM')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 17 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 102 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 17 |
3 files changed, 99 insertions, 37 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 71d53a389e9..e9e3c664350 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -8898,9 +8898,13 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
   SDValue PassThru = N->getPassThru();
   SDLoc dl(Op);
 
-  if (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
+  auto IsZero = [](SDValue PassThru) {
+    return (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
       (PassThru->getOpcode() == ARMISD::VMOVIMM &&
-       isNullConstant(PassThru->getOperand(0))))
+       isNullConstant(PassThru->getOperand(0))));
+  };
+
+  if (IsZero(PassThru))
     return Op;
 
   // MVE Masked loads use zero as the passthru value. Here we convert undef to
@@ -8911,7 +8915,9 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
       VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(),
       N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad());
   SDValue Combo = NewLoad;
-  if (!PassThru.isUndef())
+  if (!PassThru.isUndef() &&
+      (PassThru.getOpcode() != ISD::BITCAST ||
+       !IsZero(PassThru->getOperand(0))))
     Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
   return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
 }
@@ -14698,6 +14704,11 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
   if (!isTypeLegal(VT))
     return false;
 
+  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
+    if (Ld->isExpandingLoad())
+      return false;
+  }
+
   // Don't create a loadext if we can fold the extension into a wide/long
   // instruction.
   // If there's more than one user instruction, the loadext is desirable no
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 265ea79e7b2..5546fdf68ed 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -5071,16 +5071,52 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
   return cast<StoreSDNode>(N)->getAlignment() >= 2;
 }]>;
-def alignedmaskedload32 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
-                                  (masked_ld node:$ptr, node:$pred, node:$passthru), [{
-  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 4;
+
+def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                          (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+  auto *Ld = cast<MaskedLoadSDNode>(N);
+  return Ld->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                              (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                              (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
+def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                             (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+  auto *Ld = cast<MaskedLoadSDNode>(N);
+  EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+  return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
+}]>;
+def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                                 (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+  auto *Ld = cast<MaskedLoadSDNode>(N);
+  EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+  return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
 }]>;
-def alignedmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
-                                  (masked_ld node:$ptr, node:$pred, node:$passthru), [{
-  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 2;
+def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                               (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                               (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
+def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                              (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+  auto *Ld = cast<MaskedLoadSDNode>(N);
+  EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+  return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
+}]>;
+def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                                 (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+  auto *Ld = cast<MaskedLoadSDNode>(N);
+  EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+  return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
 }]>;
-def maskedload : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
-                         (masked_ld node:$ptr, node:$pred, node:$passthru)>;
 
 def alignedmaskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
                                    (masked_st node:$val, node:$ptr, node:$pred), [{
@@ -5090,6 +5126,7 @@
 def alignedmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
                                    (masked_st node:$val, node:$ptr, node:$pred), [{
   return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2;
 }]>;
+
 def maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$pred),
                           (masked_st node:$val, node:$ptr, node:$pred)>;
@@ -5121,16 +5158,6 @@ let Predicates = [HasMVEInt, IsLE] in {
             (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
   def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
             (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
-
-  // Unaligned masked loads
-  def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))),
-            (v4i32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
-  def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))),
-            (v4f32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
-  def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))),
-            (v8i16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
-  def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))),
-            (v8f16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
 }
 
 let Predicates = [HasMVEInt, IsBE] in {
@@ -5195,15 +5222,6 @@ let Predicates = [HasMVEInt, IsBE] in {
             (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
   def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
             (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
-  // Unaligned masked loads
-  def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))),
-            (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
-  def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))),
-            (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
-  def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))),
-            (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
-  def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))),
-            (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
 }
 
 let Predicates = [HasMVEInt] in {
@@ -5214,11 +5232,39 @@ let Predicates = [HasMVEInt] in {
   def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, alignedmaskedstore32, 2>;
   def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, alignedmaskedstore32, 2>;
   // Aligned masked loads
-  def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload, 0>;
+  def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>;
   def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>;
   def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>;
   def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>;
   def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>;
+  // Extending masked loads.
+  def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+                    (v8i16 NEONimmAllZerosV))),
+            (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+                    (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+                    (v8i16 NEONimmAllZerosV))),
+            (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+                    (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+                    (v8i16 NEONimmAllZerosV))),
+            (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+                    (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+                    (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+                    (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+  def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+                    (v4i32 NEONimmAllZerosV))),
+            (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
 }
 
 // Widening/Narrowing Loads/Stores
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index b5ed0755a4b..86c8684d14d 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -495,16 +495,21 @@ bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
   if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
     return false;
 
-  if (DataTy->isVectorTy()) {
-    // We don't yet support narrowing or widening masked loads/stores. Expand
-    // them for the moment.
-    unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
-    if (VecWidth != 128)
+  if (auto *VecTy = dyn_cast<VectorType>(DataTy)) {
+    // Don't support v2i1 yet.
+    if (VecTy->getNumElements() == 2)
+      return false;
+
+    // We don't support extending fp types.
+    unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
+    if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
       return false;
   }
 
   unsigned EltWidth = DataTy->getScalarSizeInBits();
-  return EltWidth == 32 || EltWidth == 16 || EltWidth == 8;
+  return (EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
+         (EltWidth == 16 && (!Alignment || Alignment >= 2)) ||
+         (EltWidth == 8);
 }
 
 int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
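
The ARMISelLowering.cpp hunk factors the zero test on the passthru value into an IsZero lambda and reuses it to skip the merging VSELECT when the passthru is a bitcast of a zero vector. A hypothetical toy-node restatement of that control flow follows; the ToyNode type and helpers are invented for illustration only, since the real code inspects SDValues:

```cpp
#include <cassert>

// Toy stand-in for the DAG nodes LowerMLOAD looks at.
struct ToyNode {
  enum Kind { AllZerosBuildVector, VMOVImm, BitCast, Undef, Other } K;
  int Imm = 0;                  // only meaningful for VMOVImm
  const ToyNode *Op0 = nullptr; // only meaningful for BitCast
};

// Mirrors the IsZero lambda: an all-zeros build vector, or a VMOVIMM of 0.
static bool isZero(const ToyNode &N) {
  return N.K == ToyNode::AllZerosBuildVector ||
         (N.K == ToyNode::VMOVImm && N.Imm == 0);
}

// Mirrors the new VSELECT guard: no merge is needed when the passthru is
// undef or a bitcast of zero. (A passthru that is itself zero never reaches
// this point; the real code returns the original operation earlier.)
static bool needsSelect(const ToyNode &PassThru) {
  if (PassThru.K == ToyNode::Undef || isZero(PassThru))
    return false;
  return PassThru.K != ToyNode::BitCast || !PassThru.Op0 ||
         !isZero(*PassThru.Op0);
}

int main() {
  ToyNode Zeros{ToyNode::AllZerosBuildVector};
  ToyNode CastOfZeros{ToyNode::BitCast, 0, &Zeros};
  ToyNode Arbitrary{ToyNode::Other};
  assert(!needsSelect(Zeros));       // already the value the load produces
  assert(!needsSelect(CastOfZeros)); // the new case this patch handles
  assert(needsSelect(Arbitrary));    // genuine merge still required
}
```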
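The ARMInstrMVE.td hunk builds the new selection predicates as layered PatFrags: a base fragment checks the memory element type (and, for 16- and 32-bit elements, alignment), and sext/zext/ext fragments refine it by extension kind. A rough C++ restatement of that layering, using hypothetical structs rather than the MaskedLoadSDNode the real TableGen predicates inspect:

```cpp
#include <cassert>

enum class ExtKind { NonExt, Sext, Zext, AnyExt };

// Hypothetical summary of the properties the PatFrag predicates query.
struct MaskedLoad {
  unsigned ScalarBits; // width of the memory element type, in bits
  bool IsFloat;        // f16/f32 rather than i16/i32
  unsigned AlignBytes; // known alignment of the access
  ExtKind Ext;
};

// maskedload8: memory scalar type is i8; bytes have no alignment demand.
static bool maskedload8(const MaskedLoad &L) {
  return L.ScalarBits == 8 && !L.IsFloat;
}
// sextmaskedload8 refines maskedload8 by extension kind; a SEXTLOAD
// already implies an integer memory type.
static bool sextmaskedload8(const MaskedLoad &L) {
  return maskedload8(L) && L.Ext == ExtKind::Sext;
}
// alignedmaskedload16: i16 or f16 memory type, at least 2-byte aligned.
static bool alignedmaskedload16(const MaskedLoad &L) {
  return L.ScalarBits == 16 && L.AlignBytes >= 2;
}
// sextmaskedload16 wraps alignedmaskedload16, just as the .td file does.
static bool sextmaskedload16(const MaskedLoad &L) {
  return alignedmaskedload16(L) && L.Ext == ExtKind::Sext;
}

int main() {
  // A naturally aligned v4i16 -> v4i32 sign-extending masked load matches
  // the 16-bit fragment (selected as MVE_VLDRHS32) but not the 8-bit one.
  const MaskedLoad L{16, false, 2, ExtKind::Sext};
  assert(sextmaskedload16(L) && !sextmaskedload8(L));
}
```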
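Finally, the ARMTargetTransformInfo.cpp hunk makes masked-load legality depend on element width and known alignment. Below is a standalone sketch of just that decision, under stated assumptions: std::optional<unsigned> stands in for MaybeAlign, the vector properties are passed directly instead of being queried from an llvm::Type, and the EnableMaskedLoadStores/MVE subtarget checks are omitted:

```cpp
#include <cassert>
#include <optional>

// Sketch of the new legality rule (not the real ARMTTIImpl signature).
static bool isLegalMaskedLoadSketch(unsigned EltBits, unsigned NumElts,
                                    bool EltIsFloat,
                                    std::optional<unsigned> Align) {
  // Don't support v2i1 predicates yet.
  if (NumElts == 2)
    return false;

  // Extending fp loads are not supported, so any fp vector must already
  // fill a 128-bit MVE register.
  unsigned VecWidth = EltBits * NumElts;
  if (VecWidth != 128 && EltIsFloat)
    return false;

  // 8/16/32-bit elements are legal, provided any known alignment meets the
  // element's natural alignment.
  return (EltBits == 32 && (!Align || *Align >= 4)) ||
         (EltBits == 16 && (!Align || *Align >= 2)) ||
         (EltBits == 8);
}

int main() {
  assert(isLegalMaskedLoadSketch(8, 8, false, std::nullopt)); // v8i8, extending use
  assert(isLegalMaskedLoadSketch(16, 4, false, 2));           // v4i16 -> v4i32
  assert(!isLegalMaskedLoadSketch(16, 4, false, 1));          // underaligned
  assert(!isLegalMaskedLoadSketch(16, 4, true, 2));           // 64-bit fp vector
}
```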

