author     Sam Parker <sam.parker@arm.com>    2019-10-17 07:55:55 +0000
committer  Sam Parker <sam.parker@arm.com>    2019-10-17 07:55:55 +0000
commit     39af8a3a3b666929752e6bdff0bd65fedbbc34e8 (patch)
tree       064a3f3e4404889dfb732aa7c0259bd1a138fba4 /llvm/lib/Target/ARM
parent     882c43d703cd63889a5541bf8f2c014733cbbbee (diff)
[DAGCombine][ARM] Enable extending masked loads
Add a generic DAG combine for extending masked loads. This allows us to
generate sext/zext masked loads which can access v4i8, v8i8 and v4i16
memory to produce v4i32, v8i16 and v4i32 respectively.

Differential Revision: https://reviews.llvm.org/D68337

llvm-svn: 375085
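As a rough illustration (a hypothetical example, not taken from the patch or
its tests), a loop like the one below can, under MVE tail predication, end up
with a masked v8i8 load whose result is immediately sign-extended to v8i16.
The new combine folds the extend into the load so it can be selected as a
single widening MVE load (roughly a vldrb.s16) instead of an unextended byte
load plus a separate extend.

    #include <cstdint>

    // Hypothetical example: the predicated i8 loads feeding the i16
    // accumulate become a masked v8i8 load that is sign-extended to v8i16 --
    // exactly the shape the new combine turns into one extending masked load.
    void accumulate(int16_t *dst, const int8_t *src, int n) {
      for (int i = 0; i < n; ++i)
        dst[i] += src[i];   // i8 loaded, sign-extended, added as i16
    }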
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp        |  17
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td             | 102
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp |  17
3 files changed, 99 insertions, 37 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 71d53a389e9..e9e3c664350 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -8898,9 +8898,13 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
SDValue PassThru = N->getPassThru();
SDLoc dl(Op);
- if (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
+ auto IsZero = [](SDValue PassThru) {
+ return (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
(PassThru->getOpcode() == ARMISD::VMOVIMM &&
- isNullConstant(PassThru->getOperand(0))))
+ isNullConstant(PassThru->getOperand(0))));
+ };
+
+ if (IsZero(PassThru))
return Op;
// MVE Masked loads use zero as the passthru value. Here we convert undef to
@@ -8911,7 +8915,9 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(),
N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad());
SDValue Combo = NewLoad;
- if (!PassThru.isUndef())
+ if (!PassThru.isUndef() &&
+ (PassThru.getOpcode() != ISD::BITCAST ||
+ !IsZero(PassThru->getOperand(0))))
Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
}
@@ -14698,6 +14704,11 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
if (!isTypeLegal(VT))
return false;
+ if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
+ if (Ld->isExpandingLoad())
+ return false;
+ }
+
// Don't create a loadext if we can fold the extension into a wide/long
// instruction.
// If there's more than one user instruction, the loadext is desirable no
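The generic combine itself lives in DAGCombiner and is outside the Target/ARM
part of the diff shown here. Conceptually it works along the following lines
(a simplified sketch, not the actual DAGCombiner code): when a sext/zext's
operand is a non-extending masked load with a single use, and the target
reports the extending form as both legal and desirable (the hook amended
above), the extension is folded into the load.

    // Simplified sketch of the generic fold; details of the real
    // DAGCombiner change may differ.
    static SDValue foldExtOfMaskedLoad(SDNode *Ext, SelectionDAG &DAG,
                                       const TargetLowering &TLI,
                                       ISD::LoadExtType ExtTy, // SEXTLOAD/ZEXTLOAD
                                       unsigned ExtOpc) {      // ISD::SIGN_EXTEND/ZERO_EXTEND
      SDValue Src = Ext->getOperand(0);
      auto *Ld = dyn_cast<MaskedLoadSDNode>(Src);
      EVT VT = Ext->getValueType(0);

      // Only fold a plain (non-extending) masked load with no other users,
      // and only if the target can and wants to select the extending form.
      if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD ||
          !Src.hasOneUse() ||
          !TLI.isLoadExtLegal(ExtTy, VT, Ld->getValueType(0)) ||
          !TLI.isVectorLoadExtDesirable(SDValue(Ext, 0)))
        return SDValue();

      SDLoc dl(Ld);
      // The passthru value must be extended along with the loaded value.
      SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
      SDValue NewLoad = DAG.getMaskedLoad(
          VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getMask(), PassThru,
          Ld->getMemoryVT(), Ld->getMemOperand(), ExtTy, Ld->isExpandingLoad());
      // Re-point users of the old load's chain at the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
      return NewLoad;
    }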
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 265ea79e7b2..5546fdf68ed 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -5071,16 +5071,52 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
return cast<StoreSDNode>(N)->getAlignment() >= 2;
}]>;
-def alignedmaskedload32 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 4;
+
+def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ return Ld->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
+def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
+}]>;
+def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
}]>;
-def alignedmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 2;
+def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
+def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
+}]>;
+def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ EVT ScalarVT = Ld->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
}]>;
-def maskedload : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru)>;
def alignedmaskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
(masked_st node:$val, node:$ptr, node:$pred), [{
@@ -5090,6 +5126,7 @@ def alignedmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
(masked_st node:$val, node:$ptr, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2;
}]>;
+
def maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$pred),
(masked_st node:$val, node:$ptr, node:$pred)>;
@@ -5121,16 +5158,6 @@ let Predicates = [HasMVEInt, IsLE] in {
(MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
(MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
-
- // Unaligned masked loads
- def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))),
- (v4f32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))),
- (v8f16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
}
let Predicates = [HasMVEInt, IsBE] in {
@@ -5195,15 +5222,6 @@ let Predicates = [HasMVEInt, IsBE] in {
(MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
(MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
- // Unaligned masked loads
- def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
- def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))),
- (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
- def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
- def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))),
- (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>;
}
let Predicates = [HasMVEInt] in {
@@ -5214,11 +5232,39 @@ let Predicates = [HasMVEInt] in {
def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, alignedmaskedstore32, 2>;
def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, alignedmaskedstore32, 2>;
// Aligned masked loads
- def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload, 0>;
+ def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>;
def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>;
def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>;
def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>;
def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>;
+ // Extending masked loads.
+ def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v8i16 NEONimmAllZerosV))),
+ (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v8i16 NEONimmAllZerosV))),
+ (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v8i16 NEONimmAllZerosV))),
+ (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+ def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
+ (v4i32 NEONimmAllZerosV))),
+ (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
}
// Widening/Narrowing Loads/Stores
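Taken together, the new PatFrags let the existing widening MVE loads match
extending masked loads directly: sextmaskedload8 selects MVE_VLDRBS16/
MVE_VLDRBS32, zextmaskedload8 (and anyext) selects MVE_VLDRBU16/MVE_VLDRBU32,
and the aligned 16-bit variants select MVE_VLDRHS32/MVE_VLDRHU32. A
source-level illustration of the 16-bit case (hypothetical, not from the
patch's tests):

    #include <cstdint>

    // Hypothetical example: under MVE tail predication the i16 loads become
    // a masked v4i16 load, and the sign-extension to i32 can now be folded
    // into it, which the patterns above select as MVE_VLDRHS32 (roughly a
    // vldrh.s32). With uint16_t the zero-extending MVE_VLDRHU32 form would
    // be matched instead.
    int32_t dot16(const int16_t *a, const int32_t *b, int n) {
      int32_t sum = 0;
      for (int i = 0; i < n; ++i)
        sum += int32_t(a[i]) * b[i];
      return sum;
    }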
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index b5ed0755a4b..86c8684d14d 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -495,16 +495,21 @@ bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
return false;
- if (DataTy->isVectorTy()) {
- // We don't yet support narrowing or widening masked loads/stores. Expand
- // them for the moment.
- unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
- if (VecWidth != 128)
+ if (auto *VecTy = dyn_cast<VectorType>(DataTy)) {
+ // Don't support v2i1 yet.
+ if (VecTy->getNumElements() == 2)
+ return false;
+
+ // We don't support extending fp types.
+ unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
+ if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
return false;
}
unsigned EltWidth = DataTy->getScalarSizeInBits();
- return EltWidth == 32 || EltWidth == 16 || EltWidth == 8;
+ return (EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
+ (EltWidth == 16 && (!Alignment || Alignment >= 2)) ||
+ (EltWidth == 8);
}
int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
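The reworked isLegalMaskedLoad now keys legality off both the element width
and the requested alignment: 8-bit elements are always accepted, 16-bit
elements need 2-byte alignment, 32-bit elements need 4-byte alignment (an
unspecified alignment is accepted), 2-element vectors are rejected, and
floating-point vectors that would require widening are still expanded. A
standalone restatement of that rule (a sketch mirroring the logic above, not
the LLVM hook itself, which also checks the MVE subtarget and the
EnableMaskedLoadStores option):

    // 'VecBits' is the total vector width, 'EltBits' the element width,
    // 'IsFP' whether the element type is floating point, 'NumElts' the
    // element count, and 'Align' the known alignment in bytes (0 if unknown).
    bool isLegalMVEMaskedLoad(unsigned VecBits, unsigned EltBits, bool IsFP,
                              unsigned NumElts, unsigned Align) {
      if (NumElts == 2)            // 2-element (v2i1-predicated) vectors: not yet.
        return false;
      if (VecBits != 128 && IsFP)  // No extending loads of FP vectors.
        return false;
      return (EltBits == 32 && (Align == 0 || Align >= 4)) ||
             (EltBits == 16 && (Align == 0 || Align >= 2)) ||
             (EltBits == 8);
    }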