path: root/llvm/lib/Target
author    David Green <david.green@arm.com>  2019-11-21 14:56:37 +0000
committer David Green <david.green@arm.com>  2019-11-26 16:21:01 +0000
commit    b5315ae8ffa6fb8befdd558d0dfd04295dbc5523 (patch)
tree      410cc867173b5eef25c6b4f0d75b2f5813457304 /llvm/lib/Target
parent    549db744bde29c8331411a4b41607a33c363c108 (diff)
download  bcm5719-llvm-b5315ae8ffa6fb8befdd558d0dfd04295dbc5523.tar.gz
          bcm5719-llvm-b5315ae8ffa6fb8befdd558d0dfd04295dbc5523.zip
[Codegen][ARM] Add addressing modes from masked loads and stores
MVE has a basic symmetry between its normal load/store operations and the masked variants. This means that masked loads and stores can use pre-inc and post-inc addressing modes, just like the standard loads and stores already do.

To enable that, this patch adds all the relevant infrastructure for treating masked load/store addressing modes in the same way as normal loads/stores. This involves:
- Adding an AddressingMode to MaskedLoadStoreSDNode, along with an extra Offset operand that is added after the PtrBase.
- Extending the IndexedModeActions from 8 bits to 16 bits to store the legality of masked operations as well as normal ones. This array is fairly small, so doubling the size still won't make it very large. Offset masked loads can then be controlled with setIndexedMaskedLoadAction, similar to standard loads.
- The same methods that combine to indexed loads, such as CombineToPostIndexedLoadStore, are adjusted to handle masked loads in the same way.
- The ARM backend is then adjusted to make use of these indexed masked loads/stores.
- The X86 backend is adjusted so that there are, hopefully, no functional changes.

Differential Revision: https://reviews.llvm.org/D70176
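As a minimal sketch of how a target opts in (not part of the commit text itself; it simply mirrors the loop this patch adds to ARMTargetLowering::addMVEVectorTypes in the diff below, with VT standing for one of the MVE vector types):

// Sketch: mark pre/post-indexed masked loads/stores as Legal for a vector
// type VT, mirroring the calls added in ARMTargetLowering::addMVEVectorTypes.
for (unsigned im = (unsigned)ISD::PRE_INC;
     im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
  setIndexedLoadAction(im, VT, Legal);
  setIndexedStoreAction(im, VT, Legal);
  setIndexedMaskedLoadAction(im, VT, Legal);   // new in this patch
  setIndexedMaskedStoreAction(im, VT, Legal);  // new in this patch
}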
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td  |  27
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp      | 118
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp      |  74
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td           | 118
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp      |  44
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |  35
6 files changed, 310 insertions(+), 106 deletions(-)
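For callers building these nodes, the visible API change in the hunks below is the extra Offset operand and addressing-mode argument on DAG.getMaskedLoad / DAG.getMaskedStore. A minimal sketch of the updated call shape, assuming N is an existing MaskedLoadSDNode being rebuilt (this follows the LowerMLOAD hunks further down):

// Sketch: rebuilding a masked load with the new Offset and AddressingMode
// arguments, as done in the LowerMLOAD changes in this diff.
SDValue NewLoad = DAG.getMaskedLoad(
    VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, PassThru,
    N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
    N->getExtensionType(), N->isExpandingLoad());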
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 80cf31ff3d5..ec84c1efbaf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -262,15 +262,17 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
// non-extending masked load fragment.
def nonext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (masked_ld node:$ptr, node:$pred, node:$def), [{
- return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
// sign extending masked load fragments.
def asext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (masked_ld node:$ptr, node:$pred, node:$def),[{
- return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
- cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+ (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
+ return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
+ cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def asext_masked_load_i8 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
@@ -290,8 +292,9 @@ def asext_masked_load_i32 :
// zero extending masked load fragments.
def zext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (masked_ld node:$ptr, node:$pred, node:$def), [{
- return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+ (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def zext_masked_load_i8 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
@@ -312,14 +315,16 @@ def zext_masked_load_i32 :
// non-truncating masked store fragment.
def nontrunc_masked_store :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
- return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
- return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 46a2560e167..a6b334938e1 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1351,11 +1351,27 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
SDValue &OffImm,
unsigned Shift) {
unsigned Opcode = Op->getOpcode();
- ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
- ? cast<LoadSDNode>(Op)->getAddressingMode()
- : cast<StoreSDNode>(Op)->getAddressingMode();
+ ISD::MemIndexedMode AM;
+ switch (Opcode) {
+ case ISD::LOAD:
+ AM = cast<LoadSDNode>(Op)->getAddressingMode();
+ break;
+ case ISD::STORE:
+ AM = cast<StoreSDNode>(Op)->getAddressingMode();
+ break;
+ case ISD::MLOAD:
+ AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
+ break;
+ case ISD::MSTORE:
+ AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode for Imm7Offset");
+ }
+
int RHSC;
- if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits.
+ // 7 bit constant, shifted by Shift.
+ if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
OffImm =
((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
@@ -1625,58 +1641,93 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
}
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- ISD::MemIndexedMode AM = LD->getAddressingMode();
- if (AM == ISD::UNINDEXED)
- return false;
- EVT LoadedVT = LD->getMemoryVT();
- if (!LoadedVT.isVector())
- return false;
- bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
- SDValue Offset;
- bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ EVT LoadedVT;
unsigned Opcode = 0;
- unsigned Align = LD->getAlignment();
- bool IsLE = Subtarget->isLittle();
+ bool isSExtLd, isPre;
+ unsigned Align;
+ ARMVCC::VPTCodes Pred;
+ SDValue PredReg;
+ SDValue Chain, Base, Offset;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return false;
+ LoadedVT = LD->getMemoryVT();
+ if (!LoadedVT.isVector())
+ return false;
+
+ Chain = LD->getChain();
+ Base = LD->getBasePtr();
+ Offset = LD->getOffset();
+ Align = LD->getAlignment();
+ isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ Pred = ARMVCC::None;
+ PredReg = CurDAG->getRegister(0, MVT::i32);
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return false;
+ LoadedVT = LD->getMemoryVT();
+ if (!LoadedVT.isVector())
+ return false;
+ Chain = LD->getChain();
+ Base = LD->getBasePtr();
+ Offset = LD->getOffset();
+ Align = LD->getAlignment();
+ isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+ isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ Pred = ARMVCC::Then;
+ PredReg = LD->getMask();
+ } else
+ llvm_unreachable("Expected a Load or a Masked Load!");
+
+ // We allow LE non-masked loads to change the type (for example use a vldrb.8
+ // as opposed to a vldrw.32). This can allow extra addressing modes or
+ // alignments for what is otherwise an equivalent instruction.
+ bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
+
+ SDValue NewOffset;
if (Align >= 2 && LoadedVT == MVT::v4i16 &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
} else if (LoadedVT == MVT::v8i8 &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
} else if (LoadedVT == MVT::v4i8 &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
} else if (Align >= 4 &&
- (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
+ (CanChangeType || LoadedVT == MVT::v4i32 ||
+ LoadedVT == MVT::v4f32) &&
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
else if (Align >= 2 &&
- (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
+ (CanChangeType || LoadedVT == MVT::v8i16 ||
+ LoadedVT == MVT::v8f16) &&
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
- else if ((IsLE || LoadedVT == MVT::v16i8) &&
- SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
+ else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
+ SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
else
return false;
- SDValue Chain = LD->getChain();
- SDValue Base = LD->getBasePtr();
- SDValue Ops[] = {Base, Offset,
- CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
- CurDAG->getRegister(0, MVT::i32), Chain};
- SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
+ SDValue Ops[] = {Base, NewOffset,
+ CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
+ Chain};
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0),
MVT::i32, MVT::Other, Ops);
transferMemOperands(N, New);
ReplaceUses(SDValue(N, 0), SDValue(New, 1));
@@ -3292,6 +3343,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ISD::MLOAD:
+ if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
+ return;
+ // Other cases are autogenerated.
+ break;
case ARMISD::WLS:
case ARMISD::LE: {
SDValue Ops[] = { N->getOperand(1),
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e359756b7bf..c153e786e2d 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -296,6 +296,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
+ setIndexedMaskedLoadAction(im, VT, Legal);
+ setIndexedMaskedStoreAction(im, VT, Legal);
}
}
@@ -322,6 +324,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
+ setIndexedMaskedLoadAction(im, VT, Legal);
+ setIndexedMaskedStoreAction(im, VT, Legal);
}
if (HasMVEFP) {
@@ -374,12 +378,12 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
// Pre and Post inc on these are legal, given the correct extends
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
- setIndexedLoadAction(im, MVT::v8i8, Legal);
- setIndexedStoreAction(im, MVT::v8i8, Legal);
- setIndexedLoadAction(im, MVT::v4i8, Legal);
- setIndexedStoreAction(im, MVT::v4i8, Legal);
- setIndexedLoadAction(im, MVT::v4i16, Legal);
- setIndexedStoreAction(im, MVT::v4i16, Legal);
+ for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
+ setIndexedLoadAction(im, VT, Legal);
+ setIndexedStoreAction(im, VT, Legal);
+ setIndexedMaskedLoadAction(im, VT, Legal);
+ setIndexedMaskedStoreAction(im, VT, Legal);
+ }
}
// Predicate types
@@ -9013,8 +9017,9 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(0, dl, MVT::i32));
SDValue NewLoad = DAG.getMaskedLoad(
- VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(),
- N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad());
+ VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
+ N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
+ N->getExtensionType(), N->isExpandingLoad());
SDValue Combo = NewLoad;
if (!PassThru.isUndef() &&
(PassThru.getOpcode() != ISD::BITCAST ||
@@ -15192,14 +15197,19 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
}
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
- bool isSEXTLoad, bool isLE, SDValue &Base,
- SDValue &Offset, bool &isInc,
- SelectionDAG &DAG) {
+ bool isSEXTLoad, bool IsMasked, bool isLE,
+ SDValue &Base, SDValue &Offset,
+ bool &isInc, SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
return false;
if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
return false;
+ // We allow LE non-masked loads to change the type (for example use a vldrb.8
+ // as opposed to a vldrw.32). This can allow extra addressing modes or
+ // alignments for what is otherwise an equivalent instruction.
+ bool CanChangeType = isLE && !IsMasked;
+
ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
int RHSC = (int)RHS->getZExtValue();
@@ -15218,7 +15228,7 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
};
// Try to find a matching instruction based on s/zext, Alignment, Offset and
- // (in BE) type.
+ // (in BE/masked) type.
Base = Ptr->getOperand(0);
if (VT == MVT::v4i16) {
if (Align >= 2 && IsInRange(RHSC, 0x80, 2))
@@ -15226,13 +15236,15 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
} else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
if (IsInRange(RHSC, 0x80, 1))
return true;
- } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ } else if (Align >= 4 &&
+ (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
IsInRange(RHSC, 0x80, 4))
return true;
- else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) &&
+ else if (Align >= 2 &&
+ (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
IsInRange(RHSC, 0x80, 2))
return true;
- else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
+ else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
return true;
return false;
}
@@ -15252,6 +15264,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue Ptr;
unsigned Align;
bool isSEXTLoad = false;
+ bool IsMasked = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
@@ -15261,6 +15274,17 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
Align = ST->getAlignment();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ Align = LD->getAlignment();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ Align = ST->getAlignment();
+ IsMasked = true;
} else
return false;
@@ -15269,8 +15293,8 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad,
- Subtarget->isLittle(), Base, Offset,
- isInc, DAG);
+ IsMasked, Subtarget->isLittle(), Base,
+ Offset, isInc, DAG);
else {
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
@@ -15298,6 +15322,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue Ptr;
unsigned Align;
bool isSEXTLoad = false, isNonExt;
+ bool IsMasked = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
@@ -15309,6 +15334,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
Ptr = ST->getBasePtr();
Align = ST->getAlignment();
isNonExt = !ST->isTruncatingStore();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ Align = LD->getAlignment();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ Align = ST->getAlignment();
+ isNonExt = !ST->isTruncatingStore();
+ IsMasked = true;
} else
return false;
@@ -15332,7 +15370,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isLegal = false;
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
- getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad,
+ getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, IsMasked,
Subtarget->isLittle(), Base, Offset,
isInc, DAG);
else {
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 429d0a1cf1b..dd8c032dae4 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -5332,6 +5332,10 @@ class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
PatFrag StoreKind, int shift>
: Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
(Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
+class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred),
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, (i32 1), VCCR:$pred)>;
multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
int shift> {
@@ -5363,7 +5367,7 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
return Ld->getMemoryVT().getScalarType() == MVT::i8;
}]>;
@@ -5382,7 +5386,7 @@ def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
}]>;
def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
@@ -5402,14 +5406,14 @@ def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
}]>;
def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
}]>;
def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
@@ -5417,7 +5421,7 @@ def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;
def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
auto *St = cast<MaskedStoreSDNode>(N);
EVT ScalarVT = St->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
@@ -5428,12 +5432,41 @@ def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;
def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
auto *St = cast<MaskedStoreSDNode>(N);
EVT ScalarVT = St->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
}]>;
+
+def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
+ (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
+}]>;
+def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
+ (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::POST_INC || AM == ISD::POST_DEC;
+}]>;
+def aligned32_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned32_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned16_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+def aligned16_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+
+
let Predicates = [HasMVEInt, IsLE] in {
// Stores
defm : MVE_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
@@ -5515,19 +5548,26 @@ let Predicates = [HasMVEInt] in {
def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, maskedstore16, 1>;
def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, maskedstore32, 2>;
def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, maskedstore32, 2>;
- // Truncating stores
- def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
- (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
- def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
- (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
- def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred),
- (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>;
+
+ // Pre/Post inc masked stores
+ def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre, pre_maskedstore, 0>;
+ def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post, post_maskedstore, 0>;
+ def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre, aligned16_pre_maskedstore, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_maskedstore, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre, aligned16_pre_maskedstore, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_maskedstore, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre, aligned32_pre_maskedstore, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_maskedstore, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_maskedstore, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_maskedstore, 2>;
+
// Aligned masked loads
def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>;
def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>;
def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>;
def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>;
def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>;
+
// Extending masked loads.
def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
(v8i16 NEONimmAllZerosV))),
@@ -5569,6 +5609,37 @@ let MinAlignment = 2 in {
(pre_truncstvi16 node:$val, node:$base, node:$offset)>;
}
+def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (masked_st node:$val, node:$base, node:$offset, node:$pred), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
+}]>;
+def pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def pre_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (masked_st node:$val, node:$base, node:$offset, node:$postd), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::POST_INC || AM == ISD::POST_DEC;
+}]>;
+def post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def post_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+
let Predicates = [HasMVEInt] in {
def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
(MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;
@@ -5590,6 +5661,27 @@ let Predicates = [HasMVEInt] in {
(MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
(MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
+
+ def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr, VCCR:$pred),
+ (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>;
+
+ def : Pat<(post_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(post_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(post_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred),
+ (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), VCCR:$pred)>;
+
+ def : Pat<(pre_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(pre_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred),
+ (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>;
+ def : Pat<(pre_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred),
+ (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), VCCR:$pred)>;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c3861adf091..32072df268d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24280,9 +24280,11 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ SDValue Offset = DAG.getUNDEF(VMask.getValueType());
- return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
- MemIntr->getMemOperand(), true /* truncating */);
+ return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask,
+ MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED,
+ true /* truncating */);
}
case X86ISD::VTRUNCUS:
case X86ISD::VTRUNCS: {
@@ -27593,12 +27595,11 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
return Op;
- SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(),
- N->getBasePtr(), Mask,
- getZeroVector(VT, Subtarget, DAG, dl),
- N->getMemoryVT(), N->getMemOperand(),
- N->getExtensionType(),
- N->isExpandingLoad());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
+ getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(),
+ N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(),
+ N->isExpandingLoad());
// Emit a blend.
SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad,
PassThru);
@@ -27632,11 +27633,10 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
- SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
- N->getBasePtr(), Mask, PassThru,
- N->getMemoryVT(), N->getMemOperand(),
- N->getExtensionType(),
- N->isExpandingLoad());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
+ PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
+ N->getExtensionType(), N->isExpandingLoad());
SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
NewLoad.getValue(0),
@@ -27682,7 +27682,8 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
- Mask, N->getMemoryVT(), N->getMemOperand(),
+ N->getOffset(), Mask, N->getMemoryVT(),
+ N->getMemOperand(), N->getAddressingMode(),
N->isTruncatingStore(), N->isCompressingStore());
}
@@ -40453,6 +40454,7 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ assert(ML->isUnindexed() && "Unexpected indexed masked load!");
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
@@ -40480,6 +40482,7 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
static SDValue
combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ assert(ML->isUnindexed() && "Unexpected indexed masked load!");
if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))
return SDValue();
@@ -40515,10 +40518,10 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
// The new masked load has an undef pass-through operand. The select uses the
// original pass-through operand.
- SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
- ML->getMask(), DAG.getUNDEF(VT),
- ML->getMemoryVT(), ML->getMemOperand(),
- ML->getExtensionType());
+ SDValue NewML = DAG.getMaskedLoad(
+ VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),
+ DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),
+ ML->getAddressingMode(), ML->getExtensionType());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
ML->getPassThru());
@@ -40604,8 +40607,9 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
Mst->getMemoryVT())) {
return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
- Mst->getBasePtr(), Mask,
- Mst->getMemoryVT(), Mst->getMemOperand(), true);
+ Mst->getBasePtr(), Mst->getOffset(), Mask,
+ Mst->getMemoryVT(), Mst->getMemOperand(),
+ Mst->getAddressingMode(), true);
}
return SDValue();
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index de6f8a81dff..1a4f7e1e6bb 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -706,6 +706,10 @@ def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>;
+def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>
+]>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -1040,9 +1044,10 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
INSERT_get_vinsert256_imm>;
def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_ld node:$src1, node:$src2, node:$src3), [{
+ (masked_ld node:$src1, undef, node:$src2, node:$src3), [{
return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
- cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+ cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1055,17 +1060,19 @@ def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
}]>;
def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_ld node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedLoadSDNode>(N)->isExpandingLoad();
+ (masked_ld node:$src1, undef, node:$src2, node:$src3), [{
+ return cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
+ cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
// Masked store fragments.
// X86mstore can't be implemented in core DAG files because some targets
// do not support vector types (llvm-tblgen will fail).
def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_st node:$src1, node:$src2, node:$src3), [{
- return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
- (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
+ (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ !cast<MaskedStoreSDNode>(N)->isCompressingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1078,16 +1085,18 @@ def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
}]>;
def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_st node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedStoreSDNode>(N)->isCompressingStore();
+ (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isCompressingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
// masked truncstore fragments
// X86mtruncstore can't be implemented in core DAG files because some targets
// doesn't support vector type ( llvm-tblgen will fail)
def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_st node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+ (masked_st node:$src1, node:$src2, undef, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+ cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def masked_truncstorevi8 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -1111,10 +1120,10 @@ def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore,
def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore,
+def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTX86MaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore,
+def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),