Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp       73
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td         96
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td   26
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td             88
4 files changed, 164 insertions, 119 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e0bcf702489..8be441fe28e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -794,6 +794,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -971,11 +974,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
// store.
- setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
- setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i16, Custom);
setOperationAction(ISD::STORE, MVT::v8i8, Custom);
@@ -21267,21 +21268,29 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
TargetLowering::TypeWidenVector)
return SDValue();
- // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
- // and store it.
MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
StoreVT.getVectorNumElements() * 2);
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
DAG.getUNDEF(StoreVT));
- MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
- MVT CastVT = MVT::getVectorVT(StVT, 2);
- StoredVal = DAG.getBitcast(CastVT, StoredVal);
- StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
- DAG.getIntPtrConstant(0, dl));
- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
- St->getMemOperand()->getFlags());
+ if (Subtarget.hasSSE2()) {
+ // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+ // and store it.
+ MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+ MVT CastVT = MVT::getVectorVT(StVT, 2);
+ StoredVal = DAG.getBitcast(CastVT, StoredVal);
+ StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
+ DAG.getIntPtrConstant(0, dl));
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
+ assert(Subtarget.hasSSE1() && "Expected SSE");
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
+ return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
+ St->getMemOperand());
}
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
@@ -28155,19 +28164,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (!ISD::isNON_EXTLoad(N))
return;
auto *Ld = cast<LoadSDNode>(N);
- MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
- SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(),
- Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
- SDValue Chain = Res.getValue(1);
- MVT WideVT = MVT::getVectorVT(LdVT, 2);
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
- MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() * 2);
- Res = DAG.getBitcast(CastVT, Res);
+ if (Subtarget.hasSSE2()) {
+ MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+ SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ SDValue Chain = Res.getValue(1);
+ MVT WideVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+ MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ Res = DAG.getBitcast(CastVT, Res);
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
+ assert(Subtarget.hasSSE1() && "Expected SSE");
+ SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
+ SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
+ SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ MVT::i64, Ld->getMemOperand());
Results.push_back(Res);
- Results.push_back(Chain);
+ Results.push_back(Res.getValue(1));
return;
}
}
@@ -32016,8 +32034,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// directly if we don't shuffle the lower element and we shuffle the upper
// (zero) elements within themselves.
if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
- (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
- unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
+ (cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() %
+ MaskEltSizeInBits) == 0) {
+ unsigned Scale =
+ cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() /
+ MaskEltSizeInBits;
ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 56aa2ecffe0..54eddeacaa1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1352,15 +1352,15 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX512] in {
// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZm addr:$src)>;
}
let Predicates = [HasVLX] in {
// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZ128m addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
@@ -3838,7 +3838,7 @@ def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
(VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
let Predicates = [HasAVX512] in {
- def : Pat<(X86vextractstore (v2i64 VR128X:$src), addr:$dst),
+ def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
(VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}
@@ -3873,7 +3873,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar<string asm, SDNode OpNode,
+multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
X86VectorVTInfo _> {
let Predicates = [HasAVX512, OptForSize] in
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
@@ -3901,7 +3901,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set _.RC:$dst, (_.VT (X86vzload addr:$src)))],
+ [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
_.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// _alt version uses FR32/FR64 register class.
let isCodeGenOnly = 1 in
@@ -3935,10 +3935,10 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
NotMemoryFoldable;
}
-defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
+defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
-defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
+defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
@@ -4319,16 +4319,16 @@ let Predicates = [HasAVX512] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8f32 (X86vzload addr:$src)),
+ def : Pat<(v8f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
- def : Pat<(v4f64 (X86vzload addr:$src)),
+ def : Pat<(v4f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// Represent the same patterns above but in the form they appear for
// 512-bit types
- def : Pat<(v16f32 (X86vzload addr:$src)),
+ def : Pat<(v16f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
- def : Pat<(v8f64 (X86vzload addr:$src)),
+ def : Pat<(v8f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
@@ -4351,21 +4351,21 @@ let Predicates = [HasAVX512] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
+ def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v8i32 (X86vzload addr:$src)),
+ def : Pat<(v8i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
(VMOVZPQILo2PQIZrr VR128X:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)),
+ def : Pat<(v2i64 (X86vzload64 addr:$src)),
(VMOVQI2PQIZrm addr:$src)>;
- def : Pat<(v4i64 (X86vzload addr:$src)),
+ def : Pat<(v4i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
- def : Pat<(v16i32 (X86vzload addr:$src)),
+ def : Pat<(v16i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
- def : Pat<(v8i64 (X86vzload addr:$src)),
+ def : Pat<(v8i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
@@ -6353,11 +6353,11 @@ let Predicates = [HasAVX512] in {
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPD patterns
- def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload addr:$src2))),
+ def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
@@ -8292,47 +8292,47 @@ let Predicates = [HasVLX] in {
}
let Predicates = [HasDQI, HasVLX] in {
- def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2QQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTPS2UQQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2QQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+ def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
(VCVTTPS2UQQZ128rm addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2i64 (vselect VK2WM:$mask,
- (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
v2i64x_info.ImmAllZerosV)),
(VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
@@ -8375,25 +8375,25 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
}
let Predicates = [HasVLX] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDZ128rm addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
v2f64x_info.ImmAllZerosV)),
(VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+ def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTUDQ2PDZ128rm addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
VR128X:$src0)),
(VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
- (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
v2f64x_info.ImmAllZerosV)),
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
@@ -8562,7 +8562,7 @@ let Predicates = [HasVLX] in {
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(VCVTPH2PSZ128rm addr:$src)>;
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
@@ -9626,13 +9626,13 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -9642,35 +9642,35 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
// 512-bit patterns
@@ -10873,7 +10873,7 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4077bdc92f8..096cc27861c 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -99,10 +99,10 @@ def X86insertps : SDNode<"X86ISD::INSERTPS",
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
-def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86vextractstore : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
@@ -939,10 +939,20 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
-def vzload_v4i32 : PatFrag<(ops node:$src),
- (bitconvert (v4i32 (X86vzload node:$src)))>;
-def vzload_v2i64 : PatFrag<(ops node:$src),
- (bitconvert (v2i64 (X86vzload node:$src)))>;
+def X86vzload32 : PatFrag<(ops node:$src),
+ (X86vzld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 4;
+}]>;
+
+def X86vzload64 : PatFrag<(ops node:$src),
+ (X86vzld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
+
+def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr),
+ (X86vextractst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
def fp32imm0 : PatLeaf<(f32 fpimm), [{
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index d25d216db19..7d0a5b87baf 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -226,14 +226,15 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
// Loading from memory automatically zeroing upper bits.
multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr, Domain d> {
+ PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
+ Domain d> {
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (vt (X86vzload addr:$src)))], d>,
+ [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (vt (X86vzload addr:$src)))], d>,
+ [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
Sched<[WriteFLoad]>;
// _alt version uses FR32/FR64 register class.
@@ -255,9 +256,9 @@ defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
SSEPackedDouble, "MOVSD", UseSSE2>, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, "movss",
+ defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
SSEPackedSingle>, XS;
- defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, "movsd",
+ defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
SSEPackedDouble>, XD;
}
@@ -270,9 +271,9 @@ let Predicates = [UseAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8f32 (X86vzload addr:$src)),
+ def : Pat<(v8f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
- def : Pat<(v4f64 (X86vzload addr:$src)),
+ def : Pat<(v4f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
@@ -663,6 +664,13 @@ let Predicates = [UseSSE1] in {
def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
(i8 -28)),
(MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v4f32 (X86vzload64 addr:$src)),
+ (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
+ def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
+ (MOVLPSmr addr:$dst, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -702,7 +710,7 @@ let Predicates = [UseAVX] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
@@ -711,7 +719,7 @@ let Predicates = [UseAVX] in {
(VMOVHPDmr addr:$dst, VR128:$src)>;
// MOVLPD patterns
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))),
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
}
@@ -721,6 +729,12 @@ let Predicates = [UseSSE1] in {
// No need for aligned load, we're only loading 64-bits.
def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
+ addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE2] in {
@@ -731,7 +745,7 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
@@ -740,7 +754,7 @@ let Predicates = [UseSSE2] in {
(MOVHPDmr addr:$dst, VR128:$src)>;
// MOVLPD patterns
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))),
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
}
@@ -1631,13 +1645,13 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]
// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]
@@ -4124,9 +4138,9 @@ let Predicates = [UseAVX] in {
// These instructions also write zeros in the high part of a 256-bit register.
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
+ def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v8i32 (X86vzload addr:$src)),
+ def : Pat<(v8i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
}
@@ -4138,7 +4152,7 @@ let Predicates = [UseSSE2] in {
(MOV64toPQIrr GR64:$src)>;
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
+ def : Pat<(v4i32 (X86vzload32 addr:$src)),
(MOVDI2PDIrm addr:$src)>;
}
@@ -4206,19 +4220,19 @@ def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
(MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
let Predicates = [UseAVX] in {
- def : Pat<(v2i64 (X86vzload addr:$src)),
+ def : Pat<(v2i64 (X86vzload64 addr:$src)),
(VMOVQI2PQIrm addr:$src)>;
- def : Pat<(v4i64 (X86vzload addr:$src)),
+ def : Pat<(v4i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
- def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst),
+ def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
(VMOVPQI2QImr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
- def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst),
+ def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
(MOVPQI2QImr addr:$dst, VR128:$src)>;
}
@@ -4368,7 +4382,7 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
@@ -4376,7 +4390,7 @@ let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64-bits.
def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
(MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(MOVDDUPrm addr:$src)>;
}
@@ -4953,7 +4967,7 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
@@ -4961,12 +4975,12 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
}
}
@@ -5018,7 +5032,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
@@ -5026,7 +5040,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v4i32 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
@@ -5040,14 +5054,14 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v4i32 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
@@ -5056,7 +5070,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
@@ -7261,10 +7275,10 @@ let Predicates = [HasF16C, NoVLX] in {
WriteCvtPS2PHYSt>, VEX_L;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
+ (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(VCVTPH2PSrm addr:$src)>;
def : Pat<(store (f64 (extractelt
@@ -7436,9 +7450,9 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
let Predicates = [HasAVX2, NoVLX] in {
// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQrm addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
@@ -7550,7 +7564,7 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDDUPrr VR128:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPrm addr:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPrm addr:$src)>;
}