Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 20
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td   |  8
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td      | 16
3 files changed, 20 insertions, 24 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 014889720a0..31c795c47d0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33675,6 +33675,26 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
Movl, N->getOperand(0).getOperand(2));
}
+ // If this is a vzmovl of a full vector load, replace it with a vzload, unless
+ // the load is volatile.
+ if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+ ISD::isNormalLoad(N->getOperand(0).getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ if (!LN->isVolatile()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ VT.getVectorElementType(),
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ MachineMemOperand::MOLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 0), VZLoad.getValue(1));
+ return VZLoad;
+ }
+ }
+
+
// Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
// operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
// FIXME: This can probably go away once we default to widening legalization.
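A minimal sketch for context (not part of this commit): the kind of source that produces an X86ISD::VZEXT_MOVL of a full 128-bit load, which the new combine rewrites into a VZEXT_LOAD memory node. The function name is invented; the intrinsics are standard SSE:

    #include <immintrin.h>

    // Keep lane 0 of a full 16-byte load, zero lanes 1-3.
    __m128 low_lane_only(const float *p) {
      __m128 v = _mm_loadu_ps(p);               // full vector load (loadv4f32)
      return _mm_move_ss(_mm_setzero_ps(), v);  // X86vzmovl of that load
    }

With this patch, the combine folds the load and the zero-extending move into a single zero-extending vector load during DAG combining, before instruction selection runs.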
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 917cd20f0c7..aeb03ab9b1c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4317,15 +4317,11 @@ let Predicates = [HasAVX512] in {
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(VMOVSSZrm addr:$src)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (VMOVSSZrm addr:$src)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(VMOVSDZrm addr:$src)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (VMOVSDZrm addr:$src)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
@@ -4363,14 +4359,10 @@ let Predicates = [HasAVX512] in {
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v8i32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
(VMOVZPQILo2PQIZrr VR128X:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
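The integer patterns deleted above (X86vzmovl of loadv4i32/loadv2i64) arise from code like the following hedged sketch (not from this commit; the function name is invented). After this change the DAG combine canonicalizes the shape to X86vzload, so the surviving (v2i64 (X86vzload addr:$src)) pattern still selects VMOVQI2PQIZrm:

    #include <immintrin.h>

    // Keep quadword 0 of a full 128-bit load, zero quadword 1.
    __m128i low_qword_only(const __m128i *p) {
      __m128i v = _mm_loadu_si128(p);  // full vector load (loadv2i64)
      return _mm_move_epi64(v);        // X86vzmovl of that load
    }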
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 1ab7af56797..cf9acd965e6 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -267,8 +267,6 @@ let Predicates = [UseAVX] in {
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(VMOVSSrm addr:$src)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (VMOVSSrm addr:$src)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
(VMOVSSrm addr:$src)>;
@@ -276,8 +274,6 @@ let Predicates = [UseAVX] in {
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(VMOVSDrm addr:$src)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (VMOVSDrm addr:$src)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(VMOVSDrm addr:$src)>;
@@ -321,16 +317,12 @@ let Predicates = [UseSSE1] in {
// MOVSSrm already zeros the high parts of the register.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(MOVSSrm addr:$src)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (MOVSSrm addr:$src)>;
}
let Predicates = [UseSSE2] in {
// MOVSDrm already zeros the high parts of the register.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(MOVSDrm addr:$src)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (MOVSDrm addr:$src)>;
}
//===----------------------------------------------------------------------===//
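The scalar-FP deletions above have a double-precision analogue; here is a hedged sketch (again, not from this commit) that exercises the MOVSD path. When compiled without AVX, the UseSSE2 predicate block applies and the vzload is selected as MOVSDrm:

    #include <immintrin.h>

    // Keep element 0 of a full 16-byte load, zero element 1.
    __m128d low_double_only(const double *p) {
      __m128d v = _mm_loadu_pd(p);              // full vector load (loadv2f64)
      return _mm_move_sd(_mm_setzero_pd(), v);  // X86vzmovl of that load
    }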
@@ -4145,8 +4137,6 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v8i32 (X86vzload addr:$src)),
@@ -4163,8 +4153,6 @@ let Predicates = [UseSSE2] in {
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(MOVDI2PDIrm addr:$src)>;
}
@@ -4233,8 +4221,6 @@ def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
(MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
let Predicates = [UseAVX] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
(VMOVQI2PQIrm addr:$src)>;
def : Pat<(v4i64 (X86vzload addr:$src)),
@@ -4245,8 +4231,6 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst),