diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 16 |
3 files changed, 20 insertions, 24 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 014889720a0..31c795c47d0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33675,6 +33675,26 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, Movl, N->getOperand(0).getOperand(2)); } + // If this is a vzmovl of a full vector load, replace it with a vzload, unless + // the load is volatile. + if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() && + ISD::isNormalLoad(N->getOperand(0).getNode())) { + LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0)); + if (!LN->isVolatile()) { + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; + SDValue VZLoad = + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, + VT.getVectorElementType(), + LN->getPointerInfo(), + LN->getAlignment(), + MachineMemOperand::MOLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 0), VZLoad.getValue(1)); + return VZLoad; + } + } + + // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the // operands is an extend from v2i32 to v2i64. Turn it into a pmulld. // FIXME: This can probably go away once we default to widening legalization. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 917cd20f0c7..aeb03ab9b1c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4317,15 +4317,11 @@ let Predicates = [HasAVX512] in { // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), (VMOVSSZrm addr:$src)>; - def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (VMOVSSZrm addr:$src)>; // MOVSDrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), (VMOVSDZrm addr:$src)>; - def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (VMOVSDZrm addr:$src)>; // Represent the same patterns above but in the form they appear for // 256-bit types @@ -4363,14 +4359,10 @@ let Predicates = [HasAVX512] in { (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIZrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), - (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzload addr:$src)), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v8i32 (X86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (VMOVQI2PQIZrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), (VMOVZPQILo2PQIZrr VR128X:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 1ab7af56797..cf9acd965e6 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -267,8 +267,6 @@ let Predicates = [UseAVX] in { // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), (VMOVSSrm addr:$src)>; - def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (VMOVSSrm addr:$src)>; def : Pat<(v4f32 (X86vzload addr:$src)), (VMOVSSrm addr:$src)>; @@ -276,8 +274,6 @@ let Predicates = [UseAVX] in { // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), (VMOVSDrm addr:$src)>; - def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (VMOVSDrm addr:$src)>; def : Pat<(v2f64 (X86vzload addr:$src)), (VMOVSDrm addr:$src)>; @@ -321,16 +317,12 @@ let Predicates = [UseSSE1] in { // MOVSSrm already zeros the high parts of the register. def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), (MOVSSrm addr:$src)>; - def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (MOVSSrm addr:$src)>; } let Predicates = [UseSSE2] in { // MOVSDrm already zeros the high parts of the register. def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), (MOVSDrm addr:$src)>; - def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (MOVSDrm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -4145,8 +4137,6 @@ let Predicates = [UseAVX] in { (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), - (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzload addr:$src)), (VMOVDI2PDIrm addr:$src)>; def : Pat<(v8i32 (X86vzload addr:$src)), @@ -4163,8 +4153,6 @@ let Predicates = [UseSSE2] in { (MOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (MOVDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), - (MOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzload addr:$src)), (MOVDI2PDIrm addr:$src)>; } @@ -4233,8 +4221,6 @@ def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}", (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>; let Predicates = [UseAVX] in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (VMOVQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), (VMOVQI2PQIrm addr:$src)>; def : Pat<(v4i64 (X86vzload addr:$src)), @@ -4245,8 
+4231,6 @@ let Predicates = [UseAVX] in { } let Predicates = [UseSSE2] in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (MOVQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>; def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst), |

