summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp32
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td20
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td6
3 files changed, 27 insertions, 31 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 33042e3900c..3807ad68ee9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1876,6 +1876,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -43914,16 +43915,35 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+
+ // Try to merge vector loads and extend_inreg to an extload.
+ if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
+ In.hasOneUse()) {
+ auto *Ld = cast<LoadSDNode>(In);
+ if (!Ld->isVolatile()) {
+ MVT SVT = In.getSimpleValueType().getVectorElementType();
+ ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getVectorNumElements());
+ SDValue Load =
+ DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+ return Load;
+ }
+ }
+
// Disabling for widening legalization for now. We can enable if we find a
// case that needs it. Otherwise it can be deleted when we switch to
// widening legalization.
if (ExperimentalVectorWideningLegalization)
return SDValue();
- EVT VT = N->getValueType(0);
- SDValue In = N->getOperand(0);
-
// Combine (ext_invec (ext_invec X)) -> (ext_invec X)
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (In.getOpcode() == N->getOpcode() &&
@@ -43932,7 +43952,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
// Attempt to combine as a shuffle.
// TODO: SSE41 support
- if (Subtarget.hasAVX()) {
+ if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {
SDValue Op(N, 0);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
@@ -44010,7 +44030,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::ANY_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, Subtarget);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, DCI,
+ Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b2dfc512932..02f5af438b6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9632,21 +9632,15 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
@@ -9654,15 +9648,11 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
@@ -9670,37 +9660,27 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
- (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
- def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 56974c44b4d..e27074fb27e 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4947,8 +4947,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
@@ -4957,15 +4955,11 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
}
}
OpenPOWER on IntegriCloud