diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 32 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 6 |
3 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 33042e3900c..3807ad68ee9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1876,6 +1876,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG); + setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); @@ -43914,16 +43915,35 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, } static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { + EVT VT = N->getValueType(0); + SDValue In = N->getOperand(0); + + // Try to merge vector loads and extend_inreg to an extload. + if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) && + In.hasOneUse()) { + auto *Ld = cast<LoadSDNode>(In); + if (!Ld->isVolatile()) { + MVT SVT = In.getSimpleValueType().getVectorElementType(); + ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getVectorNumElements()); + SDValue Load = + DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(), + Ld->getPointerInfo(), MemVT, Ld->getAlignment(), + Ld->getMemOperand()->getFlags()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); + return Load; + } + } + // Disabling for widening legalization for now. We can enable if we find a // case that needs it. Otherwise it can be deleted when we switch to // widening legalization. 
if (ExperimentalVectorWideningLegalization) return SDValue(); - EVT VT = N->getValueType(0); - SDValue In = N->getOperand(0); - // Combine (ext_invec (ext_invec X)) -> (ext_invec X) const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (In.getOpcode() == N->getOpcode() && @@ -43932,7 +43952,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, // Attempt to combine as a shuffle. // TODO: SSE41 support - if (Subtarget.hasAVX()) { + if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) { SDValue Op(N, 0); if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType())) if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) @@ -44010,7 +44030,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget); case ISD::ANY_EXTEND_VECTOR_INREG: - case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, Subtarget); + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, DCI, + Subtarget); case ISD::SETCC: return combineSetCC(N, DAG, Subtarget); case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget); case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index b2dfc512932..02f5af438b6 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9632,21 +9632,15 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; - def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>; } let Predicates = [HasVLX] in { def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 
addr:$src)))))), (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))), (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; - def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; @@ -9654,15 +9648,11 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; - def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))), - (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))), (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))), - (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; @@ -9670,37 +9660,27 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))), (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))), - (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>; } let Predicates = [HasVLX] in { def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), 
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; - def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))), (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))), - (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>; } // 512-bit patterns let Predicates = [HasAVX512] in { def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; - def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BQZrm) addr:$src)>; } } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 56974c44b4d..e27074fb27e 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4947,8 +4947,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; - def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))), - (!cast<I>(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))), (!cast<I>(OpcPrefix#DQYrm) addr:$src)>; @@ -4957,15 +4955,11 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, (!cast<I>(OpcPrefix#BQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast<I>(OpcPrefix#BQYrm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))), 
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast<I>(OpcPrefix#WQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast<I>(OpcPrefix#WQYrm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))), - (!cast<I>(OpcPrefix#WQYrm) addr:$src)>; } } |

