summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2019-07-02 23:20:03 +0000
committerCraig Topper <craig.topper@intel.com>2019-07-02 23:20:03 +0000
commitb770d2c9d44672c7cd1b82ac32954179686b9c08 (patch)
tree88286d4bcda00647b1824545feab612e9f576918 /llvm/lib
parentaab3891702024fda3394d9995fdc2f24b82ffa5b (diff)
downloadbcm5719-llvm-b770d2c9d44672c7cd1b82ac32954179686b9c08.tar.gz
bcm5719-llvm-b770d2c9d44672c7cd1b82ac32954179686b9c08.zip
[X86] Add a DAG combine for turning *_extend_vector_inreg+load into an appropriate extload if the load isn't volatile.
Remove the corresponding isel patterns that did the same thing without checking for volatile. This fixes another variation of PR42079 llvm-svn: 364977
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp32
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td20
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td6
3 files changed, 27 insertions, 31 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 33042e3900c..3807ad68ee9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1876,6 +1876,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -43914,16 +43915,35 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+
+ // Try to merge vector loads and extend_inreg to an extload.
+ if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
+ In.hasOneUse()) {
+ auto *Ld = cast<LoadSDNode>(In);
+ if (!Ld->isVolatile()) {
+ MVT SVT = In.getSimpleValueType().getVectorElementType();
+ ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getVectorNumElements());
+ SDValue Load =
+ DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+ return Load;
+ }
+ }
+
// Disabling for widening legalization for now. We can enable if we find a
// case that needs it. Otherwise it can be deleted when we switch to
// widening legalization.
if (ExperimentalVectorWideningLegalization)
return SDValue();
- EVT VT = N->getValueType(0);
- SDValue In = N->getOperand(0);
-
// Combine (ext_invec (ext_invec X)) -> (ext_invec X)
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (In.getOpcode() == N->getOpcode() &&
@@ -43932,7 +43952,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
// Attempt to combine as a shuffle.
// TODO: SSE41 support
- if (Subtarget.hasAVX()) {
+ if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {
SDValue Op(N, 0);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
@@ -44010,7 +44030,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::ANY_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, Subtarget);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, DCI,
+ Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b2dfc512932..02f5af438b6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9632,21 +9632,15 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
@@ -9654,15 +9648,11 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
@@ -9670,37 +9660,27 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
- (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
- def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 56974c44b4d..e27074fb27e 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4947,8 +4947,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
@@ -4957,15 +4955,11 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
}
}
OpenPOWER on IntegriCloud