diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 2 |
2 files changed, 14 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bb2545a24db..b3ddbe8377c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39656,6 +39656,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, } static SDValue combineStore(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { StoreSDNode *St = cast<StoreSDNode>(N); EVT VT = St->getValue().getValueType(); @@ -39767,6 +39768,18 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, } } + // Try to optimize v16i16->v16i8 truncating stores when BWI is not + // supported, but avx512f is by extending to v16i32 and truncating. + if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() && + St->getValue().getOpcode() == ISD::TRUNCATE && + St->getValue().getOperand(0).getValueType() == MVT::v16i16 && + TLI.isTruncStoreLegalOrCustom(MVT::v16i32, MVT::v16i8) && + !DCI.isBeforeLegalizeOps()) { + SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue()); + return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(), + MVT::v16i8, St->getMemOperand()); + } + // Optimize trunc store (of multiple scalars) to shuffle and store. // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. @@ -43774,7 +43787,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget); case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); - case ISD::STORE: return combineStore(N, DAG, Subtarget); + case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget); case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e1516dd745c..917cd20f0c7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9799,8 +9799,6 @@ def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>; def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))), (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>; -def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst), - (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>; } //===----------------------------------------------------------------------===// |