From b195ed8ce3ce2f31684b275640b9f7abaad8eeac Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 19 Feb 2018 22:07:31 +0000
Subject: [X86] Use vpmovq2m/vpmovd2m for truncate to vXi1 when possible.

Previously we used vptestmd, but the scheduling data for SKX says vpmovq2m/vpmovd2m is lower latency. We already used vpmovb2m/vpmovw2m for byte/word truncates. So this is more consistent anyway.

llvm-svn: 325534
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'llvm/lib/Target')

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b8b3fbec732..c6916fdf1a1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16762,6 +16762,10 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
     In = DAG.getNode(ISD::SHL, DL, InVT, In,
                      DAG.getConstant(ShiftInx, DL, InVT));
   }
+  // If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m.
+  if (Subtarget.hasDQI())
+    return DAG.getNode(X86ISD::CMPM, DL, VT, DAG.getConstant(0, DL, InVT),
+                       In, DAG.getConstant(6, DL, MVT::i8));
   return DAG.getNode(X86ISD::CMPM, DL, VT, In,
                      getZeroVector(InVT, Subtarget, DAG, DL),
                      DAG.getConstant(4, DL, MVT::i8));
-- 
cgit v1.2.3