summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
1 files changed, 9 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4056b4982b0..67f2929dae7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32737,9 +32737,17 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
+ // If the input is a truncate from v16i8 or v32i8 go ahead and use a
+ // movmskb even with avx512. This will be better than truncating to vXi1 and
+ // using a kmov. This can especially help KNL if the input is a v16i8/v32i8
+ // vpcmpeqb/vpcmpgtb.
+ bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ (N0.getOperand(0).getValueType() == MVT::v16i8 ||
+ N0.getOperand(0).getValueType() == MVT::v32i8);
+
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
- if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated))
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
OpenPOWER on IntegriCloud