From f01a1dad7f369399cbe21a94d5c33a9be2c9c512 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 28 Mar 2017 17:23:49 +0000 Subject: [x86] use VPMOVMSK to replace memcmp libcalls for 32-byte equality Follow-up to: https://reviews.llvm.org/rL298775 llvm-svn: 298933 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 16 ++++++++-------- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++++- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 25523e52af1..9a4d44842ea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6069,20 +6069,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // supports the MVT we'll be loading or if it is small enough (<= 4) that // we'll only produce a small number of byte loads. MVT LoadVT; - switch (CSize->getZExtValue()) { + unsigned NumBitsToCompare = CSize->getZExtValue() * 8; + switch (NumBitsToCompare) { default: return false; - case 2: + case 16: LoadVT = MVT::i16; break; - case 4: + case 32: LoadVT = MVT::i32; break; - case 8: - LoadVT = hasFastLoadsAndCompare(64); - break; - case 16: - LoadVT = hasFastLoadsAndCompare(128); + case 64: + case 128: + case 256: + LoadVT = hasFastLoadsAndCompare(NumBitsToCompare); break; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 88e09cd56f8..eab398ac3bb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4646,8 +4646,12 @@ MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const { if (NumBits == 128 && isTypeLegal(MVT::v16i8)) return MVT::v16i8; + // VPMOVMSKB can handle this. + if (NumBits == 256 && isTypeLegal(MVT::v32i8)) + return MVT::v32i8; + // TODO: Allow 64-bit type for 32-bit target. - // TODO: 256- and 512-bit types should be allowed, but make sure that those + // TODO: 512-bit types should be allowed, but make sure that those // cases are handled in combineVectorSizedSetCCEquality(). return MVT::INVALID_SIMPLE_VALUE_TYPE; -- cgit v1.2.3