From fea731a4aa6aabf270fbb9ba6401ca8826c55a9b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 31 Jul 2017 18:08:24 +0000 Subject: [CGP] use subtract or subtract-of-cmps for result of memcmp expansion As noted in the code comment, transforming this in the other direction might require a separate transform here in CGP given the block-at-a-time DAG constraint. Besides that theoretical motivation, there are 2 practical motivations for the subtract-of-cmps form: 1. The codegen for both x86 and PPC is better for this IR (though PPC could be better still). There is discussion about canonicalizing IR to the select form ( http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html ), so we probably need to add DAG transforms for those patterns anyway, but this improves the memcmp output without waiting for that step. 2. If we allow vector-sized chunks for the load and compare, x86 is better prepared to convert that to optimal code when using subtract-of-cmps, so another prerequisite patch is avoided if we choose to enable that. Differential Revision: https://reviews.llvm.org/D34904 llvm-svn: 309597 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b5487b63db1..debfdc161b4 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2117,13 +2117,25 @@ Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) { LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); } - // TODO: Instead of comparing ULT, just subtract and return the difference? - Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + if (Size < 4) { + // The i8 and i16 cases don't need compares. We zext the loaded values and + // subtract them to get the suitable negative, zero, or positive i32 result. + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); + return Builder.CreateSub(LoadSrc1, LoadSrc2); + } + + // The result of memcmp is negative, zero, or positive, so produce that by + // subtracting 2 extended compare bits: sub (ugt, ult). + // If a target prefers to use selects to get -1/0/1, they should be able + // to transform this later. The inverse transform (going from selects to math) + // may not be possible in the DAG because the selects got converted into + // branches before we got there. + Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); - Type *I32 = Builder.getInt32Ty(); - Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1), - ConstantInt::get(I32, 1)); - return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0)); + Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); + Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); + return Builder.CreateSub(ZextUGT, ZextULT); } // This function expands the memcmp call into an inline expansion and returns -- cgit v1.2.3