From fea731a4aa6aabf270fbb9ba6401ca8826c55a9b Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 31 Jul 2017 18:08:24 +0000
Subject: [CGP] use subtract or subtract-of-cmps for result of memcmp expansion

As noted in the code comment, transforming this in the other direction might require
a separate transform here in CGP given the block-at-a-time DAG constraint.

Besides that theoretical motivation, there are 2 practical motivations for the
subtract-of-cmps form:

1. The codegen for both x86 and PPC is better for this IR (though PPC could be better still).
   There is discussion about canonicalizing IR to the select form
   ( http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html ),
   so we probably need to add DAG transforms for those patterns anyway, but this improves the
   memcmp output without waiting for that step.

2. If we allow vector-sized chunks for the load and compare, x86 is better prepared to convert
   that to optimal code when using subtract-of-cmps, so another prerequisite patch is avoided
   if we choose to enable that.

Differential Revision: https://reviews.llvm.org/D34904

llvm-svn: 309597
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

(limited to 'llvm/lib')

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b5487b63db1..debfdc161b4 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2117,13 +2117,25 @@ Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
     LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
   }
 
-  // TODO: Instead of comparing ULT, just subtract and return the difference?
-  Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+  if (Size < 4) {
+    // The i8 and i16 cases don't need compares. We zext the loaded values and
+    // subtract them to get the suitable negative, zero, or positive i32 result.
+    LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());
+    LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());
+    return Builder.CreateSub(LoadSrc1, LoadSrc2);
+  }
+
+  // The result of memcmp is negative, zero, or positive, so produce that by
+  // subtracting 2 extended compare bits: sub (ugt, ult).
+  // If a target prefers to use selects to get -1/0/1, they should be able
+  // to transform this later. The inverse transform (going from selects to math)
+  // may not be possible in the DAG because the selects got converted into
+  // branches before we got there.
+  Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);
   Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
-  Type *I32 = Builder.getInt32Ty();
-  Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
-                                             ConstantInt::get(I32, 1));
-  return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
+  Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
+  Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
+  return Builder.CreateSub(ZextUGT, ZextULT);
 }
 
 // This function expands the memcmp call into an inline expansion and returns
-- 
cgit v1.2.3