author    Sanjay Patel <spatel@rotateright.com>  2017-08-21 19:13:14 +0000
committer Sanjay Patel <spatel@rotateright.com>  2017-08-21 19:13:14 +0000
commit    82ec872990f0058c2c5938ecbc649e99a70bef9c (patch)
tree      a350301d100fae6e3e0b1f53a88bdf12e6360d8e /llvm/test/Transforms
parent    d986545df620fb8933835aa0c1d85256d82a23c2 (diff)
download  bcm5719-llvm-82ec872990f0058c2c5938ecbc649e99a70bef9c.tar.gz
          bcm5719-llvm-82ec872990f0058c2c5938ecbc649e99a70bef9c.zip
[LibCallSimplifier] try harder to fold memcmp with constant arguments (2nd try)
The 1st try was reverted because it could inf-loop by creating a dead instruction. Fixed that to not happen and added a test case to verify.

Original commit message:

Try to fold:
memcmp(X, C, ConstantLength) == 0 --> load X == *C

Without this change, we're unnecessarily checking the alignment of the constant data, so we miss the transform in the first 2 tests in the patch.

I noted this shortcoming of LibCallSimplifier in one of the recent CGP memcmp expansion patches. This doesn't help the example in:
https://bugs.llvm.org/show_bug.cgi?id=34032#c13
...directly, but it's worth short-circuiting more of these simple cases since we're already trying to do that.

The benefit of transforming to load+cmp is that existing IR analysis/transforms may further simplify that code. For example, if the load of the variable is common to multiple memcmp calls, CSE can remove the duplicate instructions.

Differential Revision: https://reviews.llvm.org/D36922

llvm-svn: 311366
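To make the CSE point above concrete, here is a minimal sketch of the IR shape this fold enables (illustrative only, not from the patch: the function @either, its constants, and the little-endian assumption are all hypothetical). Two memcmp(x, const, 4) == 0 checks each become a load+icmp; since both loads read the same i32 from %x, CSE leaves a single load feeding both compares:

; hypothetical result after the fold plus CSE, little-endian target
define i1 @either(i8* align 4 %x) {
  %bc = bitcast i8* %x to i32*
  %v = load i32, i32* %bc, align 4   ; one shared load of the 4 bytes at %x
  %eqA = icmp eq i32 %v, 16777216    ; bytes 00 00 00 01 read as LE i32 0x01000000
  %eqB = icmp eq i32 %v, 65536       ; bytes 00 00 01 00 read as LE i32 0x00010000
  %res = or i1 %eqA, %eqB
  ret i1 %res
}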
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r--  llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll  |  49
1 file changed, 39 insertions, 10 deletions
diff --git a/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll b/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll
index b19f17c818d..211b3b5ab2c 100644
--- a/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll
+++ b/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll
@@ -3,31 +3,45 @@
declare i32 @memcmp(i8*, i8*, i64)
-; TODO: The alignment of this constant does not matter. We constant fold the load.
+; The alignment of this constant does not matter. We constant fold the load.
@charbuf = private unnamed_addr constant [4 x i8] [i8 0, i8 0, i8 0, i8 1], align 1
define i1 @memcmp_4bytes_unaligned_constant_i8(i8* align 4 %x) {
-; ALL-LABEL: @memcmp_4bytes_unaligned_constant_i8(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
-; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: ret i1 [[CMPEQ0]]
+; LE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; LE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 16777216
+; LE-NEXT: ret i1 [[TMP2]]
+;
+; BE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
+; BE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; BE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; BE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 1
+; BE-NEXT: ret i1 [[TMP2]]
;
%call = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
%cmpeq0 = icmp eq i32 %call, 0
ret i1 %cmpeq0
}
-; TODO: We still don't care about alignment of the constant. We are not limited to constant folding only i8 arrays.
+; We still don't care about alignment of the constant. We are not limited to constant folding only i8 arrays.
; It doesn't matter if the constant operand is the first operand to the memcmp.
@intbuf_unaligned = private unnamed_addr constant [4 x i16] [i16 1, i16 2, i16 3, i16 4], align 1
define i1 @memcmp_4bytes_unaligned_constant_i16(i8* align 4 %x) {
-; ALL-LABEL: @memcmp_4bytes_unaligned_constant_i16(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast ([4 x i16]* @intbuf_unaligned to i8*), i8* %x, i64 4)
-; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: ret i1 [[CMPEQ0]]
+; LE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; LE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 131073
+; LE-NEXT: ret i1 [[TMP2]]
+;
+; BE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
+; BE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; BE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; BE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 65538
+; BE-NEXT: ret i1 [[TMP2]]
;
%call = tail call i32 @memcmp(i8* bitcast (i16* getelementptr inbounds ([4 x i16], [4 x i16]* @intbuf_unaligned, i64 0, i64 0) to i8*), i8* %x, i64 4)
%cmpeq0 = icmp eq i32 %call, 0
@@ -49,3 +63,18 @@ define i1 @memcmp_3bytes_aligned_constant_i32(i8* align 4 %x) {
ret i1 %cmpeq0
}
+; A sloppy implementation would infinite loop by recreating the unused instructions.
+
+define i1 @memcmp_4bytes_one_unaligned_i8(i8* align 4 %x, i8* align 1 %y) {
+; ALL-LABEL: @memcmp_4bytes_one_unaligned_i8(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: ret i1 [[CMPEQ0]]
+;
+ %bc = bitcast i8* %x to i32*
+ %lhsv = load i32, i32* %bc
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+ %cmpeq0 = icmp eq i32 %call, 0
+ ret i1 %cmpeq0
+}
+
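A note on the CHECK constants above, worked out from the buffer layouts: @charbuf holds the bytes 00 00 00 01, which load as i32 0x01000000 = 16777216 on a little-endian target and as 0x00000001 = 1 on a big-endian one. The first four bytes of @intbuf_unaligned are 01 00 02 00 when its i16 elements are stored little-endian (i32 0x00020001 = 131073) and 00 01 00 02 when stored big-endian (i32 0x00010002 = 65538). In the final test the fold cannot fire, since neither pointer argument is a constant; the already-dead bitcast+load pair in its body is presumably the kind of unused instruction that, if recreated by the simplifier, produced the inf-loop mentioned in the commit message.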