summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/CodeGenPrepare
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-06-27 21:46:34 +0000
committerSanjay Patel <spatel@rotateright.com>2017-06-27 21:46:34 +0000
commit70b36f193df2c8e745b19c36dcef9a517ee42eb9 (patch)
tree5f6bc83be1c263f21d38a55135c004e8af24d916 /llvm/test/Transforms/CodeGenPrepare
parent849fcca09004751b0fc9a2a93fd9eb985a4c6876 (diff)
downloadbcm5719-llvm-70b36f193df2c8e745b19c36dcef9a517ee42eb9.tar.gz
bcm5719-llvm-70b36f193df2c8e745b19c36dcef9a517ee42eb9.zip
[CGP] eliminate a sub instruction in memcmp expansion
As noted in D34071, there are some IR optimization opportunities that could be handled by normal IR passes if this expansion wasn't happening so late in CGP. Regardless of that, it seems wasteful to knowingly produce suboptimal IR here, so I'm proposing this change: `%s = sub i32 %x, %y` ; `%r = icmp ne %s, 0` => `%r = icmp ne %x, %y`. Changing the predicate to 'eq' mimics what InstCombine would do, so that's just an efficiency improvement if we decide this expansion should happen sooner. The fact that the PowerPC backend doesn't eliminate the 'subf.' might be something for PPC folks to investigate separately. Differential Revision: https://reviews.llvm.org/D34416 llvm-svn: 306471
Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare')
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll55
1 file changed, 25 insertions, 30 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
index 690e714af26..b0335ee3450 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -14,15 +14,14 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; X32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]]
-; X32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; X32-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: br i1 [[TMP8]], label %endblock, label %res_block
; X32: res_block:
-; X32-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
-; X32-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
; X32-NEXT: br label %endblock
; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
; X64-LABEL: @cmp2(
@@ -35,15 +34,14 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
-; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
-; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: br i1 [[TMP8]], label %endblock, label %res_block
; X64: res_block:
-; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
; X64-NEXT: br label %endblock
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
@@ -68,15 +66,14 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; X32-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
-; X32-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
-; X32-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label %endblock, label %res_block
; X32: res_block:
-; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
-; X32-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
; X32-NEXT: br label %endblock
; X32: endblock:
-; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP8]], %res_block ]
; X32-NEXT: ret i32 [[PHI_RES]]
;
; X64-LABEL: @cmp4(
@@ -89,15 +86,14 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
-; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
-; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: br i1 [[TMP8]], label %endblock, label %res_block
; X64: res_block:
-; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
-; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
; X64-NEXT: br label %endblock
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
@@ -144,15 +140,14 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
-; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; X64-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label %endblock, label %res_block
; X64: res_block:
-; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
-; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
; X64-NEXT: br label %endblock
; X64: endblock:
-; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP8]], %res_block ]
; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
OpenPOWER on IntegriCloud