summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-06-20 15:58:30 +0000
committerSanjay Patel <spatel@rotateright.com>2017-06-20 15:58:30 +0000
commit0656629b870ae9933e350c5f4edc733012e7ece0 (patch)
tree82fc84249670cc54145d258342581ecd40b81bec /llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
parent4822b5b649f0086aa8339c2def1dbdd303dcb257 (diff)
downloadbcm5719-llvm-0656629b870ae9933e350c5f4edc733012e7ece0.tar.gz
bcm5719-llvm-0656629b870ae9933e350c5f4edc733012e7ece0.zip
[x86] enable CGP memcmp() expansion for 2/4/8 byte sizes
There are a couple of potential improvements as seen in the IR and asm: 1. We're unnecessarily extending to a larger type to compare values. 2. The codegen for (select cond, 1, -1) could avoid a cmov (or we could change the order of the compares, so we have a select with a 0 operand). llvm-svn: 305802
Diffstat (limited to 'llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll')
-rw-r--r--llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll150
1 file changed, 132 insertions, 18 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
index 328e8cc2907..337889b34d6 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -6,9 +6,47 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp2(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp2(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
+; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
+; X32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; X32-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X32: res_block:
+; X32-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X32-NEXT: br label %endblock
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp2(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
+; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
+; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X64: res_block:
+; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X64-NEXT: br label %endblock
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
ret i32 %call
@@ -24,9 +62,45 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp4(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp4(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X32-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
+; X32: res_block:
+; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; X32-NEXT: br label %endblock
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp4(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
+; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
+; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X64: res_block:
+; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X64-NEXT: br label %endblock
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
ret i32 %call
@@ -60,9 +134,28 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp8(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp8(
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; X32-NEXT: ret i32 [[CALL]]
+;
+; X64-LABEL: @cmp8(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; X64-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
+; X64: res_block:
+; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; X64-NEXT: br label %endblock
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
ret i32 %call
@@ -142,8 +235,13 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq2(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i16*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i16*
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -168,8 +266,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq4(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i32*
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -219,11 +322,22 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq8(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq8(
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq8(
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i64*
+; X64-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i64*
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
%cmp = icmp eq i32 %call, 0
OpenPOWER on IntegriCloud