diff options
author | Zaara Syeda <syzaara@ca.ibm.com> | 2017-05-31 17:12:38 +0000 |
---|---|---|
committer | Zaara Syeda <syzaara@ca.ibm.com> | 2017-05-31 17:12:38 +0000 |
commit | 3a7578c6589b910f9a04bae7f7f121dfe3281578 (patch) | |
tree | d9717d369740af26067db3f584bf73a07d6167bd /llvm/test | |
parent | 6ad77845e29dd0b85a9d21dce8f06bf569999a5d (diff) | |
download | bcm5719-llvm-3a7578c6589b910f9a04bae7f7f121dfe3281578.tar.gz bcm5719-llvm-3a7578c6589b910f9a04bae7f7f121dfe3281578.zip |
[PPC] Inline expansion of memcmp
This patch adds inline expansion of memcmp.
It replaces the memcmp library call with an inline expansion when the size is
known at compile time and is under a target-specified threshold.
The expansion is implemented in CodeGenPrepare and produces straight-line
code. The target specifies a maximum load size, and the expansion uses this
size to load from the two sources, compare the loaded values, and exit early if
a difference is found. There is also a special case for when the memcmp result
is used in an equality comparison with zero.
Differential Revision: https://reviews.llvm.org/D28637
llvm-svn: 304313
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll | 121 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/memcmp.ll | 87 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/memcmpIR.ll | 194 |
3 files changed, 402 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll new file mode 100644 index 00000000000..3095429758f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -0,0 +1,121 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +@zeroEqualityTest01.buffer1 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 4], align 4 +@zeroEqualityTest01.buffer2 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 3], align 4 +@zeroEqualityTest02.buffer1 = private unnamed_addr constant [4 x i32] [i32 4, i32 0, i32 0, i32 0], align 4 +@zeroEqualityTest02.buffer2 = private unnamed_addr constant [4 x i32] [i32 3, i32 0, i32 0, i32 0], align 4 +@zeroEqualityTest03.buffer1 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 3], align 4 +@zeroEqualityTest03.buffer2 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 4], align 4 +@zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4 +@zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4 + +; Function Attrs: nounwind readonly +declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1 + +; Validate with if(memcmp()) +; Function Attrs: nounwind readonly +define signext i32 @zeroEqualityTest01() local_unnamed_addr #0 { +entry: + %call = tail call signext i32 @memcmp(i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer1 to i8*), i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer2 to i8*), i64 16) + %not.tobool = icmp ne i32 %call, 0 + %. 
= zext i1 %not.tobool to i32 + ret i32 %. + + ; CHECK-LABEL: @zeroEqualityTest01 + ; CHECK-LABEL: %res_block + ; CHECK: li 3, 1 + ; CHECK-NEXT: clrldi + ; CHECK-NEXT: blr + ; CHECK: li 3, 0 + ; CHECK-NEXT: clrldi + ; CHECK-NEXT: blr +} + +; Validate with if(memcmp() == 0) +; Function Attrs: nounwind readonly +define signext i32 @zeroEqualityTest02() local_unnamed_addr #0 { +entry: + %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16) + %not.cmp = icmp ne i32 %call, 0 + %. = zext i1 %not.cmp to i32 + ret i32 %. + + ; CHECK-LABEL: @zeroEqualityTest02 + ; CHECK-LABEL: %res_block + ; CHECK: li 3, 1 + ; CHECK-NEXT: clrldi + ; CHECK-NEXT: blr + ; CHECK: li 3, 0 + ; CHECK-NEXT: clrldi + ; CHECK-NEXT: blr +} + +; Validate with > 0 +; Function Attrs: nounwind readonly +define signext i32 @zeroEqualityTest03() local_unnamed_addr #0 { +entry: + %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16) + %not.cmp = icmp slt i32 %call, 1 + %. = zext i1 %not.cmp to i32 + ret i32 %. 
+ + ; CHECK-LABEL: @zeroEqualityTest03 + ; CHECK-LABEL: %res_block + ; CHECK: cmpld + ; CHECK-NEXT: li [[LI:[0-9]+]], 1 + ; CHECK-NEXT: li [[LI2:[0-9]+]], -1 + ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0 +} + +; Validate with < 0 +; Function Attrs: nounwind readonly +define signext i32 @zeroEqualityTest04() local_unnamed_addr #0 { +entry: + %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16) + %call.lobit = lshr i32 %call, 31 + %call.lobit.not = xor i32 %call.lobit, 1 + ret i32 %call.lobit.not + + ; CHECK-LABEL: @zeroEqualityTest04 + ; CHECK-LABEL: %res_block + ; CHECK: cmpld + ; CHECK-NEXT: li [[LI:[0-9]+]], 1 + ; CHECK-NEXT: li [[LI2:[0-9]+]], -1 + ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0 +} + +; Validate with memcmp()?: +; Function Attrs: nounwind readonly +define signext i32 @zeroEqualityTest05() local_unnamed_addr #0 { +entry: + %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16) + %not.tobool = icmp eq i32 %call, 0 + %cond = zext i1 %not.tobool to i32 + ret i32 %cond + + ; CHECK-LABEL: @zeroEqualityTest05 + ; CHECK-LABEL: %res_block + ; CHECK: li 3, 1 + ; CHECK: li 3, 0 +} + +; Validate with !memcmp()?: +; Function Attrs: nounwind readonly +define signext i32 @zeroEqualityTest06() local_unnamed_addr #0 { +entry: + %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16) + %not.lnot = icmp ne i32 %call, 0 + %cond = zext i1 %not.lnot to i32 + ret i32 %cond + + ; CHECK-LABEL: @zeroEqualityTest06 + ; CHECK-LABEL: %res_block + ; CHECK: li 3, 1 + ; CHECK-NEXT: clrldi + ; CHECK-NEXT: blr + ; CHECK: li 3, 0 + ; CHECK-NEXT: clrldi + ; CHECK-NEXT: blr +} diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll 
b/llvm/test/CodeGen/PowerPC/memcmp.ll new file mode 100644 index 00000000000..bae713cb207 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/memcmp.ll @@ -0,0 +1,87 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK + +; Check size 8 +; Function Attrs: nounwind readonly +define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 { +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 8) #2 + ret i32 %call + +; CHECK-LABEL: @test1 +; CHECK: ldbrx [[LOAD1:[0-9]+]] +; CHECK-NEXT: ldbrx [[LOAD2:[0-9]+]] +; CHECK-NEXT: li [[LI:[0-9]+]], 1 +; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]] +; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]] +; CHECK-NEXT: li [[LI2:[0-9]+]], -1 +; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4 +; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2 +; CHECK-NEXT: extsw 3, [[ISEL2]] +; CHECK-NEXT: blr +} + +; Check size 4 +; Function Attrs: nounwind readonly +define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 { +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 4) #2 + ret i32 %call + +; CHECK-LABEL: @test2 +; CHECK: lwbrx [[LOAD1:[0-9]+]] +; CHECK-NEXT: lwbrx [[LOAD2:[0-9]+]] +; CHECK-NEXT: li [[LI:[0-9]+]], 1 +; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]] +; CHECK-NEXT: subf. 
[[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]] +; CHECK-NEXT: li [[LI2:[0-9]+]], -1 +; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4 +; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2 +; CHECK-NEXT: extsw 3, [[ISEL2]] +; CHECK-NEXT: blr +} + +; Check size 2 +; Function Attrs: nounwind readonly +define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 { +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 2) #2 + ret i32 %call + +; CHECK-LABEL: @test3 +; CHECK: lhbrx [[LOAD1:[0-9]+]] +; CHECK-NEXT: lhbrx [[LOAD2:[0-9]+]] +; CHECK-NEXT: li [[LI:[0-9]+]], 1 +; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]] +; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]] +; CHECK-NEXT: li [[LI2:[0-9]+]], -1 +; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4 +; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2 +; CHECK-NEXT: extsw 3, [[ISEL2]] +; CHECK-NEXT: blr +} + +; Check size 1 +; Function Attrs: nounwind readonly +define signext i32 @test4(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) local_unnamed_addr #0 { +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 1) #2 + ret i32 %call + +; CHECK-LABEL: @test4 +; CHECK: lbz [[LOAD1:[0-9]+]] +; CHECK-NEXT: lbz [[LOAD2:[0-9]+]] +; CHECK-NEXT: subf [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]] +; CHECK-NEXT: extsw 3, [[SUB]] +; CHECK-NEXT: blr +} + +; Function Attrs: nounwind readonly +declare signext i32 @memcmp(i8*, i8*, i64) #1 diff --git a/llvm/test/CodeGen/PowerPC/memcmpIR.ll b/llvm/test/CodeGen/PowerPC/memcmpIR.ll new file mode 100644 index 00000000000..f052cc258df --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/memcmpIR.ll @@ -0,0 +1,194 @@ +; RUN: llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s +; RUN: llc -o - 
-mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s --check-prefix=CHECK-BE + +define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { +entry: + ; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) + ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK-LABEL: res_block:{{.*}} + ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 + ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-NEXT: br label %endblock + + ; CHECK: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1 + ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]] + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]] + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) + ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock + + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK-BE-LABEL: res_block:{{.*}} + ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 + ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-BE-NEXT: br label %endblock + + ; CHECK-BE: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 
1 + ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1 + ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]] + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]] + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock + + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 16) + ret i32 %call +} + +declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1 + +define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { + ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64 + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64 + ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock + + ; CHECK-LABEL: res_block:{{.*}} + ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 + ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-NEXT: br label %endblock + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* + ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64 + ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64 + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock + + ; CHECK-BE-LABEL: res_block:{{.*}} + ; CHECK-BE: [[ICMP2:%[0-9]+]] = 
icmp ult i64 + ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-BE-NEXT: br label %endblock + +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 4) + ret i32 %call +} + +define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { + ; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) + ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK-LABEL: res_block:{{.*}} + ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 + ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-NEXT: br label %endblock + + ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64 + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64 + ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK: [[LOAD1:%[0-9]+]] = load i16, i16* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]]) + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]]) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64 + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64 + ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], 
[[ZEXT2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK: [[LOAD1:%[0-9]+]] = load i8, i8* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32 + ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] + ; CHECK-NEXT: br label %endblock + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK-BE-LABEL: res_block:{{.*}} + ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 + ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 + ; CHECK-BE-NEXT: br label %endblock + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* + ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64 + ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64 + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, i16* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* + ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64 + ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64 + ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + + ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, i8* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* + ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 + ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32 
+ ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: br label %endblock + +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 15) + ret i32 %call +} + ; CHECK: call = tail call signext i32 @memcmp + ; CHECK-BE: call = tail call signext i32 @memcmp +define signext i32 @test4(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { + +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 65) + ret i32 %call +} + +define signext i32 @test5(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2, i32 signext %SIZE) { + ; CHECK: call = tail call signext i32 @memcmp + ; CHECK-BE: call = tail call signext i32 @memcmp +entry: + %0 = bitcast i32* %buffer1 to i8* + %1 = bitcast i32* %buffer2 to i8* + %conv = sext i32 %SIZE to i64 + %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 %conv) + ret i32 %call +} |