From 9534ea03e89d46eece6f892f38fa8da331828249 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 21 Aug 2008 21:00:15 +0000 Subject: Fix a number of byval / memcpy / memset related codegen issues. 1. x86-64 byval alignment should be max of 8 and alignment of type. Previously the code was not doing what the commit message was saying. 2. Do not use byte repeat move and store operations. These are slow. llvm-svn: 55139 --- llvm/test/CodeGen/X86/2004-02-12-Memcpy.ll | 4 +-- llvm/test/CodeGen/X86/byval3.ll | 4 +-- llvm/test/CodeGen/X86/byval4.ll | 4 +-- llvm/test/CodeGen/X86/byval5.ll | 2 +- llvm/test/CodeGen/X86/memset-2.ll | 45 +++++++++++++++++++++++++++++ llvm/test/CodeGen/X86/memset64-on-x86-32.ll | 12 ++++---- 6 files changed, 57 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/X86/memset-2.ll (limited to 'llvm/test') diff --git a/llvm/test/CodeGen/X86/2004-02-12-Memcpy.ll b/llvm/test/CodeGen/X86/2004-02-12-Memcpy.ll index 59364c1f6d6..56bb21caf3c 100644 --- a/llvm/test/CodeGen/X86/2004-02-12-Memcpy.ll +++ b/llvm/test/CodeGen/X86/2004-02-12-Memcpy.ll @@ -1,11 +1,11 @@ -; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3 +; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1 @A = global [32 x i32] zeroinitializer @B = global [32 x i32] zeroinitializer declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) -define void @main() { +define void @main() nounwind { ; dword copy call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*), i8* bitcast ([32 x i32]* @B to i8*), diff --git a/llvm/test/CodeGen/X86/byval3.ll b/llvm/test/CodeGen/X86/byval3.ll index 074bab4c0a9..707a4c5d278 100644 --- a/llvm/test/CodeGen/X86/byval3.ll +++ b/llvm/test/CodeGen/X86/byval3.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2 +; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 %struct.s = 
type { i32, i32, i32, i32, i32, i32, i32, i32, @@ -7,7 +7,7 @@ i32, i32, i32, i32, i32, i32, i32, i32, i32 } -define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) { +define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) nounwind { entry: %d = alloca %struct.s, align 16 %tmp = getelementptr %struct.s* %d, i32 0, i32 0 diff --git a/llvm/test/CodeGen/X86/byval4.ll b/llvm/test/CodeGen/X86/byval4.ll index d2fa9e289e7..5576c361ae1 100644 --- a/llvm/test/CodeGen/X86/byval4.ll +++ b/llvm/test/CodeGen/X86/byval4.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2 +; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16, @@ -13,7 +13,7 @@ define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3, - i16 signext %a4, i16 signext %a5, i16 signext %a6) { + i16 signext %a4, i16 signext %a5, i16 signext %a6) nounwind { entry: %a = alloca %struct.s, align 16 %tmp = getelementptr %struct.s* %a, i32 0, i32 0 diff --git a/llvm/test/CodeGen/X86/byval5.ll b/llvm/test/CodeGen/X86/byval5.ll index fd9c197bbfd..c6f4588dd45 100644 --- a/llvm/test/CodeGen/X86/byval5.ll +++ b/llvm/test/CodeGen/X86/byval5.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2 +; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8, diff --git a/llvm/test/CodeGen/X86/memset-2.ll b/llvm/test/CodeGen/X86/memset-2.ll new file mode 100644 index 00000000000..2ad665cda75 --- /dev/null +++ b/llvm/test/CodeGen/X86/memset-2.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as < %s | llc -march=x86 | not grep rep +; RUN: llvm-as < %s | llc -march=x86 | grep memset + +declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind + +define fastcc i32 @cli_scanzip(i32 %desc) nounwind { 
+entry: + br label %bb8.i.i.i.i + +bb8.i.i.i.i: ; preds = %bb8.i.i.i.i, %entry + icmp eq i32 0, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i + +bb32.i.i.i: ; preds = %bb61.i.i.i + ptrtoint i8* %tail.0.i.i.i to i32 ; <i32>:1 [#uses=1] + sub i32 0, %1 ; <i32>:2 [#uses=1] + icmp sgt i32 %2, 19 ; <i1>:3 [#uses=1] + br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i + +bb34.i.i.i: ; preds = %bb32.i.i.i + load i32* null, align 4 ; <i32>:4 [#uses=1] + icmp eq i32 %4, 101010256 ; <i1>:5 [#uses=1] + br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i + +bb8.i11.i.i.i: ; preds = %bb8.i11.i.i.i, %bb34.i.i.i + icmp eq i32 0, 0 ; <i1>:6 [#uses=1] + br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i + +cli_dbgmsg.exit49.i: ; preds = %bb8.i11.i.i.i + icmp eq [32768 x i8]* null, null ; <i1>:7 [#uses=1] + br i1 %7, label %bb1.i28.i, label %bb8.i.i + +bb61.i.i.i: ; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i + %tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0 ; <i8*> [#uses=2] + load i8* %tail.0.i.i.i, align 1 ; <i8>:8 [#uses=1] + icmp eq i8 %8, 80 ; <i1>:9 [#uses=1] + br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i + +bb1.i28.i: ; preds = %cli_dbgmsg.exit49.i + call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind + unreachable + +bb8.i.i: ; preds = %bb8.i.i, %cli_dbgmsg.exit49.i + br label %bb8.i.i +} diff --git a/llvm/test/CodeGen/X86/memset64-on-x86-32.ll b/llvm/test/CodeGen/X86/memset64-on-x86-32.ll index 7045c0faf0b..d76d4d47924 100644 --- a/llvm/test/CodeGen/X86/memset64-on-x86-32.ll +++ b/llvm/test/CodeGen/X86/memset64-on-x86-32.ll @@ -1,12 +1,10 @@ -; RUN: llvm-as < %s | llc -march=x86 | grep stosb +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stosl +; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq | count 10 -target triple = "i386-apple-darwin9" - %struct.S = type { [80 x i8] } - -define %struct.S* @bork() { +define void @bork() nounwind { entry: - call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 1 ) - ret
%struct.S* null + call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 ) + ret void } declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind -- cgit v1.2.3