summary | refs | log | tree | commit | diff | stats
path: root/clang/test/CodeGenCXX/alloca-align.cpp
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2010-06-28 19:56:59 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-06-28 19:56:59 +0000
commit: a7d81ab7f3df57e3e26e55037bb00735ba87c145 (patch)
tree: c7660cab5672f1be3c48697927fd0e91bb8c5ae6 /clang/test/CodeGenCXX/alloca-align.cpp
parent: c42461e145fb458377e3c4d8892c1348c7dad4e1 (diff)
download: bcm5719-llvm-a7d81ab7f3df57e3e26e55037bb00735ba87c145.tar.gz
          bcm5719-llvm-a7d81ab7f3df57e3e26e55037bb00735ba87c145.zip
X86-64:
pass/return structs of float/int as float/i32 instead of double/i64
to make the code generated for ABI cleaner. Passing in the low part
of a double is the same as passing in a float.

For example, we now compile:

    struct DeclGroup {
      float NumDecls;
    };
    float foo(DeclGroup D);
    void bar(DeclGroup *D) {
      foo(*D);
    }

into:

    %struct.DeclGroup = type { float }

    define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
    entry:
      %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
      %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
      store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
      %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
      %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
      %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
      call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
      %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
      %0 = load float* %coerce.dive, align 1 ; <float> [#uses=1]
      %call = call float @_Z3foo9DeclGroup(float %0) ; <float> [#uses=0]
      ret void
    }

instead of:

    %struct.DeclGroup = type { float }

    define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
    entry:
      %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
      %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
      %tmp3 = alloca double ; <double*> [#uses=2]
      store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
      %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
      %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
      %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
      call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
      %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
      %0 = bitcast double* %tmp3 to float* ; <float*> [#uses=1]
      %1 = load float* %coerce.dive ; <float> [#uses=1]
      store float %1, float* %0, align 1
      %2 = load double* %tmp3 ; <double> [#uses=1]
      %call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
      ret void
    }

which is this machine code (at -O0):

    __Z3barP9DeclGroup:
      subq $24, %rsp
      movq %rdi, 16(%rsp)
      movq 16(%rsp), %rdi
      leaq 8(%rsp), %rax
      movl (%rdi), %ecx
      movl %ecx, (%rax)
      movss 8(%rsp), %xmm0
      callq __Z3foo9DeclGroup
      addq $24, %rsp
      ret

vs this:

    __Z3barP9DeclGroup:
      subq $24, %rsp
      movq %rdi, 16(%rsp)
      movq 16(%rsp), %rdi
      leaq 8(%rsp), %rax
      movl (%rdi), %ecx
      movl %ecx, (%rax)
      movss 8(%rsp), %xmm0
      movss %xmm0, (%rsp)
      movsd (%rsp), %xmm0
      callq __Z3foo9DeclGroup
      addq $24, %rsp
      ret

At -O3, it is the difference between this now:

    __Z3barP9DeclGroup:
      movss (%rdi), %xmm0
      jmp __Z3foo9DeclGroup # TAILCALL

vs this before:

    __Z3barP9DeclGroup:
      movl (%rdi), %eax
      movd %rax, %xmm0
      jmp __Z3foo9DeclGroup # TAILCALL

llvm-svn: 107048
Diffstat (limited to 'clang/test/CodeGenCXX/alloca-align.cpp')
-rw-r--r-- clang/test/CodeGenCXX/alloca-align.cpp | 2
1 file changed, 1 insertion, 1 deletion
diff --git a/clang/test/CodeGenCXX/alloca-align.cpp b/clang/test/CodeGenCXX/alloca-align.cpp
index b70e366f4cf..99d6ab5845f 100644
--- a/clang/test/CodeGenCXX/alloca-align.cpp
+++ b/clang/test/CodeGenCXX/alloca-align.cpp
@@ -18,7 +18,7 @@ extern "C" void f1() {
(void) (struct s0) { 0, 0, 0, 0 };
}
-// CHECK: define i64 @f2
+// CHECK: define i32 @f2
// CHECK: alloca %struct.s1, align 2
struct s1 { short x; short y; };
extern "C" struct s1 f2(int a, struct s1 *x, struct s1 *y) {
OpenPOWER on IntegriCloud