summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-06-28 19:56:59 +0000
committerChris Lattner <sabre@nondot.org>2010-06-28 19:56:59 +0000
commita7d81ab7f3df57e3e26e55037bb00735ba87c145 (patch)
treec7660cab5672f1be3c48697927fd0e91bb8c5ae6
parentc42461e145fb458377e3c4d8892c1348c7dad4e1 (diff)
downloadbcm5719-llvm-a7d81ab7f3df57e3e26e55037bb00735ba87c145.tar.gz
bcm5719-llvm-a7d81ab7f3df57e3e26e55037bb00735ba87c145.zip
X86-64:
pass/return structs of float/int as float/i32 instead of double/i64 to make the code generated for ABI cleaner. Passing in the low part of a double is the same as passing in a float. For example, we now compile: struct DeclGroup { float NumDecls; }; float foo(DeclGroup D); void bar(DeclGroup *D) { foo(*D); } into: %struct.DeclGroup = type { float } define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind { entry: %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2] %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2] store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1] %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1] %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1] call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false) %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1] %0 = load float* %coerce.dive, align 1 ; <float> [#uses=1] %call = call float @_Z3foo9DeclGroup(float %0) ; <float> [#uses=0] ret void } instead of: %struct.DeclGroup = type { float } define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind { entry: %D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2] %agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2] %tmp3 = alloca double ; <double*> [#uses=2] store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr %tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1] %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1] %tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1] call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false) %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1] %0 = bitcast double* %tmp3 to float* ; <float*> [#uses=1] %1 = load float* %coerce.dive ; <float> [#uses=1] store float %1, float* %0, align 1 %2 = load double* %tmp3 ; <double> [#uses=1] %call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0] ret void } which is this machine code (at -O0): __Z3barP9DeclGroup: subq $24, %rsp movq %rdi, 16(%rsp) movq 16(%rsp), %rdi leaq 8(%rsp), %rax movl (%rdi), %ecx movl %ecx, (%rax) movss 8(%rsp), %xmm0 callq __Z3foo9DeclGroup addq $24, %rsp ret vs this: __Z3barP9DeclGroup: subq $24, %rsp movq %rdi, 16(%rsp) movq 16(%rsp), %rdi leaq 8(%rsp), %rax movl (%rdi), %ecx movl %ecx, (%rax) movss 8(%rsp), %xmm0 movss %xmm0, (%rsp) movsd (%rsp), %xmm0 callq __Z3foo9DeclGroup addq $24, %rsp ret At -O3, it is the difference between this now: __Z3barP9DeclGroup: movss (%rdi), %xmm0 jmp __Z3foo9DeclGroup # TAILCALL vs this before: __Z3barP9DeclGroup: movl (%rdi), %eax movd %rax, %xmm0 jmp __Z3foo9DeclGroup # TAILCALL llvm-svn: 107048
-rw-r--r--clang/lib/CodeGen/TargetInfo.cpp12
-rw-r--r--clang/test/CodeGen/x86_64-arguments.c7
-rw-r--r--clang/test/CodeGenCXX/alloca-align.cpp2
-rw-r--r--clang/test/CodeGenCXX/virtual-functions-incomplete-types.cpp2
4 files changed, 18 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index e95b3b6e901..2f8dd6c32d6 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1086,6 +1086,13 @@ ABIArgInfo X86_64ABIInfo::getCoerceResult(QualType Ty,
if (Ty->isIntegralOrEnumerationType() || Ty->hasPointerRepresentation())
return (Ty->isPromotableIntegerType() ?
ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+
+ // If this is a 32-bit structure that is passed as an int64, then it will be
+ // passed in the low 32-bits of a 64-bit GPR, which is the same as how an
+ // i32 is passed. Coerce to a i32 instead of a i64.
+ if (Context.getTypeSizeInChars(Ty).getQuantity() == 4)
+ CoerceTo = llvm::Type::getInt32Ty(CoerceTo->getContext());
+
} else if (CoerceTo->isDoubleTy()) {
assert(Ty.isCanonical() && "should always have a canonical type here");
assert(!Ty.hasQualifiers() && "should never have a qualified type here");
@@ -1094,6 +1101,11 @@ ABIArgInfo X86_64ABIInfo::getCoerceResult(QualType Ty,
if (Ty == Context.FloatTy || Ty == Context.DoubleTy)
return ABIArgInfo::getDirect();
+ // If this is a 32-bit structure that is passed as a double, then it will be
+ // passed in the low 32-bits of the XMM register, which is the same as how a
+ // float is passed. Coerce to a float instead of a double.
+ if (Context.getTypeSizeInChars(Ty).getQuantity() == 4)
+ CoerceTo = llvm::Type::getFloatTy(CoerceTo->getContext());
}
return ABIArgInfo::getCoerce(CoerceTo);
diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/x86_64-arguments.c
index d182aa25d91..24223901088 100644
--- a/clang/test/CodeGen/x86_64-arguments.c
+++ b/clang/test/CodeGen/x86_64-arguments.c
@@ -92,9 +92,10 @@ void f16(float a, float b, float c, float d, float e, float f, float g, float h,
void f17(float a, float b, float c, float d, float e, float f, float g, float h,
long double X) {}
-// Check for valid coercion.
-// CHECK: [[f18_t1:%.*]] = trunc i64 {{.*}} to i32
-// CHECK: store i32 [[f18_t1]], i32*
+// Check for valid coercion. The struct should be passed/returned as i32, not
+// as i64 for better code quality.
+// rdar://8135035
+// CHECK: define void @f18(i32 %a, i32)
struct f18_s0 { int f0; };
void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
diff --git a/clang/test/CodeGenCXX/alloca-align.cpp b/clang/test/CodeGenCXX/alloca-align.cpp
index b70e366f4cf..99d6ab5845f 100644
--- a/clang/test/CodeGenCXX/alloca-align.cpp
+++ b/clang/test/CodeGenCXX/alloca-align.cpp
@@ -18,7 +18,7 @@ extern "C" void f1() {
(void) (struct s0) { 0, 0, 0, 0 };
}
-// CHECK: define i64 @f2
+// CHECK: define i32 @f2
// CHECK: alloca %struct.s1, align 2
struct s1 { short x; short y; };
extern "C" struct s1 f2(int a, struct s1 *x, struct s1 *y) {
diff --git a/clang/test/CodeGenCXX/virtual-functions-incomplete-types.cpp b/clang/test/CodeGenCXX/virtual-functions-incomplete-types.cpp
index 991c2bc7220..052a0192d66 100644
--- a/clang/test/CodeGenCXX/virtual-functions-incomplete-types.cpp
+++ b/clang/test/CodeGenCXX/virtual-functions-incomplete-types.cpp
@@ -9,7 +9,7 @@ struct B {
void B::f() { }
-// CHECK: define i64 @_ZN1D1gEv(%struct.B* %this)
+// CHECK: define i32 @_ZN1D1gEv(%struct.B* %this)
// CHECK: declare void @_ZN1B1gEv()
struct C;
OpenPOWER on IntegriCloud