diff options
author | Chris Lattner <sabre@nondot.org> | 2010-08-25 23:39:14 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2010-08-25 23:39:14 +0000 |
commit | 9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4 (patch) | |
tree | 396a207a017272333d116b34a86339d2dfec2638 /clang | |
parent | bf154593f965e5618f1af08719d8d5b92d23ae73 (diff) | |
download | bcm5719-llvm-9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4.tar.gz bcm5719-llvm-9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4.zip |
Finally pass "two floats in a 64-bit unit" as a <2 x float> instead of
as a double in the x86-64 ABI. This allows us to generate much better
code for certain things, e.g.:
_Complex float f32(_Complex float A, _Complex float B) {
return A+B;
}
Used to compile into (look at the integer silliness!):
_f32: ## @f32
## BB#0: ## %entry
movd %xmm1, %rax
movd %eax, %xmm1
movd %xmm0, %rcx
movd %ecx, %xmm0
addss %xmm1, %xmm0
movd %xmm0, %edx
shrq $32, %rax
movd %eax, %xmm0
shrq $32, %rcx
movd %ecx, %xmm1
addss %xmm0, %xmm1
movd %xmm1, %eax
shlq $32, %rax
addq %rdx, %rax
movd %rax, %xmm0
ret
Now we get:
_f32: ## @f32
movdqa %xmm0, %xmm2
addss %xmm1, %xmm2
pshufd $16, %xmm2, %xmm2
pshufd $1, %xmm1, %xmm1
pshufd $1, %xmm0, %xmm0
addss %xmm1, %xmm0
pshufd $16, %xmm0, %xmm1
movdqa %xmm2, %xmm0
unpcklps %xmm1, %xmm0
ret
and compile stuff like:
extern float _Complex ccoshf( float _Complex ) ;
float _Complex ccosf ( float _Complex z ) {
float _Complex iz;
(__real__ iz) = -(__imag__ z);
(__imag__ iz) = (__real__ z);
return ccoshf(iz);
}
into:
_ccosf: ## @ccosf
## BB#0: ## %entry
pshufd $1, %xmm0, %xmm1
xorps LCPI4_0(%rip), %xmm1
unpcklps %xmm0, %xmm1
movaps %xmm1, %xmm0
jmp _ccoshf ## TAILCALL
instead of:
_ccosf: ## @ccosf
## BB#0: ## %entry
movd %xmm0, %rax
movq %rax, %rcx
shlq $32, %rcx
shrq $32, %rax
xorl $-2147483648, %eax ## imm = 0xFFFFFFFF80000000
addq %rcx, %rax
movd %rax, %xmm0
jmp _ccoshf ## TAILCALL
There is still "stuff to be done" here for the struct case,
but this resolves rdar://6379669 - [x86-64 ABI] Pass and return
_Complex float / double efficiently
llvm-svn: 112111
Diffstat (limited to 'clang')
-rw-r--r-- | clang/lib/CodeGen/TargetInfo.cpp | 8 | ||||
-rw-r--r-- | clang/test/CodeGen/x86_64-arguments.c | 11 |
2 files changed, 10 insertions, 9 deletions
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index a08e7f4b806..c5b858dbfab 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -1294,12 +1294,8 @@ GetSSETypeAtOffset(const llvm::Type *IRType, unsigned IROffset, // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the // case. if (ContainsFloatAtOffset(IRType, IROffset, getTargetData()) && - ContainsFloatAtOffset(IRType, IROffset+4, getTargetData())) { - // FIXME: <2 x float> doesn't pass as one XMM register yet. Don't enable - // this code until it does. - //return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); - - } + ContainsFloatAtOffset(IRType, IROffset+4, getTargetData())) + return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); return llvm::Type::getDoubleTy(getVMContext()); } diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/x86_64-arguments.c index 9fd08dd4a8b..039dd27869b 100644 --- a/clang/test/CodeGen/x86_64-arguments.c +++ b/clang/test/CodeGen/x86_64-arguments.c @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o %t %s -// RUN: FileCheck < %t %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s| FileCheck %s // CHECK: %0 = type { i64, double } @@ -215,8 +214,14 @@ void f30(struct S0 p_4) { // rdar://8251384 struct f31foo { float a, b, c; }; float f31(struct f31foo X) { - // CHECK: define float @f31(double %X.coerce0, float %X.coerce1) + // CHECK: define float @f31(<2 x float> %X.coerce0, float %X.coerce1) return X.c; } +_Complex float f32(_Complex float A, _Complex float B) { + // rdar://6379669 + // CHECK: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce) + return A+B; +} + |