Finally pass "two floats in a 64-bit unit" as a <2 x float> instead of
as a double in the x86-64 ABI. This allows us to generate much better
code for certain things, e.g.:

    _Complex float f32(_Complex float A, _Complex float B) {
      return A+B;
    }

Used to compile into (look at the integer silliness!):

    _f32:                                   ## @f32
    ## BB#0:                                ## %entry
        movd    %xmm1, %rax
        movd    %eax, %xmm1
        movd    %xmm0, %rcx
        movd    %ecx, %xmm0
        addss   %xmm1, %xmm0
        movd    %xmm0, %edx
        shrq    $32, %rax
        movd    %eax, %xmm0
        shrq    $32, %rcx
        movd    %ecx, %xmm1
        addss   %xmm0, %xmm1
        movd    %xmm1, %eax
        shlq    $32, %rax
        addq    %rdx, %rax
        movd    %rax, %xmm0
        ret

Now we get:

    _f32:                                   ## @f32
        movdqa  %xmm0, %xmm2
        addss   %xmm1, %xmm2
        pshufd  $16, %xmm2, %xmm2
        pshufd  $1, %xmm1, %xmm1
        pshufd  $1, %xmm0, %xmm0
        addss   %xmm1, %xmm0
        pshufd  $16, %xmm0, %xmm1
        movdqa  %xmm2, %xmm0
        unpcklps        %xmm1, %xmm0
        ret

and compile stuff like:

    extern float _Complex ccoshf( float _Complex ) ;
    float _Complex ccosf ( float _Complex z ) {
      float _Complex iz;
      (__real__ iz) = -(__imag__ z);
      (__imag__ iz) = (__real__ z);
      return ccoshf(iz);
    }

into:

    _ccosf:                                 ## @ccosf
    ## BB#0:                                ## %entry
        pshufd  $1, %xmm0, %xmm1
        xorps   LCPI4_0(%rip), %xmm1
        unpcklps        %xmm0, %xmm1
        movaps  %xmm1, %xmm0
        jmp     _ccoshf                 ## TAILCALL

instead of:

    _ccosf:                                 ## @ccosf
    ## BB#0:                                ## %entry
        movd    %xmm0, %rax
        movq    %rax, %rcx
        shlq    $32, %rcx
        shrq    $32, %rax
        xorl    $-2147483648, %eax      ## imm = 0xFFFFFFFF80000000
        addq    %rcx, %rax
        movd    %rax, %xmm0
        jmp     _ccoshf                 ## TAILCALL

There is still "stuff to be done" here for the struct case, but this
resolves rdar://6379669 - [x86-64 ABI] Pass and return _Complex float /
double efficiently

llvm-svn: 112111
author: Chris Lattner <sabre@nondot.org> 2010-08-25 23:39:14 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-08-25 23:39:14 +0000
commit: 9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4 (patch)
tree: 396a207a017272333d116b34a86339d2dfec2638 /clang/test/CodeGen
parent: bf154593f965e5618f1af08719d8d5b92d23ae73 (diff)
download: bcm5719-llvm-9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4.tar.gz
bcm5719-llvm-9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4.zip
1 files changed, 8 insertions, 3 deletions
diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/x86_64-arguments.c
index 9fd08dd4a8b..039dd27869b 100644
--- a/clang/test/CodeGen/x86_64-arguments.c
+++ b/clang/test/CodeGen/x86_64-arguments.c
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o %t %s
-// RUN: FileCheck < %t %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s| FileCheck %s
 
 // CHECK: %0 = type { i64, double }
 
@@ -215,8 +214,14 @@ void f30(struct S0 p_4) {
 // rdar://8251384
 struct f31foo { float a, b, c; };
 float f31(struct f31foo X) {
-  // CHECK: define float @f31(double %X.coerce0, float %X.coerce1)
+  // CHECK: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
   return X.c;
 }
 
+_Complex float f32(_Complex float A, _Complex float B) {
+  // rdar://6379669
+  // CHECK: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
+  return A+B;
+}
+
author	Chris Lattner <sabre@nondot.org>	2010-08-25 23:39:14 +0000
committer	Chris Lattner <sabre@nondot.org>	2010-08-25 23:39:14 +0000
commit	9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4 (patch)
tree	396a207a017272333d116b34a86339d2dfec2638 /clang/test/CodeGen
parent	bf154593f965e5618f1af08719d8d5b92d23ae73 (diff)
download	bcm5719-llvm-9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4.tar.gz bcm5719-llvm-9f8b4518761b3701b1eb2607fe4ebea14f6ea1f4.zip