author    Reid Kleckner <reid@kleckner.net>    2014-10-31 22:00:51 +0000
committer Reid Kleckner <reid@kleckner.net>    2014-10-31 22:00:51 +0000
commit    80944df6f478018ae895afb27ebff1ab881d4936 (patch)
tree      10f2ac21b9f143eee54ceecd693525454fdf6044 /clang/test/CodeGen
parent    49be5b357b10e33b66853259404e6197145de4fe (diff)
Implement IRGen for the x86 vectorcall convention
The most complex aspect of the convention is the handling of homogeneous
vector and floating point aggregates. Reuse the homogeneous aggregate
classification code that we use on PPC64 and ARM for this.

This convention also has a C mangling, and we apparently implement that in
both Clang and LLVM.

Reviewed By: majnemer

Differential Revision: http://reviews.llvm.org/D6063

llvm-svn: 221006
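To make the convention concrete, here is a small hypothetical user-code
example (not part of this patch) in the spirit of the tests below. Under the
rules this patch implements, a two-element double aggregate is a homogeneous
float aggregate (HFA), so each element is expanded into its own SSE register
argument:

/* Hypothetical example: `Pair` has two leaf fields of a single
 * floating-point type, so it qualifies as an HFA and is expanded into
 * XMM register arguments instead of being passed in memory. The
 * expected IR is roughly:
 *   define x86_vectorcallcc double @"\01dot2@@16"(double %p.0, double %p.1)
 */
struct Pair { double x, y; };

double __vectorcall dot2(struct Pair p) {
  return p.x * p.y;
}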
Diffstat (limited to 'clang/test/CodeGen')
-rw-r--r--  clang/test/CodeGen/mangle-windows.c       39
-rw-r--r--  clang/test/CodeGen/microsoft-call-conv.c   7
-rw-r--r--  clang/test/CodeGen/vectorcall.c           77
3 files changed, 117 insertions, 6 deletions
diff --git a/clang/test/CodeGen/mangle-windows.c b/clang/test/CodeGen/mangle-windows.c
index 37d10182835..4a108751aa4 100644
--- a/clang/test/CodeGen/mangle-windows.c
+++ b/clang/test/CodeGen/mangle-windows.c
@@ -1,33 +1,68 @@
// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s
// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-mingw32 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-mingw32 | FileCheck %s --check-prefix=X64
void __stdcall f1(void) {}
// CHECK: define x86_stdcallcc void @"\01_f1@0"
+// X64: define void @f1(
void __fastcall f2(void) {}
// CHECK: define x86_fastcallcc void @"\01@f2@0"
+// X64: define void @f2(
void __stdcall f3() {}
// CHECK: define x86_stdcallcc void @"\01_f3@0"
+// X64: define void @f3(
void __fastcall f4(char a) {}
// CHECK: define x86_fastcallcc void @"\01@f4@4"
+// X64: define void @f4(
void __fastcall f5(short a) {}
// CHECK: define x86_fastcallcc void @"\01@f5@4"
+// X64: define void @f5(
void __fastcall f6(int a) {}
// CHECK: define x86_fastcallcc void @"\01@f6@4"
+// X64: define void @f6(
void __fastcall f7(long a) {}
// CHECK: define x86_fastcallcc void @"\01@f7@4"
+// X64: define void @f7(
void __fastcall f8(long long a) {}
// CHECK: define x86_fastcallcc void @"\01@f8@8"
+// X64: define void @f8(
void __fastcall f9(long long a, char b, char c, short d) {}
-// CHECK: define x86_fastcallcc void @"\01@f9@20"(i64 %a, i8 signext %b, i8
-// signext %c, i16 signext %d)
+// CHECK: define x86_fastcallcc void @"\01@f9@20"(i64 %a, i8 signext %b, i8 signext %c, i16 signext %d)
+// X64: define void @f9(
void f12(void) {}
// CHECK: define void @f12(
+// X64: define void @f12(
+
+void __vectorcall v1(void) {}
+// CHECK: define x86_vectorcallcc void @"\01v1@@0"(
+// X64: define x86_vectorcallcc void @"\01v1@@0"(
+
+void __vectorcall v2(char a) {}
+// CHECK: define x86_vectorcallcc void @"\01v2@@4"(
+// X64: define x86_vectorcallcc void @"\01v2@@8"(
+
+void __vectorcall v3(short a) {}
+// CHECK: define x86_vectorcallcc void @"\01v3@@4"(
+// X64: define x86_vectorcallcc void @"\01v3@@8"(
+
+void __vectorcall v4(int a) {}
+// CHECK: define x86_vectorcallcc void @"\01v4@@4"(
+// X64: define x86_vectorcallcc void @"\01v4@@8"(
+
+void __vectorcall v5(long long a) {}
+// CHECK: define x86_vectorcallcc void @"\01v5@@8"(
+// X64: define x86_vectorcallcc void @"\01v5@@8"(
+
+void __vectorcall v6(char a, char b) {}
+// CHECK: define x86_vectorcallcc void @"\01v6@@8"(
+// X64: define x86_vectorcallcc void @"\01v6@@16"(
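An observation on the decorated names checked above (inferred from the test
expectations, not stated in the patch): __vectorcall mangles a function as
name@@N with no leading underscore or '@', where N is the parameter area size
in bytes, each argument rounded up to the stack slot width (4 bytes on
x86-32, 8 bytes on x86-64). A hypothetical extra case following that rule:

/* Hypothetical: int (4 bytes) + long long (8 bytes) on x86-32 gives
 * N = 12; two 8-byte slots on x86-64 give N = 16. Expected decorations:
 *   i386:   "\01v7@@12"
 *   x86-64: "\01v7@@16"
 */
void __vectorcall v7(int a, long long b) {}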
diff --git a/clang/test/CodeGen/microsoft-call-conv.c b/clang/test/CodeGen/microsoft-call-conv.c
index aabe825516a..c24db1296f1 100644
--- a/clang/test/CodeGen/microsoft-call-conv.c
+++ b/clang/test/CodeGen/microsoft-call-conv.c
@@ -20,9 +20,8 @@ void __thiscall f6(void) {
f3();
// CHECK: call x86_thiscallcc void @f3()
}
-// FIXME: Add this to LLVM.
void __vectorcall f61(void) {
-// CHECK-LABEL: define void @f61()
+// CHECK-LABEL: define x86_vectorcallcc void @f61()
f3();
// CHECK: call x86_thiscallcc void @f3()
}
@@ -41,7 +40,7 @@ int main(void) {
// CHECK: call x86_fastcallcc void @f4()
// CHECK: call x86_stdcallcc void @f5()
// CHECK: call x86_thiscallcc void @f6()
- // CHECK: call void @f61()
+ // CHECK: call x86_vectorcallcc void @f61()
pf1(); pf2(); pf3(); pf4(); pf5(); pf6(); pf7();
// CHECK: call x86_fastcallcc void %{{.*}}()
// CHECK: call x86_stdcallcc void %{{.*}}()
@@ -49,7 +48,7 @@ int main(void) {
// CHECK: call x86_fastcallcc void %{{.*}}()
// CHECK: call x86_stdcallcc void %{{.*}}()
// CHECK: call x86_thiscallcc void %{{.*}}()
- // CHECK: call void %{{.*}}()
+ // CHECK: call x86_vectorcallcc void %{{.*}}()
return 0;
}
diff --git a/clang/test/CodeGen/vectorcall.c b/clang/test/CodeGen/vectorcall.c
new file mode 100644
index 00000000000..17927c7a3de
--- /dev/null
+++ b/clang/test/CodeGen/vectorcall.c
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
+
+void __vectorcall v1(int a, int b) {}
+// CHECK: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
+// X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
+
+void __vectorcall v2(char a, char b) {}
+// CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
+// X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
+
+struct Small { int a; };
+void __vectorcall v3(int a, struct Small b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, %struct.Small* byval align 4 %b, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
+
+struct Large { int a[5]; };
+void __vectorcall v4(int a, struct Large b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
+
+struct HFA2 { double x, y; };
+struct HFA4 { double w, x, y, z; };
+struct HFA5 { double v, w, x, y, z; };
+
+void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c)
+
+// HFAs that would require more than six total SSE registers are passed
+// indirectly. Additional vector arguments can consume the rest of the SSE
+// registers.
+void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* inreg %b, double %c)
+// X64: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* align 8 %b, double %c)
+
+// Ensure that we pass builtin types directly while counting them against the
+// SSE register usage.
+void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
+// X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* align 8 %f)
+
+// Aggregates with more than four elements are not HFAs and are passed byval.
+// Because they are not classified as homogeneous, they don't get special
+// handling to ensure alignment.
+void __vectorcall hfa4(struct HFA5 a) {}
+// CHECK: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
+// X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
+
+// Return HFAs of 4 or fewer elements in registers.
+static struct HFA2 g_hfa2;
+struct HFA2 __vectorcall hfa5(void) { return g_hfa2; }
+// CHECK: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+// X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+
+typedef float __attribute__((vector_size(16))) v4f32;
+struct HVA2 { v4f32 x, y; };
+struct HVA4 { v4f32 w, x, y, z; };
+
+void __vectorcall hva1(int a, struct HVA4 b, int c) {}
+// CHECK: define x86_vectorcallcc void @"\01hva1@@72"(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01hva1@@80"(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c)
+
+void __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
+// CHECK: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* inreg %b, <4 x float> %c)
+// X64: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* align 16 %b, <4 x float> %c)
+
+void __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
+// CHECK: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
+// X64: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* align 16 %f)
+
+typedef float __attribute__((ext_vector_type(3))) v3f32;
+struct OddSizeHVA { v3f32 x, y; };
+
+void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
+// CHECK: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
+// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
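For readers unfamiliar with the classification the commit message reuses from
PPC64 and ARM, a rough illustrative sketch of the homogeneous-aggregate test
follows. This is not Clang's actual code; the real logic also recurses
through nested structs and arrays, but the core rule matches the HFA5 and HVA
cases tested above:

/* Illustrative only: an aggregate is homogeneous when all of its leaf
 * elements share one floating-point or vector type and there are at
 * most four of them. */
enum { MAX_HOMOGENEOUS_MEMBERS = 4 };

struct ElemInfo { int type_id; };  /* stand-in for a real type descriptor */

int is_homogeneous_aggregate(const struct ElemInfo *elems, int n) {
  if (n == 0 || n > MAX_HOMOGENEOUS_MEMBERS)
    return 0;                      /* HFA5 fails here: five elements */
  for (int i = 1; i < n; i++)
    if (elems[i].type_id != elems[0].type_id)
      return 0;                    /* mixed element types: not homogeneous */
  return 1;
}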