8 files changed, 714 insertions, 0 deletions
diff --git a/clang/test/CodeGen/aarch64-arguments.c b/clang/test/CodeGen/aarch64-arguments.c
new file mode 100644
index 00000000000..2255b8244d1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-arguments.c
@@ -0,0 +1,194 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s
+
+// Sign extension is performed by the callee on AArch64, which means
+// that we *shouldn't* tag arguments and returns with their extension.
+
+// PCS: define i8 @f0(i16 %a)
+char f0(short a) {
+  return a;
+}
+
+// PCS: define [1 x i64] @f1()
+struct s1 { char f0; };
+struct s1 f1(void) {}
+
+// PCS: define [1 x i64] @f2()
+struct s2 { short f0; };
+struct s2 f2(void) {}
+
+// PCS: define [1 x i64] @f3()
+struct s3 { int f0; };
+struct s3 f3(void) {}
+
+// PCS: define [1 x i64] @f4()
+struct s4 { struct s4_0 { int f0; } f0; };
+struct s4 f4(void) {}
+
+// PCS: define [1 x i64] @f5()
+struct s5 { struct { } f0; int f1; };
+struct s5 f5(void) {}
+
+// PCS: define  [1 x i64] @f6()
+struct s6 { int f0[1]; };
+struct s6 f6(void) {}
+
+// PCS: define void @f7()
+struct s7 { struct { int : 0; } f0; };
+struct s7 f7(void) {}
+
+// PCS: define  void @f8()
+struct s8 { struct { int : 0; } f0[1]; };
+struct s8 f8(void) {}
+
+// PCS: define [1 x i64] @f9()
+struct s9 { long f0; int : 0; };
+struct s9 f9(void) {}
+
+// PCS: define [1 x i64] @f10()
+struct s10 { long f0; int : 0; int : 0; };
+struct s10 f10(void) {}
+
+// PCS: define [1 x i64] @f11()
+struct s11 { int : 0; long f0; };
+struct s11 f11(void) {}
+
+// PCS: define [1 x i64] @f12()
+union u12 { char f0; short f1; int f2; long f3; };
+union u12 f12(void) {}
+
+// PCS: define %struct.s13 @f13()
+struct s13 { float f0; };
+struct s13 f13(void) {}
+
+// PCS: define %union.u14 @f14()
+union u14 { float f0; };
+union u14 f14(void) {}
+
+// PCS: define void @f15()
+void f15(struct s7 a0) {}
+
+// PCS: define void @f16()
+void f16(struct s8 a0) {}
+
+// PCS: define [1 x i64] @f17()
+struct s17 { short f0 : 13; char f1 : 4; };
+struct s17 f17(void) {}
+
+// PCS: define [1 x i64] @f18()
+struct s18 { short f0; char f1 : 4; };
+struct s18 f18(void) {}
+
+// PCS: define [1 x i64] @f19()
+struct s19 { long f0; struct s8 f1; };
+struct s19 f19(void) {}
+
+// PCS: define [1 x i64] @f20()
+struct s20 { struct s8 f1; long f0; };
+struct s20 f20(void) {}
+
+// PCS: define [1 x i64] @f21()
+struct s21 { struct {} f1; long f0 : 4; };
+struct s21 f21(void) {}
+
+// PCS: define { float, float } @f22()
+// PCS: define { double, double } @f23(
+_Complex float      f22(void) {}
+_Complex double     f23(void) {}
+
+// PCS: define [1 x i64] @f24()
+struct s24 { _Complex char f0; };
+struct s24 f24() {}
+
+// PCS: define [1 x i64] @f25()
+struct s25 { _Complex short f0; };
+struct s25 f25() {}
+
+// PCS: define [1 x i64] @f26()
+struct s26 { _Complex int f0; };
+struct s26 f26() {}
+
+// PCS: define [2 x i64] @f27()
+struct s27 { _Complex long f0; };
+struct s27 f27() {}
+
+// PCS: define void @f28(i8 %a, i16 %b, i32 %c, i64 %d, float %e, double %f)
+void f28(char a, short b, int c, long d, float e, double f) {}
+
+// PCS: define void @f29([2 x i64] %a
+struct s29 { int arr[4]; };
+void f29(struct s29 a) {}
+
+// PCS: define void @f30(%struct.s30* %a)
+struct s30 { int arr[4]; char c;};
+void f30(struct s30 a) {}
+
+// PCS: define void @f31([4 x double] %a
+struct s31 { double arr[4]; };
+void f31(struct s31 a) {}
+
+// PCS: define void @f32(%struct.s32* %a)
+struct s32 { float arr[5]; };
+void f32(struct s32 a) {}
+
+// Not the only solution, but it *is* an HFA.
+// PCS: define void @f33([3 x float] %a.coerce0, float %a.coerce1)
+struct s33 { float arr[3]; float a; };
+void f33(struct s33 a) {}
+
+// PCS: define void @f34(%struct.s34* sret noalias
+struct s34 { int a[4]; char b };
+struct s34 f34(void) {}
+
+// PCS: define void @f35()
+struct s35 {};
+void f35(struct s35 a) {}
+
+// Check padding is added:
+// PCS: @f36(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s36* byval align 8 %stacked)
+struct s36 { long a, b; };
+void f36(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s36 stacked) {}
+
+// But only once:
+// PCS: @f37(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s37* byval align 8 %stacked, %struct.s37* byval align 8 %stacked2)
+struct s37 { long a, b; };
+void f37(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s37 stacked, struct s37 stacked2) {}
+
+// Check for HFA padding args. Also, they should not end up on the stack in a
+// way which will have holes in when lowered further by LLVM. In particular [3 x
+// float] would be unacceptable.
+
+// PCS: @f38(float %s0, double %d1, float %s2, float %s3, float %s4, float %s5, [2 x float], %struct.s38* byval align 4 %stacked)
+struct s38 { float a, b, c; };
+void f38(float s0, double d1, float s2, float s3, float s4, float s5, struct s38 stacked) {}
+
+// Check both VFP and integer arguments are padded (also that pointers and enums
+// get counted as integer types correctly).
+struct s39_int { long a, b; };
+struct s39_float { float a, b, c, d; };
+enum s39_enum { Val1, Val2 };
+// PCS: @f39(float %s0, i32 %x0, float %s1, i32* %x1, float %s2, i32 %x2, float %s3, float %s4, i32 %x3, [3 x float], %struct.s39_float* byval align 4 %stacked, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s39_int* byval align 8 %stacked2)
+void f39(float s0, int x0, float s1, int *x1, float s2, enum s39_enum x2, float s3, float s4,
+         int x3, struct s39_float stacked, int x4, int x5, int x6,
+         struct s39_int stacked2) {}
+
+struct s40 { __int128 a; };
+// PCS: @f40(i32 %x0, [1 x i128] %x2_3.coerce, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s40* byval align 16 %stacked)
+void f40(int x0, struct s40 x2_3, int x4, int x5, int x6, struct s40 stacked) {}
+
+// Checking: __int128 will get properly aligned type, with padding so big struct doesn't use x7.
+struct s41 { int arr[5]; };
+// PCS: @f41(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], i128* byval align 16, %struct.s41* %stacked2)
+int f41(int x0, int x1, int x2, int x3, int x4, int x5, int x6, __int128 stacked, struct s41 stacked2) {}
+
+// Checking: __int128 needing to be aligned in registers will consume correct
+// number. Previously padding was inserted before "stacked" because x6_7 was
+// "allocated" to x5 and x6 by clang.
+// PCS: @f42(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i128 %x6_7, i128* byval align 16)
+void f42(int x0, int x1, int x2, int x3, int x4, __int128 x6_7, __int128 stacked) {}
+
+// Checking: __fp16 is extended to double when calling variadic functions
+void variadic(int a, ...);
+void f43(__fp16 *in) {
+  variadic(42, *in);
+// CHECK: call void @variadic(i32 42, double
+}
diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c
new file mode 100644
index 00000000000..ca39c6e7ff2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-inline-asm.c
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// The only part clang really deals with is the lvalue/rvalue
+// distinction on constraints. It's sufficient to emit llvm and make
+// sure that's sane.
+
+long var;
+
+void test_generic_constraints(int var32, long var64) {
+    asm("add %0, %1, %1" : "=r"(var32) : "0"(var32));
+// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i32*
+// CHECK: call i32 asm "add $0, $1, $1", "=r,0"(i32 [[R32_ARG]])
+
+    asm("add %0, %1, %1" : "=r"(var64) : "0"(var64));
+// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i64*
+// CHECK: call i64 asm "add $0, $1, $1", "=r,0"(i64 [[R32_ARG]])
+
+    asm("ldr %0, %1" : "=r"(var32) : "m"(var));
+    asm("ldr %0, [%1]" : "=r"(var64) : "r"(&var));
+// CHECK: call i32 asm "ldr $0, $1", "=r,*m"(i64* @var)
+// CHECK: call i64 asm "ldr $0, [$1]", "=r,r"(i64* @var)
+}
+
+float f;
+double d;
+void test_constraint_w() {
+    asm("fadd %s0, %s1, %s1" : "=w"(f) : "w"(f));
+// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load float* @f
+// CHECK: call float asm "fadd ${0:s}, ${1:s}, ${1:s}", "=w,w"(float [[FLT_ARG]])
+
+    asm("fadd %d0, %d1, %d1" : "=w"(d) : "w"(d));
+// CHECK: [[DBL_ARG:%[a-zA-Z_0-9]+]] = load double* @d
+// CHECK: call double asm "fadd ${0:d}, ${1:d}, ${1:d}", "=w,w"(double [[DBL_ARG]])
+}
+
+void test_constraints_immed(void) {
+    asm("add x0, x0, %0" : : "I"(4095) : "x0");
+    asm("and w0, w0, %0" : : "K"(0xaaaaaaaa) : "w0");
+    asm("and x0, x0, %0" : : "L"(0xaaaaaaaaaaaaaaaa) : "x0");
+// CHECK: call void asm sideeffect "add x0, x0, $0", "I,~{x0}"(i32 4095)
+// CHECK: call void asm sideeffect "and w0, w0, $0", "K,~{w0}"(i32 -1431655766)
+// CHECK: call void asm sideeffect "and x0, x0, $0", "L,~{x0}"(i64 -6148914691236517206)
+}
+
+void test_constraint_S(void) {
+    int *addr;
+    asm("adrp %0, %A1\n\t"
+        "add %0, %0, %L1" : "=r"(addr) : "S"(&var));
+// CHECK: call i32* asm "adrp $0, ${1:A}\0A\09add $0, $0, ${1:L}", "=r,S"(i64* @var)
+}
+
+void test_constraint_Q(void) {
+    int val;
+    asm("ldxr %0, %1" : "=r"(val) : "Q"(var));
+// CHECK: call i32 asm "ldxr $0, $1", "=r,*Q"(i64* @var)
+}
diff --git a/clang/test/CodeGen/aarch64-type-sizes.c b/clang/test/CodeGen/aarch64-type-sizes.c
new file mode 100644
index 00000000000..3b9c9fc4264
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-type-sizes.c
@@ -0,0 +1,90 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s
+
+// char by definition has size 1
+
+int check_short() {
+  return sizeof(short);
+// CHECK: ret i32 2
+}
+
+int check_int() {
+  return sizeof(int);
+// CHECK: ret i32 4
+}
+
+int check_long() {
+// Both 4 and 8 are permitted under the PCS, Linux says 8!
+  return sizeof(long);
+// CHECK: ret i32 8
+}
+
+int check_longlong() {
+  return sizeof(long long);
+// CHECK: ret i32 8
+}
+
+int check_int128() {
+  return sizeof(__int128);
+// CHECK: ret i32 16
+}
+
+int check_fp16() {
+  return sizeof(__fp16);
+// CHECK: ret i32 2
+}
+
+int check_float() {
+  return sizeof(float);
+// CHECK: ret i32 4
+}
+
+int check_double() {
+  return sizeof(double);
+// CHECK: ret i32 8
+}
+
+int check_longdouble() {
+  return sizeof(long double);
+// CHECK: ret i32 16
+}
+
+int check_floatComplex() {
+  return sizeof(float _Complex);
+// CHECK: ret i32 8
+}
+
+int check_doubleComplex() {
+  return sizeof(double _Complex);
+// CHECK: ret i32 16
+}
+
+int check_longdoubleComplex() {
+  return sizeof(long double _Complex);
+// CHECK: ret i32 32
+}
+
+int check_bool() {
+  return sizeof(_Bool);
+// CHECK: ret i32 1
+}
+
+int check_wchar() {
+// PCS allows either unsigned short or unsigned int. Linux again says "bigger!"
+  return sizeof(__WCHAR_TYPE__);
+// CHECK: ret i32 4
+}
+
+int check_wchar_unsigned() {
+  return (__WCHAR_TYPE__)-1 > (__WCHAR_TYPE__)0;
+// CHECK: ret i32 1
+}
+
+enum Small {
+  Item
+};
+
+int foo() {
+  return sizeof(enum Small);
+// CHECK: ret i32 4
+}
+
diff --git a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c
new file mode 100644
index 00000000000..324a0708271
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-varargs.c
@@ -0,0 +1,238 @@
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm -o - %s | FileCheck %s
+#include <stdarg.h>
+
+// Obviously there's more than one way to implement va_arg. This test should at
+// least prevent unintentional regressions caused by refactoring.
+
+va_list the_list;
+
+int simple_int(void) {
+// CHECK: define i32 @simple_int
+  return va_arg(the_list, int);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i32*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i32* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i32* [[ADDR]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+__int128 aligned_int(void) {
+// CHECK: define i128 @aligned_int
+  return va_arg(the_list, __int128);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
+// CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
+// CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
+// CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
+// CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[ALIGNED_STACK_PTR]], i32 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i128* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i128* [[ADDR]]
+// CHECK: ret i128 [[RESULT]]
+}
+
+struct bigstruct {
+  int a[10];
+};
+
+struct bigstruct simple_indirect(void) {
+// CHECK: define void @simple_indirect
+  return va_arg(the_list, struct bigstruct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK-NOT: and i32
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.bigstruct**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK-NOT: and i64
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.bigstruct**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: load %struct.bigstruct** [[ADDR]]
+}
+
+struct aligned_bigstruct {
+  float a;
+  long double b;
+};
+
+struct aligned_bigstruct simple_aligned_indirect(void) {
+// CHECK: define void @simple_aligned_indirect
+  return va_arg(the_list, struct aligned_bigstruct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.aligned_bigstruct**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.aligned_bigstruct**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.aligned_bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: load %struct.aligned_bigstruct** [[ADDR]]
+}
+
+double simple_double(void) {
+// CHECK: define double @simple_double
+  return va_arg(the_list, double);
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK]], label %[[VAARG_MAYBE_REG]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to double*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to double*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi double* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load double* [[ADDR]]
+// CHECK: ret double [[RESULT]]
+}
+
+struct hfa {
+  float a, b;
+};
+
+struct hfa simple_hfa(void) {
+// CHECK: define %struct.hfa @simple_hfa
+  return va_arg(the_list, struct hfa);
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 32
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 0
+// CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA:[a-z_.0-9]+]], i32 0, i32 0
+// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: store float [[EL]], float* [[EL_TMPADDR]]
+// CHECK: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 16
+// CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA]], i32 0, i32 1
+// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: store float [[EL]], float* [[EL_TMPADDR]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast [2 x float]* %[[TMP_HFA]] to %struct.hfa*
+// CHECK: br label %[[VAARG_END:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.hfa*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.hfa* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+void check_start(int n, ...) {
+// CHECK: define void @check_start(i32 %n, ...)
+
+  va_list the_list;
+  va_start(the_list, n);
+// CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %struct.__va_list
+// CHECK: [[VOIDP_THE_LIST:%[a-z_0-9]+]] = bitcast %struct.__va_list* [[THE_LIST]] to i8*
+// CHECK: call void @llvm.va_start(i8* [[VOIDP_THE_LIST]])
+}
+
+
diff --git a/clang/test/CodeGenCXX/aarch64-arguments.cpp b/clang/test/CodeGenCXX/aarch64-arguments.cpp
new file mode 100644
index 00000000000..f56ad0bbdcf
--- /dev/null
+++ b/clang/test/CodeGenCXX/aarch64-arguments.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s
+
+// PCS: define void @{{.*}}(i8 %a
+struct s0 {};
+void f0(s0 a) {}
diff --git a/clang/test/CodeGenCXX/aarch64-cxxabi.cpp b/clang/test/CodeGenCXX/aarch64-cxxabi.cpp
new file mode 100644
index 00000000000..04d9493ae6b
--- /dev/null
+++ b/clang/test/CodeGenCXX/aarch64-cxxabi.cpp
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s
+
+// Check differences between the generic Itanium ABI, the AArch32 version and
+// the AArch64 version.
+
+////////////////////////////////////////////////////////////////////////////////
+
+// The ABI says that the key function is the "textually first, non-inline,
+// non-pure, virtual member function". The generic version decides this after
+// the completion of the class definition; the AArch32 version decides this at
+// the end of the translation unit.
+
+// We construct a class which needs a VTable here under generic ABI, but not
+// AArch32.
+
+// (see next section for explanation of guard)
+// CHECK: @_ZGVZ15guard_variablesiE4mine = internal global i64 0
+
+// CHECK: @_ZTV16CheckKeyFunction =
+struct CheckKeyFunction {
+  virtual void foo();
+};
+
+// This is not inline when CheckKeyFunction is completed, so
+// CheckKeyFunction::foo is the key function. VTables should be emitted.
+inline void CheckKeyFunction::foo() {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Guard variables only specify and use the low bit to determine status, rather
+// than the low byte as in the generic Itanium ABI. However, unlike 32-bit ARM,
+// they *are* 64-bits wide so check that in case confusion has occurred.
+
+class Guarded {
+public:
+  Guarded(int i);
+  ~Guarded();
+};
+
+void guard_variables(int a) {
+  static Guarded mine(a);
+// CHECK: [[GUARDBIT:%[0-9]+]] = and i64 {{%[0-9]+}}, 1
+// CHECK: icmp eq i64 [[GUARDBIT]], 0
+
+  // As guards are 64-bit, these helpers should take 64-bit pointers.
+// CHECK: call i32 @__cxa_guard_acquire(i64*
+// CHECK: call void @__cxa_guard_release(i64*
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Member function pointers use the adj field to distinguish between virtual and
+// nonvirtual members. As a result the adjustment is shifted (if ptr was used, a
+// mask would be expected instead).
+
+class C {
+  int a();
+  virtual int b();
+};
+
+
+int member_pointer(C &c, int (C::*func)()) {
+// CHECK: ashr i64 %[[MEMPTRADJ:[0-9a-z.]+]], 1
+// CHECK: %[[ISVIRTUAL:[0-9]+]] = and i64 %[[MEMPTRADJ]], 1
+// CHECK: icmp ne i64 %[[ISVIRTUAL]], 0
+  return (c.*func)();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// AArch64 PCS says that va_list type is based on "struct __va_list ..." in the
+// std namespace, which means it should mangle as "St9__va_list".
+
+// CHECK: @_Z7va_funcSt9__va_list
+void va_func(__builtin_va_list l) {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// AArch64 constructors (like generic Itanium, but unlike AArch32) do not return
+// "this".
+
+void test_constructor() {
+  Guarded g(42);
+// CHECK: call void @_ZN7GuardedC1Ei
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// In principle the AArch32 ABI allows this to be accomplished via a call to
+// __aeabi_atexit instead of __cxa_atexit. Clang doesn't make use of this at the
+// moment, but it's definitely not allowed for AArch64.
+
+// CHECK: call i32 @__cxa_atexit
+Guarded g(42);
diff --git a/clang/test/Driver/aarch64-features.c b/clang/test/Driver/aarch64-features.c
new file mode 100644
index 00000000000..2acb7157f6c
--- /dev/null
+++ b/clang/test/Driver/aarch64-features.c
@@ -0,0 +1,5 @@
+// RUN: %clang -target aarch64-none-linux-gnu -### %s -fsyntax-only 2>&1 | FileCheck %s
+
+// The AArch64 PCS states that chars should be unsigned.
+// CHECK: fno-signed-char
+
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
new file mode 100644
index 00000000000..65104e33117
--- /dev/null
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -0,0 +1,30 @@
+// RUN: %clang -target aarch64-none-linux-gnu -x c -E -dM %s -o - | FileCheck %s
+// CHECK: __AARCH 8
+// CHECK: __AARCH64EL__
+// CHECK: __AARCH_ACLE 101
+// CHECK-NOT: __AARCH_ADVSIMD_FP
+// CHECK-NOT: __AARCH_FEATURE_ADVSIMD
+// CHECK-NOT: __AARCH_FEATURE_BIG_ENDIAN
+// CHECK: __AARCH_FEATURE_CLZ 1
+// CHECK: __AARCH_FEATURE_FMA 1
+// CHECK: __AARCH_FEATURE_LDREX 0xf
+// CHECK: __AARCH_FEATURE_UNALIGNED 1
+// CHECK: __AARCH_FP 0xe
+// CHECK-NOT: __AARCH_FP_FAST
+// CHECK: __AARCH_FP16_FORMAT_IEEE 1
+// CHECK: __AARCH_FP_FENV_ROUNDING 1
+// CHECK: __AARCH_PROFILE 'A'
+// CHECK: __AARCH_SIZEOF_MINIMAL_ENUM 4
+// CHECK: __AARCH_SIZEOF_WCHAR_T 4
+// CHECK: __aarch64__
+
+
+// RUN: %clang -target aarch64-none-linux-gnu -ffast-math -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-FASTMATH %s
+// CHECK-FASTMATH: __AARCH_FP_FAST
+
+// RUN: %clang -target aarch64-none-linux-gnu -fshort-wchar -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTWCHAR %s
+// CHECK-SHORTWCHAR: __AARCH_SIZEOF_WCHAR_T 2
+
+// RUN: %clang -target aarch64-none-linux-gnu -fshort-enums -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTENUMS %s
+// CHECK-SHORTENUMS: __AARCH_SIZEOF_MINIMAL_ENUM 1
+