diff options
author | Oliver Stannard <oliver.stannard@arm.com> | 2014-08-27 16:31:57 +0000 |
---|---|---|
committer | Oliver Stannard <oliver.stannard@arm.com> | 2014-08-27 16:31:57 +0000 |
commit | ed8ecc8429e53dc638763fb8f48c620b6dbae174 (patch) | |
tree | e141e1da5d0632038e1b6c04a30997cfd87489a6 /clang/lib/CodeGen | |
parent | 6107a8f4db4fe74a3884c2b49b6ded009063335e (diff) | |
download | bcm5719-llvm-ed8ecc8429e53dc638763fb8f48c620b6dbae174.tar.gz bcm5719-llvm-ed8ecc8429e53dc638763fb8f48c620b6dbae174.zip |
Allow __fp16 as a function arg or return type for AArch64
ACLE 2.0 allows __fp16 to be used as a function argument or return
type. This commit enables that usage for AArch64.
This also fixes an existing bug that caused clang to reject
homogeneous floating-point aggregates with a base type of __fp16;
such aggregates are valid for AAPCS64, but not for AAPCS-VFP.
llvm-svn: 216558
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGExprConstant.cpp | 3 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGExprScalar.cpp | 12 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenTypes.cpp | 7 | ||||
-rw-r--r-- | clang/lib/CodeGen/TargetInfo.cpp | 46 |
4 files changed, 43 insertions, 25 deletions
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index b508dcb446f..3fdab85d5f0 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1132,7 +1132,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, case APValue::Float: { const llvm::APFloat &Init = Value.getFloat(); if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf && - !Context.getLangOpts().NativeHalfType) + !Context.getLangOpts().NativeHalfType && + !Context.getLangOpts().HalfArgsAndReturns) return llvm::ConstantInt::get(VMContext, Init.bitcastToAPInt()); else return llvm::ConstantFP::get(VMContext, Init); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 9e0fbcfd184..5abe80f1900 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -701,7 +701,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, llvm::Type *SrcTy = Src->getType(); // If casting to/from storage-only half FP, use special intrinsics. - if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { + if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) { Src = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, CGF.CGM.FloatTy), @@ -773,7 +774,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, DstTy); // Cast to half via float - if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) + if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) DstTy = CGF.FloatTy; if (isa<llvm::IntegerType>(SrcTy)) { @@ -1691,7 +1693,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Add the inc/dec to the real part. 
llvm::Value *amt; - if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) { // Another special case: half FP increment should be done via float value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, @@ -1714,7 +1717,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec"); - if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) + if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType && + !CGF.getContext().getLangOpts().HalfArgsAndReturns) value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 6b0e4ad8b31..1f39eb88c01 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -358,9 +358,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::Half: // Half FP can either be storage-only (lowered to i16) or native. 
- ResultType = getTypeForFormat(getLLVMContext(), - Context.getFloatTypeSemantics(T), - Context.getLangOpts().NativeHalfType); + ResultType = + getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), + Context.getLangOpts().NativeHalfType || + Context.getLangOpts().HalfArgsAndReturns); break; case BuiltinType::Float: case BuiltinType::Double: diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index c27e2cf42fe..fb3b931ce95 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -3544,8 +3544,9 @@ public: }; } -static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, +static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base, ASTContext &Context, + bool isAArch64, uint64_t *HAMembers = nullptr); ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, @@ -3627,7 +3628,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, // Homogeneous Floating-point Aggregates (HFAs) need to be expanded. const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { + if (isARMHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) { IsHA = true; if (!IsNamedArg && isDarwinPCS()) { // With the Darwin ABI, variadic arguments are always passed on the stack @@ -3685,7 +3686,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIgnore(); const Type *Base = nullptr; - if (isHomogeneousAggregate(RetTy, Base, getContext())) + if (isARMHomogeneousAggregate(RetTy, Base, getContext(), true)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. 
return ABIArgInfo::getDirect(); @@ -3822,7 +3823,7 @@ static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty, const Type *Base = nullptr; uint64_t NumMembers; - bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers); + bool IsHFA = isARMHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers); if (IsHFA && NumMembers > 1) { // Homogeneous aggregates passed in registers will have their elements split // and stored 16-bytes apart regardless of size (they're notionally in qN, @@ -3965,7 +3966,7 @@ llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType T uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8; const Type *Base = nullptr; - bool isHA = isHomogeneousAggregate(Ty, Base, getContext()); + bool isHA = isARMHomogeneousAggregate(Ty, Base, getContext(), true); bool isIndirect = false; // Arguments bigger than 16 bytes which aren't homogeneous aggregates should @@ -4251,15 +4252,16 @@ void ARMABIInfo::setRuntimeCC() { RuntimeCC = abiCC; } -/// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous +/// isARMHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous /// aggregate. If HAMembers is non-null, the number of base elements /// contained in the type is returned through it; this is used for the /// recursive calls that check aggregate component types. 
-static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, - ASTContext &Context, uint64_t *HAMembers) { +static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base, + ASTContext &Context, bool isAArch64, + uint64_t *HAMembers) { uint64_t Members = 0; if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { - if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members)) + if (!isARMHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members)) return false; Members *= AT->getSize().getZExtValue(); } else if (const RecordType *RT = Ty->getAs<RecordType>()) { @@ -4270,7 +4272,7 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, Members = 0; for (const auto *FD : RD->fields()) { uint64_t FldMembers; - if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers)) + if (!isARMHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers)) return false; Members = (RD->isUnion() ? @@ -4284,12 +4286,22 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, } // Homogeneous aggregates for AAPCS-VFP must have base types of float, - // double, or 64-bit or 128-bit vectors. + // double, or 64-bit or 128-bit vectors. "long double" has the same machine + // type as double, so it is also allowed as a base type. + // Homogeneous aggregates for AAPCS64 must have base types of a floating + // point type or a short-vector type. This is the same as the 32-bit ABI, + // but with the difference that any floating-point type is allowed, + // including __fp16. 
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->getKind() != BuiltinType::Float && - BT->getKind() != BuiltinType::Double && - BT->getKind() != BuiltinType::LongDouble) - return false; + if (isAArch64) { + if (!BT->isFloatingPoint()) + return false; + } else { + if (BT->getKind() != BuiltinType::Float && + BT->getKind() != BuiltinType::Double && + BT->getKind() != BuiltinType::LongDouble) + return false; + } } else if (const VectorType *VT = Ty->getAs<VectorType>()) { unsigned VecSize = Context.getTypeSize(VT); if (VecSize != 64 && VecSize != 128) @@ -4491,7 +4503,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, // into VFP registers. const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { + if (isARMHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. if (Base->isVectorType()) { @@ -4696,7 +4708,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, // Check for homogeneous aggregates with AAPCS-VFP. if (getABIKind() == AAPCS_VFP && !isVariadic) { const Type *Base = nullptr; - if (isHomogeneousAggregate(RetTy, Base, getContext())) { + if (isARMHomogeneousAggregate(RetTy, Base, getContext(), false)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Homogeneous Aggregates are returned directly. return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP); |