[CodeGen] Convert double -> __fp16 in one step.

Fix CodeGen so that, for types bigger than float, the conversion to fp16 is performed in a single step instead of via the sequence "InTy -> float -> fp16". This avoids double rounding, which can produce a result different from that of a single, correctly rounded IEEE-754 conversion.

rdar://17594379, rdar://17468714
Differential Revision: http://reviews.llvm.org/D4602
Part of: http://reviews.llvm.org/D8367
llvm-svn: 232968
author: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2015-03-23 17:48:07 +0000
committer: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2015-03-23 17:48:07 +0000
commit: 47ec2c7479de7527c955c93fce1323717c58782c (patch)
tree: 00053954d42ec8a0c2dfb1d58cca8de30f2207ba
parent: affe181b397be2bef404e0158b3a8410795fd716 (diff)
download: bcm5719-llvm-47ec2c7479de7527c955c93fce1323717c58782c.tar.gz
bcm5719-llvm-47ec2c7479de7527c955c93fce1323717c58782c.zip
2 files changed, 39 insertions, 14 deletions
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index e5d612ccc35..85a0de2b919 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -745,9 +745,20 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
   QualType OrigSrcType = SrcType;
   llvm::Type *SrcTy = Src->getType();
 
-  // If casting to/from storage-only half FP, use special intrinsics.
+  // Handle conversions to bool first, they are special: comparisons against 0.
+  if (DstType->isBooleanType())
+    return EmitConversionToBool(Src, SrcType);
+
+  llvm::Type *DstTy = ConvertType(DstType);
+
+  // Cast from storage-only half FP using the special intrinsic.
   if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
       !CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+    if (DstTy->isFloatingPointTy())
+      return Builder.CreateCall(
+          CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy), Src);
+
+    // If this isn't an FP->FP conversion, go through float.
     Src = Builder.CreateCall(
         CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
                              CGF.CGM.FloatTy),
@@ -756,12 +767,6 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
     SrcTy = CGF.FloatTy;
   }
 
-  // Handle conversions to bool first, they are special: comparisons against 0.
-  if (DstType->isBooleanType())
-    return EmitConversionToBool(Src, SrcType);
-
-  llvm::Type *DstTy = ConvertType(DstType);
-
   // Ignore conversions like int -> uint.
   if (SrcTy == DstTy)
     return Src;
@@ -818,10 +823,14 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
     EmitFloatConversionCheck(OrigSrc, OrigSrcType, Src, SrcType, DstType,
                              DstTy);
 
-  // Cast to half via float
+  // Cast to half using the intrinsic if from FP type, through float otherwise.
   if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
-      !CGF.getContext().getLangOpts().HalfArgsAndReturns)
+      !CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+    if (SrcTy->isFloatingPointTy())
+      return Builder.CreateCall(
+          CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
     DstTy = CGF.FloatTy;
+  }
 
   if (isa<llvm::IntegerType>(SrcTy)) {
     bool InputSigned = SrcType->isSignedIntegerOrEnumerationType();
diff --git a/clang/test/CodeGen/fp16-ops.c b/clang/test/CodeGen/fp16-ops.c
index 2d8d6c54e35..5dd0b320b28 100644
--- a/clang/test/CodeGen/fp16-ops.c
+++ b/clang/test/CodeGen/fp16-ops.c
@@ -5,6 +5,7 @@ typedef unsigned cond_t;
 volatile cond_t test;
 volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
 volatile float f0, f1, f2;
+volatile double d0;
 
 void foo(void) {
   // CHECK-LABEL: define void @foo()
@@ -52,7 +53,7 @@ void foo(void) {
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fmul float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = h0 * (__fp16) -2.0;
+  h1 = h0 * (__fp16) -2.0f;
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fmul float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -71,7 +72,7 @@ void foo(void) {
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fdiv float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = (h0 / (__fp16) -2.0);
+  h1 = (h0 / (__fp16) -2.0f);
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fdiv float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -109,7 +110,7 @@ void foo(void) {
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fsub float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = ((__fp16)-2.0 - h0);
+  h1 = ((__fp16)-2.0f - h0);
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fsub float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -218,7 +219,7 @@ void foo(void) {
   // Check assignments (inc. compound)
   h0 = h1;
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 = (__fp16)-2.0;
+  h0 = (__fp16)-2.0f;
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
   h0 = f0;
 
@@ -231,7 +232,7 @@ void foo(void) {
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fadd
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 += (__fp16)1.0;
+  h0 += (__fp16)1.0f;
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fadd
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -281,4 +282,19 @@ void foo(void) {
   // CHECK: fdiv
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
   h0 /= f2;
+
+  // Check conversions to/from double
+  // CHECK: call i16 @llvm.convert.to.fp16.f64(
+  h0 = d0;
+
+  // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
+  // CHECK: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
+  h0 = (float)d0;
+
+  // CHECK: call double @llvm.convert.from.fp16.f64(
+  d0 = h0;
+
+  // CHECK: [[MID:%.*]] = call float @llvm.convert.from.fp16.f32(
+  // CHECK: fpext float [[MID]] to double
+  d0 = (float)h0;
 }
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2015-03-23 17:48:07 +0000
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2015-03-23 17:48:07 +0000
commit	47ec2c7479de7527c955c93fce1323717c58782c (patch)
tree	00053954d42ec8a0c2dfb1d58cca8de30f2207ba
parent	affe181b397be2bef404e0158b3a8410795fd716 (diff)
download	bcm5719-llvm-47ec2c7479de7527c955c93fce1323717c58782c.tar.gz bcm5719-llvm-47ec2c7479de7527c955c93fce1323717c58782c.zip