2 files changed, 45 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a348a489aff..dff602323bd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6542,12 +6542,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
     APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
     Constant *Const;
     if (VT.isFloatingPoint()) {
-      assert((ScalarSize == 32 || ScalarSize == 64) &&
-             "Unsupported floating point scalar size");
-      if (ScalarSize == 32)
-        Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
-      else
-        Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
+      if (ScalarSize == 32) {
+        Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+      } else {
+        assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+        Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+      }
     } else
       Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
     ConstantVec.push_back(Const);
@@ -6633,11 +6633,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
           // AVX have support for 32 and 64 bit broadcast for floats only.
           // No 64bit integer in 32bit subtarget.
           MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
-          Constant *C = SplatBitSize == 32
-                            ? ConstantFP::get(Type::getFloatTy(*Ctx),
-                                              SplatValue.bitsToFloat())
-                            : ConstantFP::get(Type::getDoubleTy(*Ctx),
-                                              SplatValue.bitsToDouble());
+          // Lower the splat via APFloat directly, to avoid any conversion.
+          Constant *C =
+              SplatBitSize == 32
+                  ? ConstantFP::get(*Ctx,
+                                    APFloat(APFloat::IEEEsingle(), SplatValue))
+                  : ConstantFP::get(*Ctx,
+                                    APFloat(APFloat::IEEEdouble(), SplatValue));
           SDValue CP = DAG.getConstantPool(C, PVT);
           unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
 
diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
index b13965a30ed..bbe31c5c2ac 100644
--- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
+++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -1203,3 +1203,35 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
   ret <8 x double> %res2
 }
 
+
+
+; ALL:       .LCPI38
+; ALL-NEXT:  .long	4290379776              # 0xffba0000
+
+; AVX:       .LCPI38
+; AVX-NEXT:  .long	4290379776              # float NaN
+
+define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) {
+; ALL32-LABEL: f8xi16_i32_NaN:
+; ALL32:       # BB#0:
+; ALL32-NEXT:    vpbroadcastd {{\.LCPI.*}}, %xmm1
+; ALL32-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; ALL32-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; ALL32-NEXT:    retl
+;
+; ALL64-LABEL: f8xi16_i32_NaN:
+; ALL64:       # BB#0:
+; ALL64-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
+; ALL64-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; ALL64-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; ALL64-NEXT:    retq
+;
+; AVX-LABEL: f8xi16_i32_NaN:
+; AVX:       # BB#0:
+; AVX-NEXT:    vbroadcastss {{\.LCPI.*}}, %xmm1
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+  %res1 = add <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %a
+  %res2 = and <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %res1
+  ret <8 x i16> %res2
+}