diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll | 32 |
2 files changed, 45 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a348a489aff..dff602323bd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6542,12 +6542,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue, APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); Constant *Const; if (VT.isFloatingPoint()) { - assert((ScalarSize == 32 || ScalarSize == 64) && - "Unsupported floating point scalar size"); - if (ScalarSize == 32) - Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat()); - else - Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble()); + if (ScalarSize == 32) { + Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); + } else { + assert(ScalarSize == 64 && "Unsupported floating point scalar size"); + Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); + } } else Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); ConstantVec.push_back(Const); @@ -6633,11 +6633,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // AVX have support for 32 and 64 bit broadcast for floats only. // No 64bit integer in 32bit subtarget. MVT CVT = MVT::getFloatingPointVT(SplatBitSize); - Constant *C = SplatBitSize == 32 - ? ConstantFP::get(Type::getFloatTy(*Ctx), - SplatValue.bitsToFloat()) - : ConstantFP::get(Type::getDoubleTy(*Ctx), - SplatValue.bitsToDouble()); + // Lower the splat via APFloat directly, to avoid any conversion. + Constant *C = + SplatBitSize == 32 + ? ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEsingle(), SplatValue)) + : ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEdouble(), SplatValue)); SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll index b13965a30ed..bbe31c5c2ac 100644 --- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll +++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll @@ -1203,3 +1203,35 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) { ret <8 x double> %res2 } + + +; ALL: .LCPI38 +; ALL-NEXT: .long 4290379776 # 0xffba0000 + +; AVX: .LCPI38 +; AVX-NEXT: .long 4290379776 # float NaN + +define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) { +; ALL32-LABEL: f8xi16_i32_NaN: +; ALL32: # BB#0: +; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1 +; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL32-NEXT: retl +; +; ALL64-LABEL: f8xi16_i32_NaN: +; ALL64: # BB#0: +; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 +; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL64-NEXT: retq +; +; AVX-LABEL: f8xi16_i32_NaN: +; AVX: # BB#0: +; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 + %res1 = add <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %a + %res2 = and <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %res1 + ret <8 x i16> %res2 +} |