diff options
author | Craig Topper <craig.topper@intel.com> | 2018-06-03 19:42:59 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-06-03 19:42:59 +0000 |
commit | 6fb26f93ef77771b44341928c63e879c0e6e6f1c (patch) | |
tree | 95754e2201d601f733f62f639ce4496fe5467233 /clang | |
parent | d6155ff0024262c8d9b37d71474c19f833f5c86a (diff) | |
download | bcm5719-llvm-6fb26f93ef77771b44341928c63e879c0e6e6f1c.tar.gz bcm5719-llvm-6fb26f93ef77771b44341928c63e879c0e6e6f1c.zip |
[X86] Replace __builtin_ia32_vbroadcastf128_pd256 and __builtin_ia32_vbroadcastf128_ps256 with an unaligned load intrinsic and a __builtin_shufflevector call.
llvm-svn: 333853
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 2 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 26 | ||||
-rw-r--r-- | clang/lib/Headers/avxintrin.h | 8 | ||||
-rw-r--r-- | clang/test/CodeGen/builtins-x86.c | 2 |
4 files changed, 6 insertions, 32 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 1a73a164d89..7710b621de3 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -510,8 +510,6 @@ TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx") TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "n", "avx") TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "n", "avx") TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "n", "avx") TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "n", "avx") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3afd84a895f..dd0e3510753 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8300,26 +8300,6 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, CGF.Builder.getIntNTy(std::max(NumElts, 8U))); } -static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, - ArrayRef<Value *> Ops, - llvm::Type *DstTy, - unsigned SrcSizeInBits, - unsigned Align) { - // Load the subvector. - Value *SubVec = CGF.Builder.CreateAlignedLoad(Ops[0], Align); - - // Create broadcast mask. 
- unsigned NumDstElts = DstTy->getVectorNumElements(); - unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits(); - - SmallVector<uint32_t, 8> Mask; - for (unsigned i = 0; i != NumDstElts; i += NumSrcElts) - for (unsigned j = 0; j != NumSrcElts; ++j) - Mask.push_back(j); - - return CGF.Builder.CreateShuffleVector(SubVec, SubVec, Mask, "subvecbcst"); -} - static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { @@ -8959,12 +8939,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_movdqa64load512_mask: return EmitX86MaskedLoad(*this, Ops, 64); - case X86::BI__builtin_ia32_vbroadcastf128_pd256: - case X86::BI__builtin_ia32_vbroadcastf128_ps256: { - llvm::Type *DstTy = ConvertType(E->getType()); - return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1); - } - case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index d1a9028c400..908359fd1e5 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -3111,7 +3111,9 @@ _mm256_broadcast_ss(float const *__a) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_pd(__m128d const *__a) { - return (__m256d)__builtin_ia32_vbroadcastf128_pd256((__v2df const *)__a); + __m128d __b = _mm_loadu_pd((const double *)__a); + return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b, + 0, 1, 0, 1); } /// Loads the data from a 128-bit vector of [4 x float] from the @@ -3129,7 +3131,9 @@ _mm256_broadcast_pd(__m128d const *__a) static __inline __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_ps(__m128 const *__a) { - return (__m256)__builtin_ia32_vbroadcastf128_ps256((__v4sf const *)__a); + __m128 __b = _mm_loadu_ps((const float *)__a); + return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b, + 0, 1, 2, 3, 0, 1, 2, 3); } /* SIMD load ops */ diff --git 
a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 2126ce8218a..fd99dd2be31 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -466,8 +466,6 @@ void f0() { tmp_i = __builtin_ia32_movmskps256(tmp_V8f); __builtin_ia32_vzeroall(); __builtin_ia32_vzeroupper(); - tmp_V4d = __builtin_ia32_vbroadcastf128_pd256(tmp_V2dCp); - tmp_V8f = __builtin_ia32_vbroadcastf128_ps256(tmp_V4fCp); tmp_V32c = __builtin_ia32_lddqu256(tmp_cCp); tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2LLi); tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4i); |