diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 13:58:56 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-22 13:58:56 +0000 |
commit | 2d8517303cb727011b27ebf099de7165662df159 (patch) | |
tree | e2c208abb5be7d390dbb38ec1a56c1373d85840f | |
parent | ea0d4f9962fbc1741a730ec74b655940ea15424b (diff) | |
download | bcm5719-llvm-2d8517303cb727011b27ebf099de7165662df159.tar.gz bcm5719-llvm-2d8517303cb727011b27ebf099de7165662df159.zip |
[X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 with generic IR
As discussed on D22460, I've updated the vbroadcastf128 pd256/ps256 builtins to map directly to generic IR: an aligned load of the 128-bit vector followed by a shufflevector that splats it to both lanes of a 256-bit vector.
Fix for PR28657.
llvm-svn: 276417
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 27 | ||||
-rw-r--r-- | clang/test/CodeGen/avx-builtins.c | 6 |
2 files changed, 31 insertions, 2 deletions
```diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c74e53ea84e..fb3240b2522 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6619,6 +6619,26 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
 }
 
+static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
+                                        SmallVectorImpl<Value *> &Ops,
+                                        llvm::Type *DstTy,
+                                        unsigned SrcSizeInBits,
+                                        unsigned Align) {
+  // Load the subvector.
+  Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
+
+  // Create broadcast mask.
+  unsigned NumDstElts = DstTy->getVectorNumElements();
+  unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
+
+  SmallVector<uint32_t, 8> Mask;
+  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
+    for (unsigned j = 0; j != NumSrcElts; ++j)
+      Mask.push_back(j);
+
+  return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
+}
+
 static Value *EmitX86Select(CodeGenFunction &CGF,
                             Value *Mask, Value *Op0, Value *Op1) {
 
@@ -6995,6 +7015,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
         getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
     return EmitX86MaskedLoad(*this, Ops, Align);
   }
+
+  case X86::BI__builtin_ia32_vbroadcastf128_pd256:
+  case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
+    llvm::Type *DstTy = ConvertType(E->getType());
+    return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 16);
+  }
+
   case X86::BI__builtin_ia32_storehps:
   case X86::BI__builtin_ia32_storelps: {
     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c
index 650e4d280ec..4abeacd17ca 100644
--- a/clang/test/CodeGen/avx-builtins.c
+++ b/clang/test/CodeGen/avx-builtins.c
@@ -84,13 +84,15 @@ __m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) {
 
 __m256d test_mm256_broadcast_pd(__m128d* A) {
   // CHECK-LABEL: test_mm256_broadcast_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %{{.*}})
+  // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   return _mm256_broadcast_pd(A);
 }
 
 __m256 test_mm256_broadcast_ps(__m128* A) {
   // CHECK-LABEL: test_mm256_broadcast_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %{{.*}})
+  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   return _mm256_broadcast_ps(A);
 }
 
```