diff options
author | Chris Lattner <sabre@nondot.org> | 2007-08-03 16:09:33 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2007-08-03 16:09:33 +0000 |
commit | fb837dccacbf85180122453a9dab340787880773 (patch) | |
tree | 016ea370fd89f632f974f82ac2727979f451eb71 | |
parent | 177bd450e0a15247d8d7267c973cc9e9a8a6b139 (diff) | |
download | bcm5719-llvm-fb837dccacbf85180122453a9dab340787880773.tar.gz bcm5719-llvm-fb837dccacbf85180122453a9dab340787880773.zip |
In the common case where a vector swizzle yields a result with the same number of
elements as its source, emit a single LLVM shufflevector instruction instead of a chain of extractelement/insertelement operations.
For: vec4 = vec4.yyyy; // splat
Emit:
%tmp1 = shufflevector <4 x float> %tmp, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
instead of:
%tmp1 = extractelement <4 x float> %tmp, i32 1
%tmp2 = insertelement <4 x float> undef, float %tmp1, i32 0
%tmp3 = extractelement <4 x float> %tmp, i32 1
%tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
%tmp5 = extractelement <4 x float> %tmp, i32 1
%tmp6 = insertelement <4 x float> %tmp4, float %tmp5, i32 2
%tmp7 = extractelement <4 x float> %tmp, i32 1
%tmp8 = insertelement <4 x float> %tmp6, float %tmp7, i32 3
llvm-svn: 40779
-rw-r--r-- | clang/CodeGen/CGExpr.cpp | 21 |
1 file changed, 19 insertions, 2 deletions
diff --git a/clang/CodeGen/CGExpr.cpp b/clang/CodeGen/CGExpr.cpp index 178107e2eea..dad4b86a46f 100644 --- a/clang/CodeGen/CGExpr.cpp +++ b/clang/CodeGen/CGExpr.cpp @@ -298,14 +298,31 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, QualType ExprType) { return RValue::get(Builder.CreateExtractElement(Vec, Elt, "tmp")); } + // If the source and destination have the same number of elements, use a + // vector shuffle instead of insert/extracts. + unsigned NumResultElts = cast<VectorType>(ExprType)->getNumElements(); + unsigned NumSourceElts = + cast<llvm::VectorType>(Vec->getType())->getNumElements(); - unsigned NumElts = cast<VectorType>(ExprType)->getNumElements(); + if (NumResultElts == NumSourceElts) { + llvm::SmallVector<llvm::Constant*, 4> Mask; + for (unsigned i = 0; i != NumResultElts; ++i) { + unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields); + Mask.push_back(llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx)); + } + + llvm::Value *MaskV = llvm::ConstantVector::get(&Mask[0], Mask.size()); + Vec = Builder.CreateShuffleVector(Vec, + llvm::UndefValue::get(Vec->getType()), + MaskV, "tmp"); + return RValue::get(Vec); + } // Start out with an undef of the result type. llvm::Value *Result = llvm::UndefValue::get(ConvertType(ExprType)); // Extract/Insert each element of the result. - for (unsigned i = 0; i != NumElts; ++i) { + for (unsigned i = 0; i != NumResultElts; ++i) { unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields); llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx); Elt = Builder.CreateExtractElement(Vec, Elt, "tmp"); |