From a011002195b8e21eabc54f73f72156c6ae19dd1c Mon Sep 17 00:00:00 2001 From: Nate Begeman Date: Tue, 8 Jun 2010 00:16:34 +0000 Subject: Extend __builtin_shufflevector to expose the full power of the llvm shufflevector instruction. This means it can now be used for vector truncation and concatenation. This will be used for the ARM NEON implementation. llvm-svn: 105589 --- clang/lib/CodeGen/CGExprScalar.cpp | 98 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 3 deletions(-) (limited to 'clang/lib/CodeGen') diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 2108414c5ae..08374353169 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -578,12 +578,104 @@ Value *ScalarExprEmitter::VisitExpr(Expr *E) { } Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { - llvm::SmallVector indices; - for (unsigned i = 2; i < E->getNumSubExprs(); i++) { - indices.push_back(cast(CGF.EmitScalarExpr(E->getExpr(i)))); + // Vector Mask Case + if (E->getNumSubExprs() == 2 || + E->getNumSubExprs() == 3 && E->getExpr(2)->getType()->isVectorType()) { + Value* LHS = CGF.EmitScalarExpr(E->getExpr(0)); + Value* RHS = CGF.EmitScalarExpr(E->getExpr(1)); + Value* Mask; + + const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CGF.getLLVMContext()); + const llvm::VectorType *LTy = cast(LHS->getType()); + unsigned LHSElts = LTy->getNumElements(); + + if (E->getNumSubExprs() == 3) { + Mask = CGF.EmitScalarExpr(E->getExpr(2)); + + // Shuffle LHS & RHS into one input vector. + llvm::SmallVector concat; + for (unsigned i = 0; i != LHSElts; ++i) { + concat.push_back(llvm::ConstantInt::get(I32Ty, 2*i)); + concat.push_back(llvm::ConstantInt::get(I32Ty, 2*i+1)); + } + + Value* CV = llvm::ConstantVector::get(concat.begin(), concat.size()); + LHS = Builder.CreateShuffleVector(LHS, RHS, CV, "concat"); + LHSElts *= 2; + } else { + Mask = RHS; + } + + const llvm::VectorType *MTy = cast(Mask->getType()); + llvm::Constant* EltMask; + + // Treat vec3 like vec4. + if ((LHSElts == 6) && (E->getNumSubExprs() == 3)) + EltMask = llvm::ConstantInt::get(MTy->getElementType(), + (1 << llvm::Log2_32(LHSElts+2))-1); + else if ((LHSElts == 3) && (E->getNumSubExprs() == 2)) + EltMask = llvm::ConstantInt::get(MTy->getElementType(), + (1 << llvm::Log2_32(LHSElts+1))-1); + else + EltMask = llvm::ConstantInt::get(MTy->getElementType(), + (1 << llvm::Log2_32(LHSElts))-1); + + // Mask off the high bits of each shuffle index. + llvm::SmallVector MaskV; + for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) + MaskV.push_back(EltMask); + + Value* MaskBits = llvm::ConstantVector::get(MaskV.begin(), MaskV.size()); + Mask = Builder.CreateAnd(Mask, MaskBits, "mask"); + + // newv = undef + // mask = mask & maskbits + // for each elt + // n = extract mask i + // x = extract val n + // newv = insert newv, x, i + const llvm::VectorType *RTy = llvm::VectorType::get(LTy->getElementType(), + MTy->getNumElements()); + Value* NewV = llvm::UndefValue::get(RTy); + for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) { + Value *Indx = llvm::ConstantInt::get(I32Ty, i); + Indx = Builder.CreateExtractElement(Mask, Indx, "shuf_idx"); + Indx = Builder.CreateZExt(Indx, I32Ty, "idx_zext"); + + // Handle vec3 special since the index will be off by one for the RHS. + if ((LHSElts == 6) && (E->getNumSubExprs() == 3)) { + Value *cmpIndx, *newIndx; + cmpIndx = Builder.CreateICmpUGT(Indx, llvm::ConstantInt::get(I32Ty, 3), + "cmp_shuf_idx"); + newIndx = Builder.CreateSub(Indx, llvm::ConstantInt::get(I32Ty, 1), + "shuf_idx_adj"); + Indx = Builder.CreateSelect(cmpIndx, newIndx, Indx, "sel_shuf_idx"); + } + Value *VExt = Builder.CreateExtractElement(LHS, Indx, "shuf_elt"); + NewV = Builder.CreateInsertElement(NewV, VExt, Indx, "shuf_ins"); + } + return NewV; } + Value* V1 = CGF.EmitScalarExpr(E->getExpr(0)); Value* V2 = CGF.EmitScalarExpr(E->getExpr(1)); + + // Handle vec3 special since the index will be off by one for the RHS. + llvm::SmallVector indices; + for (unsigned i = 2; i < E->getNumSubExprs(); i++) { + llvm::Constant *C = cast(CGF.EmitScalarExpr(E->getExpr(i))); + const llvm::VectorType *VTy = cast(V1->getType()); + if (VTy->getNumElements() == 3) { + if (llvm::ConstantInt *CI = dyn_cast(C)) { + uint64_t cVal = CI->getZExtValue(); + if (cVal > 3) { + C = llvm::ConstantInt::get(C->getType(), cVal-1); + } + } + } + indices.push_back(C); + } + Value* SV = llvm::ConstantVector::get(indices.begin(), indices.size()); return Builder.CreateShuffleVector(V1, V2, SV, "shuffle"); } -- cgit v1.2.3