diff options
author | Tim Northover <tnorthover@apple.com> | 2014-01-30 14:47:57 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-01-30 14:47:57 +0000 |
commit | ac85c341ae905eff075693cf9bb4138df91604ef (patch) | |
tree | e903ec8e1997ad9907ec25d717fd451f5e808c00 /clang/lib/CodeGen | |
parent | c322f838bc34e9b67c430004f5d5ebe8c91642e7 (diff) | |
download | bcm5719-llvm-ac85c341ae905eff075693cf9bb4138df91604ef.tar.gz bcm5719-llvm-ac85c341ae905eff075693cf9bb4138df91604ef.zip |
ARM & AArch64: fully share NEON implementation of permutation intrinsics
As a starting point, this moves the CodeGen for NEON permutation
instructions (vtrn, vzip, vuzp) into a new shared function.
llvm-svn: 200471
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 151 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 3 |
2 files changed, 83 insertions, 71 deletions
```diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bd0301d741e..b03daa85d58 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1754,6 +1754,76 @@ CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
   return std::make_pair(EmitScalarExpr(Addr), Align);
 }
 
+Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E,
+                                                  SmallVectorImpl<Value *> &Ops,
+                                                  llvm::VectorType *VTy) {
+  switch (BuiltinID) {
+  default: break;
+  case NEON::BI__builtin_neon_vtrn_v:
+  case NEON::BI__builtin_neon_vtrnq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+        Indices.push_back(Builder.getInt32(i+vi));
+        Indices.push_back(Builder.getInt32(i+e+vi));
+      }
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  case NEON::BI__builtin_neon_vuzp_v:
+  case NEON::BI__builtin_neon_vuzpq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
+        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
+
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  case NEON::BI__builtin_neon_vzip_v:
+  case NEON::BI__builtin_neon_vzipq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
+        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+      }
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  }
+
+  return 0;
+}
+
 static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                                            unsigned BuiltinID,
                                            const CallExpr *E) {
@@ -2981,6 +3051,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   if (!Ty)
     return 0;
 
+  // Many NEON builtins have identical semantics and uses in ARM and
+  // AArch64. Emit these in a single function.
+  if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, VTy))
+    return Result;
+
   unsigned Int;
   switch (BuiltinID) {
   default:
@@ -2989,18 +3064,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   // AArch64 builtins mapping to legacy ARM v7 builtins.
   // FIXME: the mapped builtins listed correspond to what has been tested
   // in aarch64-neon-intrinsics.c so far.
-  case NEON::BI__builtin_neon_vuzp_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vuzp_v, E);
-  case NEON::BI__builtin_neon_vuzpq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vuzpq_v, E);
-  case NEON::BI__builtin_neon_vzip_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vzip_v, E);
-  case NEON::BI__builtin_neon_vzipq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vzipq_v, E);
-  case NEON::BI__builtin_neon_vtrn_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vtrn_v, E);
-  case NEON::BI__builtin_neon_vtrnq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vtrnq_v, E);
   case NEON::BI__builtin_neon_vext_v:
     return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vext_v, E);
   case NEON::BI__builtin_neon_vextq_v:
@@ -4213,6 +4276,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   if (!Ty)
     return 0;
 
+  // Many NEON builtins have identical semantics and uses in ARM and
+  // AArch64. Emit these in a single function.
+  if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, VTy))
+    return Result;
+
   unsigned Int;
   switch (BuiltinID) {
   default: return 0;
@@ -4869,65 +4937,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                 ConstantAggregateZero::get(Ty));
     return Builder.CreateSExt(Ops[0], Ty, "vtst");
   }
-  case NEON::BI__builtin_neon_vtrn_v:
-  case NEON::BI__builtin_neon_vtrnq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
-        Indices.push_back(Builder.getInt32(i+vi));
-        Indices.push_back(Builder.getInt32(i+e+vi));
-      }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
-  case NEON::BI__builtin_neon_vuzp_v:
-  case NEON::BI__builtin_neon_vuzpq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
-        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
-
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
-  case NEON::BI__builtin_neon_vzip_v:
-  case NEON::BI__builtin_neon_vzipq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
-        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
-        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
-      }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
   }
 }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ff3746ad184..ac583e4d1c1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2177,6 +2177,9 @@ public:
   llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                         SmallVectorImpl<llvm::Value *> &Ops,
+                                         llvm::VectorType *VTy);
   llvm::Value *EmitNeonCall(llvm::Function *F,
                             SmallVectorImpl<llvm::Value*> &O,
                             const char *name,
```