diff options
Diffstat (limited to 'clang')
| -rw-r--r-- | clang/include/clang/Basic/BuiltinsARM.def | 46 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 28 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 3 | ||||
| -rw-r--r-- | clang/lib/Headers/arm_neon.td | 75 | 
4 files changed, 63 insertions, 89 deletions
diff --git a/clang/include/clang/Basic/BuiltinsARM.def b/clang/include/clang/Basic/BuiltinsARM.def index 26c5bec6351..247cf7f5abf 100644 --- a/clang/include/clang/Basic/BuiltinsARM.def +++ b/clang/include/clang/Basic/BuiltinsARM.def @@ -16,8 +16,6 @@  // In libgcc  BUILTIN(__clear_cache, "vc*c*", "") - -// FIXME: This is just a placeholder. NEON intrinsics should be listed here.  BUILTIN(__builtin_thread_pointer, "v*", "")  // NEON @@ -105,46 +103,16 @@ BUILTIN(__builtin_neon_vmin_v, "V8cV8cV8ci", "n")  BUILTIN(__builtin_neon_vminq_v, "V16cV16cV16ci", "n")  BUILTIN(__builtin_neon_vmlal_v, "V16cV16cV8cV8ci", "n")  BUILTIN(__builtin_neon_vmlal_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vmlal_n_s16, "V4iV4iV4ss", "n") -BUILTIN(__builtin_neon_vmlal_n_s32, "V2LLiV2LLiV2ii", "n") -BUILTIN(__builtin_neon_vmlal_n_u16, "V4iV4iV4sUs", "n") -BUILTIN(__builtin_neon_vmlal_n_u32, "V2LLiV2LLiV2iUi", "n")  BUILTIN(__builtin_neon_vmla_lane_v, "V8cV8cV8cV8cii", "n")  BUILTIN(__builtin_neon_vmlaq_lane_v, "V16cV16cV16cV16cii", "n") -BUILTIN(__builtin_neon_vmla_n_i16, "V4sV4sV4sUs", "n") -BUILTIN(__builtin_neon_vmla_n_i32, "V2iV2iV2iUi", "n") -BUILTIN(__builtin_neon_vmla_n_f32, "V2fV2fV2ff", "n") -BUILTIN(__builtin_neon_vmlaq_n_i16, "V8sV8sV8sUs", "n") -BUILTIN(__builtin_neon_vmlaq_n_i32, "V4iV4iV4iUi", "n") -BUILTIN(__builtin_neon_vmlaq_n_f32, "V4fV4fV4ff", "n")  BUILTIN(__builtin_neon_vmlsl_v, "V16cV16cV8cV8ci", "n")  BUILTIN(__builtin_neon_vmlsl_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vmlsl_n_s16, "V4iV4iV4ss", "n") -BUILTIN(__builtin_neon_vmlsl_n_s32, "V2LLiV2LLiV2ii", "n") -BUILTIN(__builtin_neon_vmlsl_n_u16, "V4iV4iV4sUs", "n") -BUILTIN(__builtin_neon_vmlsl_n_u32, "V2LLiV2LLiV2iUi", "n")  BUILTIN(__builtin_neon_vmls_lane_v, "V8cV8cV8cV8cii", "n")  BUILTIN(__builtin_neon_vmlsq_lane_v, "V16cV16cV16cV16cii", "n") -BUILTIN(__builtin_neon_vmls_n_i16, "V4sV4sV4sUs", "n") -BUILTIN(__builtin_neon_vmls_n_i32, "V2iV2iV2iUi", "n") -BUILTIN(__builtin_neon_vmls_n_f32, "V2fV2fV2ff", "n") -BUILTIN(__builtin_neon_vmlsq_n_i16, "V8sV8sV8sUs", "n") -BUILTIN(__builtin_neon_vmlsq_n_i32, "V4iV4iV4iUi", "n") -BUILTIN(__builtin_neon_vmlsq_n_f32, "V4fV4fV4ff", "n")  BUILTIN(__builtin_neon_vmovl_v, "V16cV8ci", "n")  BUILTIN(__builtin_neon_vmovn_v, "V8cV16ci", "n")  BUILTIN(__builtin_neon_vmull_v, "V16cV8cV8ci", "n")  BUILTIN(__builtin_neon_vmull_lane_v, "V16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vmull_n_s16, "V4iV4ss", "n") -BUILTIN(__builtin_neon_vmull_n_s32, "V2LLiV2ii", "n") -BUILTIN(__builtin_neon_vmull_n_u16, "V4iV4sUs", "n") -BUILTIN(__builtin_neon_vmull_n_u32, "V2LLiV2iUi", "n") -BUILTIN(__builtin_neon_vmul_n_i16, "V4sV4sUs", "n") -BUILTIN(__builtin_neon_vmul_n_i32, "V2iV2iUi", "n") -BUILTIN(__builtin_neon_vmul_n_f32, "V2fV2ff", "n") -BUILTIN(__builtin_neon_vmulq_n_i16, "V8sV8sUs", "n") -BUILTIN(__builtin_neon_vmulq_n_i32, "V4iV4iUi", "n") -BUILTIN(__builtin_neon_vmulq_n_f32, "V4fV4ff", "n")  BUILTIN(__builtin_neon_vpadal_v, "V8cV8cV8ci", "n")  BUILTIN(__builtin_neon_vpadalq_v, "V16cV16cV16ci", "n")  BUILTIN(__builtin_neon_vpadd_v, "V8cV8cV8ci", "n") @@ -158,24 +126,14 @@ BUILTIN(__builtin_neon_vqadd_v, "V8cV8cV8ci", "n")  BUILTIN(__builtin_neon_vqaddq_v, "V16cV16cV16ci", "n")  BUILTIN(__builtin_neon_vqdmlal_v, "V16cV16cV8cV8ci", "n")  BUILTIN(__builtin_neon_vqdmlal_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vqdmlal_n_s16, "V4iV4iV4ss", "n") -BUILTIN(__builtin_neon_vqdmlal_n_s32, "V2LLiV2LLiV2ii", "n")  BUILTIN(__builtin_neon_vqdmlsl_v, "V16cV16cV8cV8ci", "n")  BUILTIN(__builtin_neon_vqdmlsl_lane_v, "V16cV16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vqdmlsl_n_s16, "V4iV4iV4ss", "n") -BUILTIN(__builtin_neon_vqdmlsl_n_s32, "V2LLiV2LLiV2ii", "n")  BUILTIN(__builtin_neon_vqdmulh_v, "V8cV8cV8ci", "n")  BUILTIN(__builtin_neon_vqdmulhq_v, "V16cV16cV16ci", "n")  BUILTIN(__builtin_neon_vqdmulh_lane_v, "V8cV8cV8cii", "n")  BUILTIN(__builtin_neon_vqdmulhq_lane_v, "V16cV16cV16cii", "n") -BUILTIN(__builtin_neon_vqdmulh_n_s16, "V4sV4ss", "n") -BUILTIN(__builtin_neon_vqdmulh_n_s32, "V2iV2ii", "n") -BUILTIN(__builtin_neon_vqdmulhq_n_s16, "V8sV8ss", "n") -BUILTIN(__builtin_neon_vqdmulhq_n_s32, "V4iV4ii", "n")  BUILTIN(__builtin_neon_vqdmull_v, "V16cV8cV8ci", "n")  BUILTIN(__builtin_neon_vqdmull_lane_v, "V16cV8cV8cii", "n") -BUILTIN(__builtin_neon_vqdmull_n_s16, "V4iV4ss", "n") -BUILTIN(__builtin_neon_vqdmull_n_s32, "V2LLiV2ii", "n")  BUILTIN(__builtin_neon_vqmovn_v, "V8cV16ci", "n")  BUILTIN(__builtin_neon_vqmovun_v, "V8cV16ci", "n")  BUILTIN(__builtin_neon_vqneg_v, "V8cV8ci", "n") @@ -184,10 +142,6 @@ BUILTIN(__builtin_neon_vqrdmulh_v, "V8cV8cV8ci", "n")  BUILTIN(__builtin_neon_vqrdmulhq_v, "V16cV16cV16ci", "n")  BUILTIN(__builtin_neon_vqrdmulh_lane_v, "V8cV8cV8cii", "n")  BUILTIN(__builtin_neon_vqrdmulhq_lane_v, "V16cV16cV16cii", "n") -BUILTIN(__builtin_neon_vqrdmulh_n_s16, "V4sV4ss", "n") -BUILTIN(__builtin_neon_vqrdmulh_n_s32, "V2iV2ii", "n") -BUILTIN(__builtin_neon_vqrdmulhq_n_s16, "V8sV8ss", "n") -BUILTIN(__builtin_neon_vqrdmulhq_n_s32, "V4iV4ii", "n")  BUILTIN(__builtin_neon_vqrshl_v, "V8cV8cV8ci", "n")  BUILTIN(__builtin_neon_vqrshlq_v, "V16cV16cV16ci", "n")  BUILTIN(__builtin_neon_vqrshrn_n_v, "V8cV16cii", "n") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3e8fec5b355..dbf53520281 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -894,13 +894,24 @@ const llvm::Type *GetNeonType(LLVMContext &Ctx, unsigned type, bool q) {    return 0;  } +Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { +  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); +  SmallVector<Constant*, 16> Indices(nElts, C); +  Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size()); +  return Builder.CreateShuffleVector(V, V, SV, "lane"); +} +  Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, -                                     const char *name) { +                                     const char *name, bool splat) {    unsigned j = 0;    for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();         ai != ae; ++ai, ++j)      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); +  if (splat) { +    Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j])); +    Ops.resize(j); +  }    return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);  } @@ -917,9 +928,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,                                 a, b);    } -  llvm::SmallVector<Value*, 4> Ops;    // Determine the type of this overloaded NEON intrinsic.    assert(BuiltinID > ARM::BI__builtin_thread_pointer); + +  llvm::SmallVector<Value*, 4> Ops;    for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)      Ops.push_back(EmitScalarExpr(E->getArg(i))); @@ -931,11 +943,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,    unsigned type = Result.getZExtValue();    bool usgn = type & 0x08;    bool quad = type & 0x10; +  bool splat = false;    const llvm::Type *Ty = GetNeonType(VMContext, type & 0x7, quad);    if (!Ty)      return 0; +  // FIXME: multiplies by scalar do not currently match their patterns because +  //   they are implemented via mul(splat(scalar_to_vector)) rather than +  //   mul(dup(scalar)) +    unsigned Int;    switch (BuiltinID) {    default: return 0; @@ -1087,12 +1104,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,    case ARM::BI__builtin_neon_vminq_v:      Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;      return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin"); -  // FIXME: vmlal_lane -> splat, drop imm +  case ARM::BI__builtin_neon_vmlal_lane_v: +    splat = true;    case ARM::BI__builtin_neon_vmlal_v:      Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals; -    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal"); -  // FIXME: vmlal_n, vmla_n, vmlsl_n, vmls_n, vmull_n, vmul_n, -  //        vqdmlal_n, vqdmlsl_n, vqdmulh_n, vqdmull_n, vqrdmulh_n -> splat,-_n +    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal", splat);    case ARM::BI__builtin_neon_vmovl_v:      Int = usgn ? Intrinsic::arm_neon_vmovlu : Intrinsic::arm_neon_vmovls;      return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmovl"); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 0682a0474b5..50e334061ef 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1147,7 +1147,8 @@ public:    llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);    llvm::Value *EmitNeonCall(llvm::Function *F,                               llvm::SmallVectorImpl<llvm::Value*> &O, -                            const char *name); +                            const char *name, bool splat = false); +  llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);    llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);    llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/clang/lib/Headers/arm_neon.td b/clang/lib/Headers/arm_neon.td index 4dbecb66b9e..fb298a67c69 100644 --- a/clang/lib/Headers/arm_neon.td +++ b/clang/lib/Headers/arm_neon.td @@ -14,29 +14,32 @@  class Op; -def OP_NONE : Op; -def OP_ADD  : Op; -def OP_SUB  : Op; -def OP_MUL  : Op; -def OP_MLA  : Op; -def OP_MLS  : Op; -def OP_EQ   : Op; -def OP_GE   : Op; -def OP_LE   : Op; -def OP_GT   : Op; -def OP_LT   : Op; -def OP_NEG  : Op; -def OP_NOT  : Op; -def OP_AND  : Op; -def OP_OR   : Op; -def OP_XOR  : Op; -def OP_ANDN : Op; -def OP_ORN  : Op; -def OP_CAST : Op; -def OP_HI   : Op; -def OP_LO   : Op; -def OP_CONC : Op; -def OP_DUP  : Op; +def OP_NONE  : Op; +def OP_ADD   : Op; +def OP_SUB   : Op; +def OP_MUL   : Op; +def OP_MLA   : Op; +def OP_MLS   : Op; +def OP_MUL_N : Op; +def OP_MLA_N : Op; +def OP_MLS_N : Op; +def OP_EQ    : Op; +def OP_GE    : Op; +def OP_LE    : Op; +def OP_GT    : Op; +def OP_LT    : Op; +def OP_NEG   : Op; +def OP_NOT   : Op; +def OP_AND   : Op; +def OP_OR    : Op; +def OP_XOR   : Op; +def OP_ANDN  : Op; +def OP_ORN   : Op; +def OP_CAST  : Op; +def OP_HI    : Op; +def OP_LO    : Op; +def OP_CONC  : Op; +def OP_DUP   : Op;  class Inst <string p, string t, Op o> {    string Prototype = p; @@ -48,7 +51,6 @@ class Inst <string p, string t, Op o> {  class SInst<string p, string t> : Inst<p, t, OP_NONE> {}  class IInst<string p, string t> : Inst<p, t, OP_NONE> {}  class WInst<string p, string t> : Inst<p, t, OP_NONE> {} -class BInst<string p, string t> : Inst<p, t, OP_NONE> {}  // prototype: return (arg, arg, ...)  // v: void @@ -64,6 +66,7 @@ class BInst<string p, string t> : Inst<p, t, OP_NONE> {}  // i: constant int  // l: constant uint64  // s: scalar of element type +// a: scalar of element type (splat to vector type)  // k: default elt width, double num elts  // #: array of default vectors  // p: pointer type @@ -273,21 +276,21 @@ def VQDMLAL_LANE  : SInst<"wwddi", "si">;  def VMLS_LANE     : IInst<"ddddi", "siUsUifQsQiQUsQUiQf">;  def VMLSL_LANE    : SInst<"wwddi", "siUsUi">;  def VQDMLSL_LANE  : SInst<"wwddi", "si">; -def VMUL_N        : IInst<"dds",   "sifUsUiQsQiQfQUsQUi">; -def VMULL_N       : SInst<"wds",   "siUsUi">; +def VMUL_N        : Inst<"dds",    "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; +def VMULL_N       : SInst<"wda",   "siUsUi">;  def VMULL_LANE    : SInst<"wddi",  "siUsUi">; -def VQDMULL_N     : SInst<"wds",   "si">; +def VQDMULL_N     : SInst<"wda",   "si">;  def VQDMULL_LANE  : SInst<"wddi",  "si">; -def VQDMULH_N     : SInst<"dds",   "siQsQi">; +def VQDMULH_N     : SInst<"dda",   "siQsQi">;  def VQDMULH_LANE  : SInst<"dddi",  "siQsQi">; -def VQRDMULH_N    : SInst<"dds",   "siQsQi">; +def VQRDMULH_N    : SInst<"dda",   "siQsQi">;  def VQRDMULH_LANE : SInst<"dddi",  "siQsQi">; -def VMLA_N        : IInst<"ddds",  "siUsUifQsQiQUsQUiQf">; -def VMLAL_N       : SInst<"wwds",  "siUsUi">; -def VQDMLAL_N     : SInst<"wwds",  "si">; -def VMLS_N        : IInst<"ddds",  "siUsUifQsQiQUsQUiQf">; -def VMLSL_N       : SInst<"wwds",  "siUsUi">; -def VQDMLSL_N     : SInst<"wwds",  "si">; +def VMLA_N        : Inst<"ddda",   "siUsUifQsQiQUsQUiQf", OP_MLA_N>; +def VMLAL_N       : SInst<"wwda",  "siUsUi">; +def VQDMLAL_N     : SInst<"wwda",  "si">; +def VMLS_N        : Inst<"ddds",   "siUsUifQsQiQUsQUiQf", OP_MLS_N>; +def VMLSL_N       : SInst<"wwda",  "siUsUi">; +def VQDMLSL_N     : SInst<"wwda",  "si">;  ////////////////////////////////////////////////////////////////////////////////  // E.3.26 Vector Extract @@ -319,7 +322,7 @@ def VORR : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>;  def VEOR : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>;  def VBIC : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>;  def VORN : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; -def VBSL : BInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; +def VBSL : SInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;  ////////////////////////////////////////////////////////////////////////////////  // E.3.30 Transposition operations  | 

