diff options
| author | Justin Lebar <jlebar@google.com> | 2017-01-21 01:00:32 +0000 |
|---|---|---|
| committer | Justin Lebar <jlebar@google.com> | 2017-01-21 01:00:32 +0000 |
| commit | 46624a822d3a3df4a4b6dff0d231acb45d269853 (patch) | |
| tree | 6bbc8395441b94ec4debd95283741b709bcd8468 /llvm/lib | |
| parent | 077f8fb1689d24d0118248ca41447aea2cd0f6d9 (diff) | |
| download | bcm5719-llvm-46624a822d3a3df4a4b6dff0d231acb45d269853.tar.gz bcm5719-llvm-46624a822d3a3df4a4b6dff0d231acb45d269853.zip | |
[NVPTX] Auto-upgrade some NVPTX intrinsics to LLVM target-generic code.
Summary:
Specifically, we upgrade llvm.nvvm.:
* brev{32,64}
* clz.{i,ll}
* popc.{i,ll}
* abs.{i,ll}
* {min,max}.{i,ll,u,ull}
* h2f
These either map directly to an existing LLVM target-generic
intrinsic or map to a simple LLVM target-generic idiom.
In all cases, we check that the code we generate is lowered to PTX as we
expect.
These builtins don't need to be backfilled in clang: They're not
accessible to user code from nvcc.
Reviewers: tra
Subscribers: majnemer, cfe-commits, llvm-commits, jholewinski
Differential Revision: https://reviews.llvm.org/D28793
llvm-svn: 292694
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 87 | ||||
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 48 |
2 files changed, 86 insertions, 49 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e3a7bae02e0..b68bc3f0561 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/AutoUpgrade.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" @@ -204,7 +205,38 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; } + case 'n': { + if (Name.startswith("nvvm.")) { + Name = Name.substr(5); + + // The following nvvm intrinsics correspond exactly to an LLVM intrinsic. + Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name) + .Cases("brev32", "brev64", Intrinsic::bitreverse) + .Case("clz.i", Intrinsic::ctlz) + .Case("popc.i", Intrinsic::ctpop) + .Default(Intrinsic::not_intrinsic); + if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) { + NewFn = Intrinsic::getDeclaration(F->getParent(), IID, + {F->getReturnType()}); + return true; + } + // The following nvvm intrinsics correspond exactly to an LLVM idiom, but + // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall. + // + // TODO: We could add lohi.i2d. + bool Expand = StringSwitch<bool>(Name) + .Cases("abs.i", "abs.ll", true) + .Cases("clz.ll", "popc.ll", "h2f", true) + .Cases("max.i", "max.ll", "max.ui", "max.ull", true) + .Cases("min.i", "min.ll", "min.ui", "min.ull", true) + .Default(false); + if (Expand) { + NewFn = nullptr; + return true; + } + } + } case 'o': // We only need to change the name to match the mangling including the // address space. @@ -753,6 +785,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { bool IsX86 = Name.startswith("x86."); if (IsX86) Name = Name.substr(4); + bool IsNVVM = Name.startswith("nvvm."); + if (IsNVVM) + Name = Name.substr(5); if (IsX86 && Name.startswith("sse4a.movnt.")) { Module *M = F->getParent(); @@ -1727,6 +1762,50 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { { CI->getArgOperand(0), CI->getArgOperand(1) }); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); + } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { + Value *Arg = CI->getArgOperand(0); + Value *Neg = Builder.CreateNeg(Arg, "neg"); + Value *Cmp = Builder.CreateICmpSGE( + Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); + Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); + } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" || + Name == "max.ui" || Name == "max.ull")) { + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") + ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") + : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); + Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); + } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" || + Name == "min.ui" || Name == "min.ull")) { + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") + ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond") + : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); + Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); + } else if (IsNVVM && Name == "clz.ll") { + // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64. + Value *Arg = CI->getArgOperand(0); + Value *Ctlz = Builder.CreateCall( + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + {Arg->getType()}), + {Arg, Builder.getFalse()}, "ctlz"); + Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); + } else if (IsNVVM && Name == "popc.ll") { + // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an + // i64. + Value *Arg = CI->getArgOperand(0); + Value *Popc = Builder.CreateCall( + Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, + {Arg->getType()}), + Arg, "ctpop"); + Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); + } else if (IsNVVM && Name == "h2f") { + Rep = Builder.CreateCall(Intrinsic::getDeclaration( + F->getParent(), Intrinsic::convert_from_fp16, + {Builder.getFloatTy()}), + CI->getArgOperand(0), "h2f"); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } @@ -1786,11 +1865,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; - case Intrinsic::ctpop: { + case Intrinsic::ctpop: + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); + CI->eraseFromParent(); + return; + + case Intrinsic::convert_from_fp16: CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); CI->eraseFromParent(); return; - } case Intrinsic::x86_xop_vfrcz_ss: case Intrinsic::x86_xop_vfrcz_sd: diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 8df727d276e..b06685545ff 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -187,16 +187,6 @@ class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass, // MISC // -def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs, - int_nvvm_clz_i>; -def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs, - int_nvvm_clz_ll>; - -def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs, - int_nvvm_popc_i>; -def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs, - int_nvvm_popc_ll>; - def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; @@ -204,26 +194,6 @@ def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, // Min Max // -def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs, - Int32Regs, Int32Regs, int_nvvm_min_i>; -def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs, - Int32Regs, Int32Regs, int_nvvm_min_ui>; - -def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs, - Int64Regs, Int64Regs, int_nvvm_min_ll>; -def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs, - Int64Regs, Int64Regs, int_nvvm_min_ull>; - -def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs, - Int32Regs, Int32Regs, int_nvvm_max_i>; -def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs, - Int32Regs, Int32Regs, int_nvvm_max_ui>; - -def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs, - Int64Regs, Int64Regs, int_nvvm_max_ll>; -def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs, - Int64Regs, Int64Regs, int_nvvm_max_ull>; - def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>; def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", @@ -239,6 +209,7 @@ def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs, def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>; + // // Multiplication // @@ -321,15 +292,6 @@ def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; // -// Brev -// - -def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs, - int_nvvm_brev32>; -def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs, - int_nvvm_brev64>; - -// // Sad // @@ -360,11 +322,6 @@ def : Pat<(int_nvvm_ceil_d Float64Regs:$a), // Abs // -def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs, - int_nvvm_abs_i>; -def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs, - int_nvvm_abs_ll>; - def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>; def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, @@ -810,9 +767,6 @@ def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>; -def : Pat<(int_nvvm_h2f Int16Regs:$a), - (CVT_f32_f16 (BITCONVERT_16_I2F Int16Regs:$a), CvtNONE)>; - // // Bitcast // |

