diff options
author | Justin Holewinski <jholewinski@nvidia.com> | 2013-06-28 17:58:13 +0000 |
---|---|---|
committer | Justin Holewinski <jholewinski@nvidia.com> | 2013-06-28 17:58:13 +0000 |
commit | af258be13449b834f3a07e85ebfb24f6c8879d2a (patch) | |
tree | 18fedac048aa55183de50cfcd6a531f703527ec6 | |
parent | e04e4bdf71ec5fd79855841e53199758e4aa4abd (diff) | |
download | bcm5719-llvm-af258be13449b834f3a07e85ebfb24f6c8879d2a.tar.gz bcm5719-llvm-af258be13449b834f3a07e85ebfb24f6c8879d2a.zip |
[NVPTX] Add (1.0 / sqrt(x)) => rsqrt(x) generation when allowable by FP flags
llvm-svn: 185178
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 13 |
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/rsqrt.ll | 13 |

2 files changed, 25 insertions, 1 deletions
```diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 553a6ba703d..b6f75268d63 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -136,7 +136,8 @@ def hasLDG : Predicate<"Subtarget.hasLDG()">;
 def hasLDU : Predicate<"Subtarget.hasLDU()">;
 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
 
-def doF32FTZ : Predicate<"UseF32FTZ">;
+def doF32FTZ : Predicate<"UseF32FTZ==1">;
+def doNoF32FTZ : Predicate<"UseF32FTZ==0">;
 def doFMAF32 : Predicate<"doFMAF32">;
 def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
 
@@ -765,6 +766,16 @@ def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
                       (fdiv Float32Regs:$a, fpimm:$b))]>,
                       Requires<[reqPTX20]>;
 
+//
+// F32 rsqrt
+//
+
+def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
+                       "rsqrt.approx.f32 \t$dst, $b;", []>;
+
+def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
+         (RSQRTF32approx1r Float32Regs:$b)>,
+         Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
 
 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
    def rrr : NVPTXInst<(outs Float32Regs:$dst),
diff --git a/llvm/test/CodeGen/NVPTX/rsqrt.ll b/llvm/test/CodeGen/NVPTX/rsqrt.ll
new file mode 100644
index 00000000000..d49eebe8a4b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/rsqrt.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=1 -nvptx-prec-sqrtf32=0 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+declare float @llvm.nvvm.sqrt.f(float)
+
+define float @foo(float %a) {
+; CHECK: rsqrt.approx.f32
+  %val = tail call float @llvm.nvvm.sqrt.f(float %a)
+  %ret = fdiv float 1.0, %val
+  ret float %ret
+}
+
\ No newline at end of file
```