diff options
| author | Lang Hames <lhames@gmail.com> | 2012-06-05 19:07:46 +0000 |
|---|---|---|
| committer | Lang Hames <lhames@gmail.com> | 2012-06-05 19:07:46 +0000 |
| commit | a59100cc08b031e97458e05bbacf2d40c1121dfa (patch) | |
| tree | b6b4e61e759f5358f4d3e49542f883a72b661a1e /llvm | |
| parent | 572a3a2cceae01ca2091f8f76bc184faca20dc62 (diff) | |
| download | bcm5719-llvm-a59100cc08b031e97458e05bbacf2d40c1121dfa.tar.gz bcm5719-llvm-a59100cc08b031e97458e05bbacf2d40c1121dfa.zip | |
Add a new intrinsic: llvm.fmuladd. This intrinsic represents a multiply-add
expression (a * b + c) that can be implemented as a fused multiply-add (fma)
if the target determines that this will be more efficient. This intrinsic
will be used to implement FP_CONTRACT support and an aggressive FMA formation
mode.
If your target has a fast FMA instruction, you should override the
isFMAFasterThanMulAndAdd method in TargetLowering to return true.
llvm-svn: 158014
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/docs/LangRef.html | 51 | ||||
| -rw-r--r-- | llvm/include/llvm/Intrinsics.td | 4 | ||||
| -rw-r--r-- | llvm/include/llvm/Target/TargetLowering.h | 8 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 21 |
4 files changed, 84 insertions, 0 deletions
diff --git a/llvm/docs/LangRef.html b/llvm/docs/LangRef.html index 1f43ab67f24..f13f13909bc 100644 --- a/llvm/docs/LangRef.html +++ b/llvm/docs/LangRef.html @@ -277,6 +277,11 @@ <li><a href="#int_umul_overflow">'<tt>llvm.umul.with.overflow.*</tt> Intrinsics</a></li> </ol> </li> + <li><a href="#spec_arithmetic">Specialised Arithmetic Intrinsics</a> + <ol> + <li><a href="#fmuladd">'<tt>llvm.fmuladd</tt> Intrinsic</a></li> + </ol> + </li> <li><a href="#int_fp16">Half Precision Floating Point Intrinsics</a> <ol> <li><a href="#int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a></li> @@ -7947,6 +7952,52 @@ LLVM</a>.</p> <!-- ======================================================================= --> <h3> + <a name="spec_arithmetic">Specialised Arithmetic Intrinsics</a> +</h3> + +<!-- _______________________________________________________________________ --> + +<h4> + <a name="fmuladd">'<tt>llvm.fmuladd.*</tt>' Intrinsic</a> +</h4> + +<div> + +<h5>Syntax:</h5> +<pre> + declare float @llvm.fmuladd.f32(float %a, float %b, float %c) + declare double @llvm.fmuladd.f64(double %a, double %b, double %c) +</pre> + +<h5>Overview:</h5> +<p>The '<tt>llvm.fmuladd.*</tt>' intrinsic functions represent multiply-add +expressions that can be fused if the code generator determines that the fused +expression would be legal and efficient.</p> + +<h5>Arguments:</h5> +<p>The '<tt>llvm.fmuladd.*</tt>' intrinsics each take three arguments: two +multiplicands, a and b, and an addend c.</p> + +<h5>Semantics:</h5> +<p>The expression:</p> +<pre> + %0 = call float @llvm.fmuladd.f32(%a, %b, %c) +</pre> +<p>is equivalent to the expression a * b + c, except that rounding will not be +performed between the multiplication and addition steps if the code generator +fuses the operations. Fusion is not guaranteed, even if the target platform +supports it. 
If a fused multiply-add is required the corresponding llvm.fma.* +intrinsic function should be used instead.</p> + +<h5>Examples:</h5> +<pre> + %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c +</pre> + +</div> + +<!-- ======================================================================= --> +<h3> <a name="int_fp16">Half Precision Floating Point Intrinsics</a> </h3> diff --git a/llvm/include/llvm/Intrinsics.td b/llvm/include/llvm/Intrinsics.td index 01d2cca47a6..e2be4c4f6ab 100644 --- a/llvm/include/llvm/Intrinsics.td +++ b/llvm/include/llvm/Intrinsics.td @@ -266,6 +266,10 @@ let Properties = [IntrNoMem] in { def int_fma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; + + def int_fmuladd : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>]>; } // NOTE: these are internal interfaces. diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 27447b5df7b..915dd9d4e8e 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -1657,6 +1657,14 @@ public: return false; } + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT) const { + return false; + } + /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ab3ce48aacb..4152aa1ae16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4932,6 +4932,27 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return 0; + case Intrinsic::fmuladd: { + EVT VT = TLI.getValueType(I.getType()); + if (TLI.isOperationLegal(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){ + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + } else { + SDValue Mul = DAG.getNode(ISD::FMUL, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); + SDValue Add = DAG.getNode(ISD::FADD, dl, + getValue(I.getArgOperand(0)).getValueType(), + Mul, + getValue(I.getArgOperand(2))); + setValue(&I, Add); + } + return 0; + } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); |

