diff options
| author | Florian Hahn <flo@fhahn.com> | 2019-12-23 14:28:56 +0100 |
|---|---|---|
| committer | Florian Hahn <flo@fhahn.com> | 2019-12-23 14:49:14 +0100 |
| commit | 8d6f59b78a83489d3dacdd4db0dbf1d4213c92b7 (patch) | |
| tree | 437d358caad703ce49bcf79ae7f20a18a6252a67 /llvm/lib/Transforms | |
| parent | eca40066ebb5759aa44d21833c7a1fd7dd2361af (diff) | |
| download | bcm5719-llvm-8d6f59b78a83489d3dacdd4db0dbf1d4213c92b7.tar.gz bcm5719-llvm-8d6f59b78a83489d3dacdd4db0dbf1d4213c92b7.zip | |
[Matrix] Use fmuladd for matrix.multiply if allowed.
If the matrix.multiply calls have the contract fast math flag, we can
use fmuladd. This also adds a command line option to force fmuladd
generation. We can retire this option once there is a clang-level
option.
Reviewers: anemet, Gerolf, hfinkel, andrew.w.kaylor
Reviewed By: anemet
Differential Revision: https://reviews.llvm.org/D70951
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 30 |
1 files changed, 25 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index b3188001e11..d03b55756d3 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -43,6 +43,11 @@ using namespace PatternMatch; static cl::opt<bool> EnableShapePropagation("matrix-propagate-shape", cl::init(true)); +static cl::opt<bool> AllowContractEnabled( + "matrix-allow-contract", cl::init(false), cl::Hidden, + cl::desc("Allow the use of FMAs if available and profitable. This may " + "result in different results, due to less rounding error.")); + namespace { // Given an element poitner \p BasePtr to the start of a (sub) matrix, compute @@ -536,12 +541,25 @@ public: } Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp, - IRBuilder<> &Builder) { - Value *Mul = UseFPOp ? Builder.CreateFMul(A, B) : Builder.CreateMul(A, B); + IRBuilder<> &Builder, bool AllowContraction) { + if (!Sum) - return Mul; + return UseFPOp ? Builder.CreateFMul(A, B) : Builder.CreateMul(A, B); + + if (UseFPOp) { + if (AllowContraction) { + // Use fmuladd for floating point operations and let the backend decide + // if that's profitable. + Value *FMulAdd = Intrinsic::getDeclaration( + Func.getParent(), Intrinsic::fmuladd, A->getType()); + return Builder.CreateCall(FMulAdd, {A, B, Sum}); + } + Value *Mul = Builder.CreateFMul(A, B); + return Builder.CreateFAdd(Sum, Mul); + } - return UseFPOp ? Builder.CreateFAdd(Sum, Mul) : Builder.CreateAdd(Sum, Mul); + Value *Mul = Builder.CreateMul(A, B); + return Builder.CreateAdd(Sum, Mul); } /// Cache \p Matrix as result of \p Inst and update the uses of \p Inst. For @@ -591,6 +609,8 @@ public: EltType->getPrimitiveSizeInBits(), uint64_t(1)); + bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) && + MatMul->hasAllowContract()); // Multiply columns from the first operand with scalars from the second // operand. 
Then move along the K axes and accumulate the columns. With // this the adds can be vectorized without reassociation. @@ -607,7 +627,7 @@ public: Value *RH = Builder.CreateExtractElement(Rhs.getColumn(J), K); Value *Splat = Builder.CreateVectorSplat(BlockSize, RH, "splat"); Sum = createMulAdd(Sum, L, Splat, EltType->isFloatingPointTy(), - Builder); + Builder, AllowContract); } Result.setColumn(J, insertVector(Result.getColumn(J), I, Sum, Builder)); } |

