summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
authorFlorian Hahn <flo@fhahn.com>2019-12-23 14:28:56 +0100
committerFlorian Hahn <flo@fhahn.com>2019-12-23 14:49:14 +0100
commit8d6f59b78a83489d3dacdd4db0dbf1d4213c92b7 (patch)
tree437d358caad703ce49bcf79ae7f20a18a6252a67 /llvm/lib/Transforms
parenteca40066ebb5759aa44d21833c7a1fd7dd2361af (diff)
downloadbcm5719-llvm-8d6f59b78a83489d3dacdd4db0dbf1d4213c92b7.tar.gz
bcm5719-llvm-8d6f59b78a83489d3dacdd4db0dbf1d4213c92b7.zip
[Matrix] Use fmuladd for matrix.multiply if allowed.
If the matrix.multiply calls have the contract fast math flag, we can use fmuladd. This also adds a command line option to force fmuladd generation. We can retire this option once there is a clang-level option. Reviewers: anemet, Gerolf, hfinkel, andrew.w.kaylor Reviewed By: anemet Differential Revision: https://reviews.llvm.org/D70951
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp30
1 files changed, 25 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index b3188001e11..d03b55756d3 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -43,6 +43,11 @@ using namespace PatternMatch;
static cl::opt<bool> EnableShapePropagation("matrix-propagate-shape",
cl::init(true));
+static cl::opt<bool> AllowContractEnabled(
+ "matrix-allow-contract", cl::init(false), cl::Hidden,
+ cl::desc("Allow the use of FMAs if available and profitable. This may "
+ "result in different results, due to less rounding error."));
+
namespace {
// Given an element poitner \p BasePtr to the start of a (sub) matrix, compute
@@ -536,12 +541,25 @@ public:
}
Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp,
- IRBuilder<> &Builder) {
- Value *Mul = UseFPOp ? Builder.CreateFMul(A, B) : Builder.CreateMul(A, B);
+ IRBuilder<> &Builder, bool AllowContraction) {
+
if (!Sum)
- return Mul;
+ return UseFPOp ? Builder.CreateFMul(A, B) : Builder.CreateMul(A, B);
+
+ if (UseFPOp) {
+ if (AllowContraction) {
+ // Use fmuladd for floating point operations and let the backend decide
+ // if that's profitable.
+ Value *FMulAdd = Intrinsic::getDeclaration(
+ Func.getParent(), Intrinsic::fmuladd, A->getType());
+ return Builder.CreateCall(FMulAdd, {A, B, Sum});
+ }
+ Value *Mul = Builder.CreateFMul(A, B);
+ return Builder.CreateFAdd(Sum, Mul);
+ }
- return UseFPOp ? Builder.CreateFAdd(Sum, Mul) : Builder.CreateAdd(Sum, Mul);
+ Value *Mul = Builder.CreateMul(A, B);
+ return Builder.CreateAdd(Sum, Mul);
}
/// Cache \p Matrix as result of \p Inst and update the uses of \p Inst. For
@@ -591,6 +609,8 @@ public:
EltType->getPrimitiveSizeInBits(),
uint64_t(1));
+ bool AllowContract = AllowContractEnabled || (isa<FPMathOperator>(MatMul) &&
+ MatMul->hasAllowContract());
// Multiply columns from the first operand with scalars from the second
// operand. Then move along the K axes and accumulate the columns. With
// this the adds can be vectorized without reassociation.
@@ -607,7 +627,7 @@ public:
Value *RH = Builder.CreateExtractElement(Rhs.getColumn(J), K);
Value *Splat = Builder.CreateVectorSplat(BlockSize, RH, "splat");
Sum = createMulAdd(Sum, L, Splat, EltType->isFloatingPointTy(),
- Builder);
+ Builder, AllowContract);
}
Result.setColumn(J, insertVector(Result.getColumn(J), I, Sum, Builder));
}
OpenPOWER on IntegriCloud