summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author Jonas Paulsson <paulsson@linux.vnet.ibm.com> 2016-09-28 09:41:38 +0000
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com> 2016-09-28 09:41:38 +0000
commit 58c5a7f55af4a5601d453f2c09ee3d0d9b4cc59d (patch)
tree 8210d274329291131ebfed2c0f2969a0ef7bc53c /llvm/lib
parent 963f75efc27bb55dba7c21e8107b0013e5a12021 (diff)
download bcm5719-llvm-58c5a7f55af4a5601d453f2c09ee3d0d9b4cc59d.tar.gz
bcm5719-llvm-58c5a7f55af4a5601d453f2c09ee3d0d9b4cc59d.zip
[SystemZ] Implementation of getUnrollingPreferences().
This commit enables more unrolling for SystemZ by implementing the SystemZTargetTransformInfo::getUnrollingPreferences() method. It has been found that it is better to unroll only moderately, so DefaultUnrollRuntimeCount has been moved from a file-local constant in LoopUnrollPass.cpp into UnrollingPreferences, which lets SystemZ set it to a lower value (4).

Reviewers: Evgeny Stupachenko, Ulrich Weigand.
https://reviews.llvm.org/D24451
llvm-svn: 282570
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp 57
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h 2
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp 9
3 files changed, 62 insertions, 6 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 5ff5b21f49b..b10c0e09a0d 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -238,6 +238,63 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
+void SystemZTTIImpl::getUnrollingPreferences(Loop *L, // L: loop whose blocks are scanned below
+ TTI::UnrollingPreferences &UP) { // UP: unrolling preferences, updated in place
+ // Tune UP for L. First scan every instruction of L to find out if it
+ // contains a call and how many stores there are, then cap the unroll counts.
+ bool HasCall = false;
+ unsigned NumStores = 0;
+ for (auto &BB : L->blocks())
+ for (auto &I : *BB) {
+ if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
+ ImmutableCallSite CS(&I);
+ if (const Function *F = CS.getCalledFunction()) {
+ if (isLoweredToCall(F)) // helper defined outside this view; presumably true when F ends up as a real call - confirm
+ HasCall = true;
+ if (F->getIntrinsicID() == Intrinsic::memcpy ||
+ F->getIntrinsicID() == Intrinsic::memset)
+ NumStores++; // a memcpy/memset intrinsic is counted as one store
+ } else { // no directly known callee: treat the indirect call as a call.
+ HasCall = true;
+ }
+ }
+ if (isa<StoreInst>(&I)) {
+ NumStores++;
+ Type *MemAccessTy = I.getOperand(0)->getType(); // type of the store's operand 0 (the stored value in LLVM's StoreInst operand order)
+ if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
+ (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
+ NumStores++; // 128 bit fp/int stores get split, so count them twice.
+ }
+ }
+
+ // The z13 processor will run out of store tags if too many stores
+ // are fed into it too quickly. Therefore make sure there are not
+ // too many stores in the resulting unrolled loop (budget used below: 12).
+ unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX); // integer division: 0 once NumStores > 12; UINT_MAX (no cap) when there are no stores
+
+ if (HasCall) {
+ // If the loop has any calls, only allow full unrolling.
+ UP.FullUnrollMaxCount = Max; // full unrolling is still limited by the store budget
+ UP.MaxCount = 1;
+ return;
+ }
+
+ UP.MaxCount = Max;
+ if (UP.MaxCount <= 1) // store budget leaves no room for a count above 1; the remaining fields stay untouched
+ return;
+
+ // Allow partial and runtime trip count unrolling.
+ UP.Partial = UP.Runtime = true;
+
+ UP.PartialThreshold = 75;
+ UP.DefaultUnrollRuntimeCount = 4; // moderate count used when the trip count is only known at run time
+
+ // Allow expensive instructions in the pre-header of the loop.
+ UP.AllowExpensiveTripCount = true;
+
+ UP.Force = true; // NOTE(review): meaning of Force is defined by TTI::UnrollingPreferences, not visible here - confirm
+}
+
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 9ae736d8413..a870dd9ea01 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -50,6 +50,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
/// @}
/// \name Vector TTI Implementations
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index dbbffee96e2..a8442e64832 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -102,10 +102,6 @@ static cl::opt<unsigned> PragmaUnrollThreshold(
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
-/// Default unroll count for loops with run-time trip count if
-/// -unroll-count is not set
-static const unsigned DefaultUnrollRuntimeCount = 8;
-
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
@@ -122,6 +118,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.PartialThreshold = UP.Threshold;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
+ UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = UINT_MAX;
UP.FullUnrollMaxCount = UINT_MAX;
UP.Partial = false;
@@ -803,7 +800,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
- UP.Count = DefaultUnrollRuntimeCount;
+ UP.Count = UP.DefaultUnrollRuntimeCount;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
UP.Count >>= 1;
@@ -852,7 +849,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
return false;
}
if (UP.Count == 0)
- UP.Count = DefaultUnrollRuntimeCount;
+ UP.Count = UP.DefaultUnrollRuntimeCount;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
// Reduce unroll count to be the largest power-of-two factor of
OpenPOWER on IntegriCloud