diff options
-rw-r--r-- | polly/include/polly/Support/SCEVValidator.h | 10 | ||||
-rw-r--r-- | polly/lib/Analysis/ScopBuilder.cpp | 7 | ||||
-rw-r--r-- | polly/lib/Analysis/ScopDetection.cpp | 23 | ||||
-rw-r--r-- | polly/lib/Support/SCEVValidator.cpp | 35 | ||||
-rw-r--r-- | polly/test/ScopInfo/constant_functions_multi_dim.ll | 118 |
5 files changed, 188 insertions, 5 deletions
diff --git a/polly/include/polly/Support/SCEVValidator.h b/polly/include/polly/Support/SCEVValidator.h index 0463fe6df58..b3bf5a7f138 100644 --- a/polly/include/polly/Support/SCEVValidator.h +++ b/polly/include/polly/Support/SCEVValidator.h @@ -36,6 +36,16 @@ namespace polly { /// @param Call The call to check. bool isConstCall(llvm::CallInst *Call); +/// Check if some parameters in the affine expression might hide induction +/// variables. If this is the case, we will try to delinearize the accesses +/// taking into account this information to possibly obtain a memory access +/// with more structure. Currently we assume that each parameter that +/// comes from a function call might depend on a (virtual) induction variable. +/// This covers calls to 'get_global_id' and 'get_local_id' as they commonly +/// arise in OpenCL code, while not catching any false-positives in our current +/// tests. +bool hasIVParams(const llvm::SCEV *Expr); + /// Find the loops referenced from a SCEV expression. /// /// @param Expr The SCEV expression to scan for loops. diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp index 83396809fe2..f3623525df4 100644 --- a/polly/lib/Analysis/ScopBuilder.cpp +++ b/polly/lib/Analysis/ScopBuilder.cpp @@ -407,6 +407,13 @@ bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) { Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(), AccItr->second.Shape->DelinearizedSizes.end()); + + // In case only the element size is contained in the 'Sizes' array, the + // access does not access a real multi-dimensional array. Hence, we allow + // the normal single-dimensional access construction to handle this. + if (Sizes.size() == 1) + return false; + // Remove the element size. This information is already provided by the // ElementSize parameter. In case the element size of this access and the // element size used for delinearization differs the delinearization is diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index d1d6360ab44..d5c154fa08a 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -823,6 +823,15 @@ bool ScopDetection::hasValidArraySizes(DetectionContext &Context, SmallVectorImpl<const SCEV *> &Sizes, const SCEVUnknown *BasePointer, Loop *Scope) const { + // If no sizes were found, all sizes are trivially valid. We allow this case + // to make it possible to pass known-affine accesses to the delinearization to + // try to recover some interesting multi-dimensional accesses, but to still + // allow the already known to be affine access in case the delinearization + // fails. In such situations, the delinearization will just return a Sizes + // array of size zero. + if (Sizes.size() == 0) + return true; + Value *BaseValue = BasePointer->getValue(); Region &CurRegion = Context.CurRegion; for (const SCEV *DelinearizedSize : Sizes) { @@ -893,10 +902,14 @@ bool ScopDetection::computeAccessFunctions( else IsNonAffine = true; } else { - SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, - Shape->DelinearizedSizes); - if (Acc->DelinearizedSubscripts.size() == 0) - IsNonAffine = true; + if (Shape->DelinearizedSizes.size() == 0) { + Acc->DelinearizedSubscripts.push_back(AF); + } else { + SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, + Shape->DelinearizedSizes); + if (Acc->DelinearizedSubscripts.size() == 0) + IsNonAffine = true; + } for (const SCEV *S : Acc->DelinearizedSubscripts) if (!isAffine(S, Scope, Context)) IsNonAffine = true; @@ -1013,7 +1026,7 @@ bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, } else if (PollyDelinearize && !IsVariantInNonAffineLoop) { Context.Accesses[BP].push_back({Inst, AF}); - if (!IsAffine) + if (!IsAffine || hasIVParams(AF)) Context.NonAffineAccesses.insert( std::make_pair(BP, LI.getLoopFor(Inst->getParent()))); } else if (!AllowNonAffine && !IsAffine) { diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 0e159f42993..1941875d0d5 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -429,6 +429,34 @@ public: } }; +class SCEVHasIVParams { + bool HasIVParams = false; + +public: + SCEVHasIVParams() {} + + bool follow(const SCEV *S) { + const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(S); + if (!Unknown) + return true; + + CallInst *Call = dyn_cast<CallInst>(Unknown->getValue()); + + if (!Call) + return true; + + if (isConstCall(Call)) { + HasIVParams = true; + return false; + } + + return true; + } + + bool isDone() { return HasIVParams; } + bool hasIVParams() { return HasIVParams; } +}; + /// Check whether a SCEV refers to an SSA name defined inside a region. class SCEVInRegionDependences { const Region *R; @@ -542,6 +570,13 @@ void findValues(const SCEV *Expr, ScalarEvolution &SE, ST.visitAll(Expr); } +bool hasIVParams(const SCEV *Expr) { + SCEVHasIVParams HasIVParams; + SCEVTraversal<SCEVHasIVParams> ST(HasIVParams); + ST.visitAll(Expr); + return HasIVParams.hasIVParams(); +} + bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R, llvm::Loop *Scope, bool AllowLoops) { SCEVInRegionDependences InRegionDeps(R, Scope, AllowLoops); diff --git a/polly/test/ScopInfo/constant_functions_multi_dim.ll b/polly/test/ScopInfo/constant_functions_multi_dim.ll new file mode 100644 index 00000000000..726e19fec1a --- /dev/null +++ b/polly/test/ScopInfo/constant_functions_multi_dim.ll @@ -0,0 +1,118 @@ +; RUN: opt %loadPolly -polly-scops -analyze \ +; RUN: -polly-detect-full-functions < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK: Statements { +; CHECK-NEXT: Stmt_entry_split +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_entry_split[] }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_entry_split[] -> [0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_entry_split[] -> MemRef_acc_0_lcssa__phi[] }; +; CHECK-NEXT: Stmt_for_inc_lr_ph +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc_lr_ph[] : N > 0 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc_lr_ph[] -> [1, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc_lr_ph[] -> MemRef_acc_03__phi[] }; +; CHECK-NEXT: Stmt_for_inc +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> [2, i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_acc_03__phi[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_acc_03__phi[] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_A[__global_id_0, i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef_B[i0, __global_id_1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_inc[i0] -> MemRef__lcssa__phi[] }; +; CHECK-NEXT: Stmt_for_cond_for_end_crit_edge +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] : N > 0 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] -> [3, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] -> MemRef__lcssa__phi[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_cond_for_end_crit_edge[] -> MemRef_acc_0_lcssa__phi[] }; +; CHECK-NEXT: Stmt_for_end +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] -> [4, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] -> MemRef_acc_0_lcssa__phi[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, __global_id_0, __global_id_1] -> { Stmt_for_end[] -> MemRef_C[__global_id_0, __global_id_1] }; +; CHECK-NEXT: } + + +; Function Attrs: noinline nounwind uwtable +define void @mat_mul(float* %C, float* %A, float* %B, i64 %N) #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %call = tail call i64 @_Z13get_global_idj(i32 0) #3 + %call1 = tail call i64 @_Z13get_global_idj(i32 1) #3 + %cmp1 = icmp sgt i64 %N, 0 + %mul = mul nsw i64 %call, %N + br i1 %cmp1, label %for.inc.lr.ph, label %for.end + +for.inc.lr.ph: ; preds = %entry.split + br label %for.inc + +for.inc: ; preds = %for.inc.lr.ph, %for.inc + %acc.03 = phi float [ 0.000000e+00, %for.inc.lr.ph ], [ %tmp6, %for.inc ] + %m.02 = phi i64 [ 0, %for.inc.lr.ph ], [ %inc, %for.inc ] + %add = add nsw i64 %m.02, %mul + %arrayidx = getelementptr inbounds float, float* %A, i64 %add + %tmp = load float, float* %arrayidx, align 4 + %mul2 = mul nsw i64 %m.02, %N + %add3 = add nsw i64 %mul2, %call1 + %arrayidx4 = getelementptr inbounds float, float* %B, i64 %add3 + %tmp5 = load float, float* %arrayidx4, align 4 + %tmp6 = tail call float @llvm.fmuladd.f32(float %tmp, float %tmp5, float %acc.03) + %inc = add nuw nsw i64 %m.02, 1 + %exitcond = icmp ne i64 %inc, %N + br i1 %exitcond, label %for.inc, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.inc + %.lcssa = phi float [ %tmp6, %for.inc ] + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + %acc.0.lcssa = phi float [ %.lcssa, %for.cond.for.end_crit_edge ], [ 0.000000e+00, %entry.split ] + %add7 = add nsw i64 %mul, %call1 + %arrayidx8 = getelementptr inbounds float, float* %C, i64 %add7 + store float %acc.0.lcssa, float* %arrayidx8, align 4 + ret void +} + +; Function Attrs: nounwind readnone +declare i64 @_Z13get_global_idj(i32) #1 + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.fmuladd.f32(float, float, float) #2 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable } +attributes #3 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 303846) (llvm/trunk 303834)"} +!2 = !{i32 1, i32 1, i32 1, i32 0} +!3 = !{!"none", !"none", !"none", !"none"} +!4 = !{!"float*", !"float*", !"float*", !"long"} +!5 = !{!"", !"", !"", !""} |