diff options
| author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-11-11 04:30:07 +0000 |
|---|---|---|
| committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-11-11 04:30:07 +0000 |
| commit | fc4bfc465afa391da841ce58c5acff8c723425da (patch) | |
| tree | 00943ef144f189fb5ae0008e6cd51e5f2877d967 | |
| parent | d619eaaae421f70574b9618bb51f8c6959491fa5 (diff) | |
| download | bcm5719-llvm-fc4bfc465afa391da841ce58c5acff8c723425da.tar.gz bcm5719-llvm-fc4bfc465afa391da841ce58c5acff8c723425da.zip | |
[FIX] Create empty invariant equivalence classes
We now create all invariant equivalence classes for required invariant loads
instead of creating them on-demand. This way we can check if a parameter
references an invariant load that is actually not executed and was therefore
not materialized. If that happens, the parameter is not materialized either.
This fixes bug 25469.
llvm-svn: 252701
| -rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 20 | ||||
| -rw-r--r-- | polly/lib/CodeGen/IslNodeBuilder.cpp | 18 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/invariant_load_not_executed_but_in_parameters.ll | 115 | ||||
| -rw-r--r-- | polly/test/ScopInfo/invariant_loads_complicated_dependences.ll | 6 |
4 files changed, 145 insertions, 14 deletions
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 6027cf3481a..4920b118187 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -1637,10 +1637,14 @@ void Scop::buildInvariantEquivalenceClasses() { const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); LoadInst *&ClassRep = EquivClasses[PointerSCEV]; - if (!ClassRep) - ClassRep = LInst; - else + if (ClassRep) { InvEquivClassVMap[LInst] = ClassRep; + continue; + } + + ClassRep = LInst; + InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList(), + nullptr); } } @@ -2656,8 +2660,11 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { // Unify the execution context of the class and this statement. isl_set *&IAClassDomainCtx = std::get<2>(IAClass); - IAClassDomainCtx = isl_set_coalesce( - isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx))); + if (IAClassDomainCtx) + IAClassDomainCtx = isl_set_coalesce( + isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx))); + else + IAClassDomainCtx = isl_set_copy(DomainCtx); break; } @@ -2760,9 +2767,6 @@ void Scop::hoistInvariantLoads() { } isl_union_map_free(Writes); - if (!InvariantEquivClasses.empty()) - IsOptimized = true; - auto &ScopRIL = *SD.getRequiredInvariantLoads(&getRegion()); // Check required invariant loads that were tagged during SCoP detection. for (LoadInst *LI : ScopRIL) { diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 639c29ce543..ab762e6797a 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -819,6 +819,7 @@ bool IslNodeBuilder::materializeValue(isl_id *Id) { // If the Id is already mapped, skip it. if (!IDToValue.count(Id)) { auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id); + Value *V = nullptr; // Parameters could refere to invariant loads that need to be // preloaded before we can generate code for the parameter. 
Thus, @@ -827,13 +828,22 @@ bool IslNodeBuilder::materializeValue(isl_id *Id) { SetVector<Value *> Values; findValues(ParamSCEV, Values); for (auto *Val : Values) - if (const auto *IAClass = S.lookupInvariantEquivClass(Val)) + if (const auto *IAClass = S.lookupInvariantEquivClass(Val)) { + + // Check if this invariant access class is empty, hence if we never + // actually added a loads instruction to it. In that case it has no + // (meaningful) users and we should not try to code generate it. + if (std::get<1>(*IAClass).empty()) + V = UndefValue::get(ParamSCEV->getType()); + if (!preloadInvariantEquivClass(*IAClass)) { isl_id_free(Id); return false; } + } - auto *V = generateSCEV(ParamSCEV); + if (!V) + V = generateSCEV(ParamSCEV); IDToValue[Id] = V; } @@ -947,7 +957,9 @@ bool IslNodeBuilder::preloadInvariantEquivClass( // elements of the class to the one preloaded load as they are referenced // during the code generation and therefor need to be mapped. const MemoryAccessList &MAs = std::get<1>(IAClass); - assert(!MAs.empty()); + if (MAs.empty()) + return true; + MemoryAccess *MA = MAs.front(); assert(MA->isExplicit() && MA->isRead()); diff --git a/polly/test/Isl/CodeGen/invariant_load_not_executed_but_in_parameters.ll b/polly/test/Isl/CodeGen/invariant_load_not_executed_but_in_parameters.ll new file mode 100644 index 00000000000..77a3341d71a --- /dev/null +++ b/polly/test/Isl/CodeGen/invariant_load_not_executed_but_in_parameters.ll @@ -0,0 +1,115 @@ +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; Check that this does not crash as the invariant load is not executed (thus +; not preloaded) but still referenced by one of the parameters. 
+; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.Exp.204.248.358 = type { %struct.Exp_.200.244.354*, i32, i32, i32, %struct.Exp.204.248.358*, %struct.Exp.204.248.358*, %union.anon.2.201.245.355, %union.anon.3.202.246.356, %union.anon.4.203.247.357 } +%struct.Exp_.200.244.354 = type { i32, i32, i32, i32, %struct.Id.199.243.353* } +%struct.Id.199.243.353 = type { i8*, i32, i32, i32, %union.anon.1.198.242.352 } +%union.anon.1.198.242.352 = type { [2 x i64] } +%union.anon.2.201.245.355 = type { %struct.Exp.204.248.358* } +%union.anon.3.202.246.356 = type { i32 } +%union.anon.4.203.247.357 = type { %struct.Exp.204.248.358** } +%struct.Classfile.218.262.372 = type { %struct._IO_FILE.206.250.360*, %struct._IO_FILE.206.250.360*, i32, i32, i32, %struct.ClassVersion.207.251.361, %struct.ConstPool.210.254.364, %struct.AccessFlags.211.255.365, i16, i8*, i8*, i16, i8*, i16, i16*, i16, %struct.field_info.212.256.366**, i16, %struct.method_info.217.261.371**, i8*, i16, i8**, i8* } +%struct._IO_FILE.206.250.360 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.205.249.359*, %struct._IO_FILE.206.250.360*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker.205.249.359 = type { %struct._IO_marker.205.249.359*, %struct._IO_FILE.206.250.360*, i32 } +%struct.ClassVersion.207.251.361 = type { i16, i16 } +%struct.ConstPool.210.254.364 = type { i16, %struct.cp_info.209.253.363* } +%struct.cp_info.209.253.363 = type { i8, %union.anon.208.252.362 } +%union.anon.208.252.362 = type { i64 } +%struct.AccessFlags.211.255.365 = type { i16 } +%struct.field_info.212.256.366 = type <{ %struct.AccessFlags.211.255.365, [6 x i8], i8*, i8*, i32, i16, [2 x i8] }> +%struct.method_info.217.261.371 = type { %struct.AccessFlags.211.255.365, i8*, i8*, i8, i8, i32, i8*, i16, %struct.Block.214.258.368*, i16, %struct.LineNumberTableEntry.215.259.369*, i16, 
%struct.LocalVariableTableEntry.216.260.370*, i8**, i8**, i32*, i32*, i8*, i32, i32, i32* } +%struct.Block.214.258.368 = type { i32, i16, i16, %union.anon.0.213.257.367, i16, %struct.Exp.204.248.358* } +%union.anon.0.213.257.367 = type { i32 } +%struct.LineNumberTableEntry.215.259.369 = type { i16, i16 } +%struct.LocalVariableTableEntry.216.260.370 = type { i16, i16, i16, i16, i16 } +%struct.Case.219.263.373 = type { i64, i64 } + +@currpc = external global i32, align 4 +@bufflength = external global i32, align 4 +@inbuff = external global i8*, align 8 +@stkptr = external global %struct.Exp.204.248.358**, align 8 +@donestkptr = external global %struct.Exp.204.248.358**, align 8 + +; Function Attrs: uwtable +define i32 @_Z13dotableswitchP9Classfile(%struct.Classfile.218.262.372* %c) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %sub = add i32 0, -1 + %tobool.5 = icmp eq i32 0, 0 + br i1 %tobool.5, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry.split + br label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + %0 = load i32, i32* @currpc, align 4 + %rem = and i32 %0, 3 + %tobool = icmp eq i32 %rem, 0 + br i1 %tobool, label %while.cond.while.end_crit_edge, label %while.body + +while.cond.while.end_crit_edge: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.cond.while.end_crit_edge, %entry.split + invoke void @_ZN3ExpC2Ejj7Exptype4Type2OpPS_jjP4Case(%struct.Exp.204.248.358* nonnull undef, i32 %sub, i32 undef, i32 9, i32 0, i32 39, %struct.Exp.204.248.358* undef, i32 undef, i32 undef, %struct.Case.219.263.373* nonnull undef) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %while.end + br i1 undef, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %invoke.cont + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + br i1 undef, label 
%for.cond.for.end_crit_edge, label %for.body + +lpad: ; preds = %while.end + %1 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } undef + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %invoke.cont + ret i32 0 +} + +; Function Attrs: nounwind readnone +declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1 + +; Function Attrs: nobuiltin +declare noalias i8* @_Znam(i64) #2 + +; Function Attrs: nobuiltin +declare noalias i8* @_Znwm(i64) #2 + +; Function Attrs: uwtable +declare void @_ZN3ExpC2Ejj7Exptype4Type2OpPS_jjP4Case(%struct.Exp.204.248.358*, i32, i32, i32, i32, i32, %struct.Exp.204.248.358*, i32, i32, %struct.Case.219.263.373*) unnamed_addr #0 align 2 + +declare i32 @__gxx_personality_v0(...) + +; Function Attrs: nobuiltin nounwind +declare void @_ZdlPv(i8*) #3 + +; Function Attrs: uwtable +declare i32 @_Z10doluswitchP9Classfile(%struct.Classfile.218.262.372*) #0 + +; Function Attrs: nounwind uwtable +declare void @_ZN4Exp_C2E7Exptype4Type2Op(%struct.Exp_.200.244.354*, i32, i32, i32) unnamed_addr #4 align 2 + +attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nobuiltin "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nobuiltin nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git 02f36bd65b1133fbfa8bf1470105c02d9bda35e7) (http://llvm.org/git/llvm.git 01eac645064dde04ea609a8a742b1b996ef79179)"} diff --git a/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll b/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll index 3db78d85e97..f2bc438139c 100644 --- a/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll +++ b/polly/test/ScopInfo/invariant_loads_complicated_dependences.ll @@ -5,14 +5,14 @@ ; CHECK-NEXT: [LB, UB] -> { Stmt_for_body[i0] -> MemRef_LBptr[0] }; ; CHECK-NEXT: Execution Context: [LB, UB] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UBptr[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [LB, UB] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] }; ; CHECK-NEXT: Execution Context: [LB, UB] -> { : (UB >= 1 + LB and UB >= 6) or LB >= 6 } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [LB, UB] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] }; ; CHECK-NEXT: Execution Context: [LB, UB] -> { : LB <= 5 } -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UBptr[0] }; -; CHECK-NEXT: Execution Context: [LB, UB] -> { : } ; CHECK-NEXT: } ; ; void f(int *restrict A, int *restrict V, int *restrict U, int 
*restrict UB, |

