diff options
author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-02-07 17:30:13 +0000 |
---|---|---|
committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-02-07 17:30:13 +0000 |
commit | 96e547113926e78c08330664f796e36894ff44a9 (patch) | |
tree | cc9f3ab37ea9413efa947fde4642172120d6ecbc | |
parent | 57b627b41cb716e9d2653839be57e5b7b3208781 (diff) | |
download | bcm5719-llvm-96e547113926e78c08330664f796e36894ff44a9.tar.gz bcm5719-llvm-96e547113926e78c08330664f796e36894ff44a9.zip |
Separate invariant equivalence classes by type
We now distinguish invariant loads to the same memory location if they
have different types. This will cause us to pre-load an invariant
location once for each type that is used to access it. However, we can
thereby avoid invalid casting, especially if an array is accessed
through differently typed/sized invariant loads.
This basically reverts the changes in r260023 but keeps the test
cases.
llvm-svn: 260045
7 files changed, 47 insertions, 56 deletions
diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h index 330a19862d4..7d27107ca56 100644 --- a/polly/include/polly/CodeGen/IslNodeBuilder.h +++ b/polly/include/polly/CodeGen/IslNodeBuilder.h @@ -209,7 +209,9 @@ protected: virtual void createFor(__isl_take isl_ast_node *For); /// @brief Set to remember materialized invariant loads. - SmallPtrSet<const SCEV *, 16> PreloadedPtrs; + /// + /// An invariant load is identified by its pointer (the SCEV) and its type. + SmallSet<std::pair<const SCEV *, Type *>, 16> PreloadedPtrs; /// @brief Preload the memory access at @p AccessRange with @p Build. /// diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 71bb5d55286..345a32be4e7 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -850,11 +850,13 @@ using MemoryAccessList = std::forward_list<MemoryAccess *>; /// The first element is the SCEV for the pointer/location that identifies this /// equivalence class. The second is a list of memory accesses to that location /// that are now treated as invariant and hoisted during code generation. The -/// last element is the execution context under which the invariant memory +/// third element is the execution context under which the invariant memory /// location is accessed, hence the union of all domain contexts for the memory -/// accesses in the list. +/// accesses in the list. The last element describes the type of the invariant +/// accesss in order to differentiate between different typed invariant loads of +/// the same location. using InvariantEquivClassTy = - std::tuple<const SCEV *, MemoryAccessList, isl_set *>; + std::tuple<const SCEV *, MemoryAccessList, isl_set *, Type *>; /// @brief Type for invariant accesses equivalence classes. 
using InvariantEquivClassesTy = SmallVector<InvariantEquivClassTy, 8>; diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 23c14591912..e33f9bfac8f 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -1827,21 +1827,22 @@ void Scop::addUserContext() { } void Scop::buildInvariantEquivalenceClasses() { - DenseMap<const SCEV *, LoadInst *> EquivClasses; + DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses; const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion()); for (LoadInst *LInst : RIL) { const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); - LoadInst *&ClassRep = EquivClasses[PointerSCEV]; + Type *Ty = LInst->getType(); + LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)]; if (ClassRep) { InvEquivClassVMap[LInst] = ClassRep; continue; } ClassRep = LInst; - InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList(), - nullptr); + InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList(), nullptr, + Ty); } } @@ -2852,9 +2853,10 @@ const InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) const { if (Value *Rep = InvEquivClassVMap.lookup(LInst)) LInst = cast<LoadInst>(Rep); + Type *Ty = LInst->getType(); const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); for (auto &IAClass : InvariantEquivClasses) - if (PointerSCEV == std::get<0>(IAClass)) + if (PointerSCEV == std::get<0>(IAClass) && Ty == std::get<3>(IAClass)) return &IAClass; return nullptr; @@ -2897,11 +2899,12 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { // MA and if found consolidate them. Otherwise create a new equivalence // class at the end of InvariantEquivClasses. 
LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); + Type *Ty = LInst->getType(); const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); bool Consolidated = false; for (auto &IAClass : InvariantEquivClasses) { - if (PointerSCEV != std::get<0>(IAClass)) + if (PointerSCEV != std::get<0>(IAClass) || Ty != std::get<3>(IAClass)) continue; Consolidated = true; @@ -2926,7 +2929,7 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { // If we did not consolidate MA, thus did not find an equivalence class // for it, we create a new one. InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList{MA}, - isl_set_copy(DomainCtx)); + isl_set_copy(DomainCtx), Ty); } isl_set_free(DomainCtx); @@ -2971,15 +2974,6 @@ bool Scop::isHoistableAccess(MemoryAccess *Access, isl_map *AccessRelation = Access->getAccessRelation(); - // Invariant load hoisting of memory accesses with non-canonical element - // types lacks support for equivalence classes that contain elements of - // different width/size. Hence, do not yet consider loads with non-canonical - // element size for load hoisting. - if (!isl_map_is_single_valued(AccessRelation)) { - isl_map_free(AccessRelation); - return false; - } - // Skip accesses that have an empty access relation. These can be caused // by multiple offsets with a type cast in-between that cause the overall // byte offset to be not divisible by the new types sizes. diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index d8ec5b0c7b2..50435b67a9a 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -1015,7 +1015,8 @@ bool IslNodeBuilder::preloadInvariantEquivClass( // Check for recurrsion which can be caused by additional constraints, e.g., // non-finitie loop contraints. In such a case we have to bail out and insert // a "false" runtime check that will cause the original code to be executed. 
- if (!PreloadedPtrs.insert(std::get<0>(IAClass)).second) + auto PtrId = std::make_pair(std::get<0>(IAClass), std::get<3>(IAClass)); + if (!PreloadedPtrs.insert(PtrId).second) return false; // If the base pointer of this class is dependent on another one we have to @@ -1033,13 +1034,10 @@ bool IslNodeBuilder::preloadInvariantEquivClass( if (!PreloadVal) return false; - assert(PreloadVal->getType() == AccInst->getType()); for (const MemoryAccess *MA : MAs) { Instruction *MAAccInst = MA->getAccessInstruction(); - // TODO: The bitcast here is wrong. In case of floating and non-floating - // point values we need to reload the value or convert it. - ValueMap[MAAccInst] = - Builder.CreateBitOrPointerCast(PreloadVal, MAAccInst->getType()); + assert(PreloadVal->getType() == MAAccInst->getType()); + ValueMap[MAAccInst] = PreloadVal; } if (SE.isSCEVable(AccInstTy)) { @@ -1063,11 +1061,8 @@ bool IslNodeBuilder::preloadInvariantEquivClass( // should only change the base pointer of the derived SAI if we actually // preloaded it. if (BasePtr == MA->getBaseAddr()) { - // TODO: The bitcast here is wrong. In case of floating and non-floating - // point values we need to reload the value or convert it. - BasePtr = - Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType()); - DerivedSAI->setBasePtr(BasePtr); + assert(BasePtr->getType() == PreloadVal->getType()); + DerivedSAI->setBasePtr(PreloadVal); } // For scalar derived SAIs we remap the alloca used for the derived value. diff --git a/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll b/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll index e69c3f806fa..71d1be2dedd 100644 --- a/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll +++ b/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll @@ -1,18 +1,9 @@ -; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s - -; Invariant loads with non-canonical types are not yet fully supported. 
- -; XFAIL: * +; RUN: opt %loadPolly -polly-allow-differing-element-types -polly-codegen -S < %s | FileCheck %s ; CHECK: %polly.access.cast.global.load = bitcast %struct.hoge* %global.load to i32* ; CHECK: %polly.access.global.load = getelementptr i32, i32* %polly.access.cast.global.load, i64 0 ; CHECK: %polly.access.global.load.load = load i32, i32* %polly.access.global.load -; CHECK: %polly.access.cast.global.load1 = bitcast %struct.hoge* %global.load to i32* -; CHECK: %polly.access.global.load2 = getelementptr i32, i32* %polly.access.cast.global.load1, i64 2 -; CHECK: %polly.access.global.load2.cast = bitcast i32* %polly.access.global.load2 to double* -; CHECK: %polly.access.global.load2.load = load double, double* %polly.access.global.load2.cast - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll index 3b6b24d16d6..3df1490509e 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll @@ -11,6 +11,9 @@ ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_U[0] }; ; CHECK-NEXT: Execution Context: { : } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_U[0] }; +; CHECK-NEXT: Execution Context: { : } ; CHECK-NEXT: } ; ; CHECK: Statements { @@ -24,13 +27,15 @@ ; CHECK-NEXT: } ; ; CODEGEN: entry: -; CODEGEN: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.i.preload.s2a = alloca i32 ; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: -; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*) -; CODEGEN: %0 
= bitcast float %U.load to i32 -; CODEGEN: store float %U.load, float* %U.f.preload.s2a +; CODEGEN-DAG: %U.load[[f:[.0-9]*]] = load float, float* bitcast (i32* @U to float*) +; CODEGEN-DAG: store float %U.load[[f]], float* %U.f.preload.s2a +; CODEGEN-DAG: %U.load[[i:[.0-9]*]] = load i32, i32* @U +; CODEGEN-DAG: store i32 %U.load[[i]], i32* %U.i.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-NOT: merge = phi @@ -39,8 +44,7 @@ ; CODEGEN-NOT: final_reload ; ; CODEGEN: polly.stmt.for.body: -; CODEGEN: %p_conv = fptosi float %U.load to i32 -; CODEGEN: %p_add = add nsw i32 %0, %p_conv +; CODEGEN: %p_add = add nsw i32 %U.load[[i]], %p_conv ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll index b208859d1f5..2e7696fb7f8 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll @@ -16,6 +16,9 @@ ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_do_body[i0] -> MemRef_U[0] }; ; CHECK-NEXT: Execution Context: { : } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_do_body[i0] -> MemRef_U[0] }; +; CHECK-NEXT: Execution Context: { : } ; CHECK-NEXT: } ; ; CHECK: Statements { @@ -29,26 +32,26 @@ ; CHECK-NEXT: } ; ; CODEGEN: entry: -; CODEGEN: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.i.preload.s2a = alloca i32 ; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: -; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*) -; CODEGEN: %0 = bitcast float %U.load to i32 -; CODEGEN: store float %U.load, float* %U.f.preload.s2a +; CODEGEN-DAG: 
%U.load[[f:[.0-9]*]] = load float, float* bitcast (i32* @U to float*) +; CODEGEN-DAG: store float %U.load[[f]], float* %U.f.preload.s2a +; CODEGEN-DAG: %U.load[[i:[.0-9]*]] = load i32, i32* @U +; CODEGEN-DAG: store i32 %U.load[[i]], i32* %U.i.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-DAG: %U.f.merge = phi float [ %U.f.final_reload, %polly.exiting ], [ %U.f, %do.cond ] -; CODEGEN-DAG: %U.i.merge = phi i32 [ %5, %polly.exiting ], [ %U.i, %do.cond ] +; CODEGEN-DAG: %U.i.merge = phi i32 [ %U.i.final_reload, %polly.exiting ], [ %U.i, %do.cond ] ; ; CODEGEN: polly.loop_exit: ; CODEGEN-DAG: %U.f.final_reload = load float, float* %U.f.preload.s2a -; CODEGEN-DAG: %U.i.final_reload = load float, float* %U.f.preload.s2a -; CODEGEN-DAG: %5 = bitcast float %U.i.final_reload to i32 +; CODEGEN-DAG: %U.i.final_reload = load i32, i32* %U.i.preload.s2a ; ; CODEGEN: polly.stmt.do.body: -; CODEGEN: %p_conv = fptosi float %U.load to i32 -; CODEGEN: %p_add = add nsw i32 %0, %p_conv +; CODEGEN: %p_add = add nsw i32 %U.load[[i]], %p_conv ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |