diff options
6 files changed, 271 insertions, 22 deletions
diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h index 562b9a0f175..99c7b9f50fb 100644 --- a/polly/include/polly/CodeGen/IslNodeBuilder.h +++ b/polly/include/polly/CodeGen/IslNodeBuilder.h @@ -204,8 +204,10 @@ protected: virtual void createFor(__isl_take isl_ast_node *For); /// @brief Preload the memory access at @p AccessRange with @p Build. + /// + /// @returns The preloaded value cast to type @p Ty Value *preloadUnconditionally(__isl_take isl_set *AccessRange, - isl_ast_build *Build); + isl_ast_build *Build, Type *Ty); /// @brief Preload the memory load access @p MA. /// @@ -218,8 +220,7 @@ protected: /// MA_preload = load MA; /// use MA_preload Value *preloadInvariantLoad(const MemoryAccess &MA, - __isl_take isl_set *Domain, - __isl_keep isl_ast_build *Build); + __isl_take isl_set *Domain); void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index db5efbdf04b..fe2ed7934b0 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -541,8 +541,10 @@ void BlockGenerator::createScalarFinalization(Region &R) { Value *ScalarAddr = EscapeMappingValue.first; // Reload the demoted instruction in the optimized version of the SCoP. - Instruction *EscapeInstReload = + Value *EscapeInstReload = Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload"); + EscapeInstReload = + Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType()); // Create the merge PHI that merges the optimized and unoptimized version. 
PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2, diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 086ff7e91d8..ea0523b4e39 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -835,18 +835,20 @@ void IslNodeBuilder::materializeParameters(isl_set *Set, bool All) { } Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange, - isl_ast_build *Build) { + isl_ast_build *Build, Type *Ty) { isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext()); isl_ast_expr *Access = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); - return ExprBuilder.create(Access); + Value *PreloadVal = ExprBuilder.create(Access); + PreloadVal = Builder.CreateBitOrPointerCast(PreloadVal, Ty); + return PreloadVal; } Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, - isl_set *Domain, - isl_ast_build *Build) { + isl_set *Domain) { + auto *Build = isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); isl_set *AccessRange = isl_map_range(MA.getAccessRelation()); materializeParameters(AccessRange, false); @@ -854,9 +856,13 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); isl_set_free(Universe); + Instruction *AccInst = MA.getAccessInstruction(); + Type *AccInstTy = AccInst->getType(); + + Value *PreloadVal; if (AlwaysExecuted) { isl_set_free(Domain); - return preloadUnconditionally(AccessRange, Build); + PreloadVal = preloadUnconditionally(AccessRange, Build, AccInstTy); } else { materializeParameters(Domain, false); @@ -890,18 +896,18 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, Builder.CreateBr(MergeBB); Builder.SetInsertPoint(ExecBB->getTerminator()); - Instruction *AccInst = MA.getAccessInstruction(); - Type *AccInstTy = AccInst->getType(); - Value *PreAccInst = 
preloadUnconditionally(AccessRange, Build); + Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInstTy); Builder.SetInsertPoint(MergeBB->getTerminator()); auto *MergePHI = Builder.CreatePHI( AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); MergePHI->addIncoming(PreAccInst, ExecBB); MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); - - return MergePHI; + PreloadVal = MergePHI; } + + isl_ast_build_free(Build); + return PreloadVal; } void IslNodeBuilder::preloadInvariantLoads() { @@ -918,9 +924,6 @@ void IslNodeBuilder::preloadInvariantLoads() { PreLoadBB->setName("polly.preload.begin"); Builder.SetInsertPoint(PreLoadBB->begin()); - isl_ast_build *Build = - isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); - // For each equivalence class of invariant loads we pre-load the representing // element with the unified execution context. However, we have to map all // elements of the class to the one preloaded load as they are referenced @@ -932,9 +935,12 @@ void IslNodeBuilder::preloadInvariantLoads() { isl_set *Domain = isl_set_copy(IAClass.second.front().second); Instruction *AccInst = MA->getAccessInstruction(); - Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build); - for (const InvariantAccessTy &IA : IAClass.second) - ValueMap[IA.first->getAccessInstruction()] = PreloadVal; + Value *PreloadVal = preloadInvariantLoad(*MA, Domain); + for (const InvariantAccessTy &IA : IAClass.second) { + Instruction *AccInst = IA.first->getAccessInstruction(); + ValueMap[AccInst] = + Builder.CreateBitOrPointerCast(PreloadVal, AccInst->getType()); + } if (SE.isSCEVable(AccInst->getType())) { isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)); @@ -965,8 +971,6 @@ void IslNodeBuilder::preloadInvariantLoads() { EscapeMap[AccInst] = std::make_pair(Alloca, std::move(EscapeUsers)); } - - isl_ast_build_free(Build); } void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { diff --git 
a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type.ll new file mode 100644 index 00000000000..edcdd391288 --- /dev/null +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s +; +; struct { +; int a; +; float b; +; } S; +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) +; A[i] = S.a + S.b; +; } +; +; CHECK: Invariant Accesses: { +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_S[0] }; +; CHECK: Execution Context: { : } +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_S[1] }; +; CHECK: Execution Context: { : } +; CHECK: } +; +; CODEGEN: %S.b.preload.s2a = alloca float +; CODEGEN: %S.a.preload.s2a = alloca i32 +; +; CODEGEN: %.load = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0) +; CODEGEN: store i32 %.load, i32* %S.a.preload.s2a +; CODEGEN: %.load1 = load i32, i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) +; CODEGEN: %0 = bitcast i32 %.load1 to float +; CODEGEN: store float %0, float* %S.b.preload.s2a +; +; CODEGEN: polly.stmt.for.body: +; CODEGEN: %p_conv = sitofp i32 %.load to float +; CODEGEN: %p_add = fadd float %p_conv, %0 +; CODEGEN: %p_conv1 = fptosi float %p_add to i32 + +%struct.anon = type { i32, float } + +@S = common global %struct.anon zeroinitializer, align 4 + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %S.a = load i32, i32* getelementptr inbounds 
(%struct.anon, %struct.anon* @S, i64 0, i32 0), align 4 + %conv = sitofp i32 %S.a to float + %S.b = load float, float* getelementptr inbounds (%struct.anon, %struct.anon* @S, i64 0, i32 1), align 4 + %add = fadd float %conv, %S.b + %conv1 = fptosi float %add to i32 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %conv1, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll new file mode 100644 index 00000000000..742c7dc8df0 --- /dev/null +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll @@ -0,0 +1,99 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s +; +; struct { +; int a; +; float b; +; } S; +; +; float f(int *A) { +; int x; +; float y; +; int i = 0; +; do { +; x = S.a; +; y = S.b; +; A[i] = x + y; +; } while (i++ < 1000); +; return x + y; +; } +; +; CHECK: Invariant Accesses: { +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_do_body[i0] -> MemRef_S[0] }; +; CHECK: Execution Context: { : } +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_do_body[i0] -> MemRef_S[1] }; +; CHECK: Execution Context: { : } +; CHECK: } +; +; CHECK: Statements { +; CHECK-NOT: Access +; CHECK: Stmt_do_body +; CHECK: Domain := +; CHECK: { Stmt_do_body[i0] : i0 <= 1000 and i0 >= 0 }; +; CHECK: Schedule := +; CHECK: { Stmt_do_body[i0] -> [i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_do_body[i0] -> MemRef_A[i0] }; +; CHECK-NOT: Access +; CHECK: } +; +; CODEGEN: entry: +; CODEGEN: %S.b.preload.s2a = alloca float +; CODEGEN: %S.a.preload.s2a = alloca 
i32 +; +; CODEGEN: polly.preload.begin: +; CODEGEN: %.load = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0) +; CODEGEN: %0 = bitcast i32 %.load to float +; CODEGEN: store i32 %.load, i32* %S.a.preload.s2a +; CODEGEN: %.load1 = load i32, i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) +; CODEGEN: %1 = bitcast i32 %.load1 to float +; CODEGEN: %2 = bitcast float %1 to i32 +; CODEGEN: store float %1, float* %S.b.preload.s2a +; +; CODEGEN: polly.merge_new_and_old: +; CODEGEN-DAG: %S.b.merge = phi float [ %S.b.final_reload, %polly.loop_exit ], [ %S.b, %do.cond ] +; CODEGEN-DAG: %S.a.merge = phi i32 [ %S.a.final_reload, %polly.loop_exit ], [ %S.a, %do.cond ] +; +; CODEGEN: do.end: +; CODEGEN: %conv3 = sitofp i32 %S.a.merge to float +; CODEGEN: %add4 = fadd float %conv3, %S.b.merge +; CODEGEN: ret float %add4 +; +; CODEGEN: polly.loop_exit: +; CODEGEN-DAG: %S.b.final_reload = load float, float* %S.b.preload.s2a +; CODEGEN-DAG: %S.a.final_reload = load i32, i32* %S.a.preload.s2a + +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.anon = type { i32, float } + +@S = common global %struct.anon zeroinitializer, align 4 + +define float @f(i32* %A) { +entry: + br label %do.body + +do.body: ; preds = %do.cond, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %entry ] + %S.a = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i64 0, i32 0), align 4 + %S.b = load float, float* getelementptr inbounds (%struct.anon, %struct.anon* @S, i64 0, i32 1), align 4 + %conv = sitofp i32 %S.a to float + %add = fadd float %conv, %S.b + %conv1 = fptosi float %add to i32 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %conv1, i32* %arrayidx, align 4 + br label %do.cond + +do.cond: ; preds = %do.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1001 + br i1 
%exitcond, label %do.body, label %do.end + +do.end: ; preds = %do.cond + %conv3 = sitofp i32 %S.a to float + %add4 = fadd float %conv3, %S.b + ret float %add4 +} diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll new file mode 100644 index 00000000000..d3fad2277da --- /dev/null +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll @@ -0,0 +1,77 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s +; +; int U; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) +; A[i] = (*(int *)&U) + (int)(*(float *)&U); +; } +; +; CHECK: Invariant Accesses: { +; CHECK-NOT: ReadAccess +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_U[0] }; +; CHECK: Execution Context: { : } +; CHECK-NOT: ReadAccess +; CHECK: } +; +; CHECK: Statements { +; CHECK: Stmt_for_body +; CHECK: Domain := +; CHECK: { Stmt_for_body[i0] : i0 <= 999 and i0 >= 0 }; +; CHECK: Schedule := +; CHECK: { Stmt_for_body[i0] -> [i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: } +; +; CODEGEN: entry: +; CODEGEN: %U.f.preload.s2a = alloca float +; CODEGEN: br label %polly.split_new_and_old +; +; CODEGEN: polly.preload.begin: +; CODEGEN: %U.load = load i32, i32* @U +; CODEGEN: %0 = bitcast i32 %U.load to float +; CODEGEN: %1 = bitcast float %0 to i32 +; CODEGEN: store float %0, float* %U.f.preload.s2a +; +; CODEGEN: polly.merge_new_and_old: +; CODEGEN-NOT: merge = phi +; +; CODEGEN: polly.loop_exit: +; CODEGEN-NOT: final_reload +; +; CODEGEN: polly.stmt.for.body: +; CODEGEN: %p_conv = fptosi float %0 to i32 +; CODEGEN: %p_add = add nsw i32 %1, %p_conv +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@U = common global i32 0, align 4 + +define 
void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %U.i = load i32, i32* @U, align 4 + %U.cast = bitcast i32 *@U to float* + %U.f = load float, float* %U.cast, align 4 + %conv = fptosi float %U.f to i32 + %add = add nsw i32 %U.i, %conv + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |

