summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/CodeGen/IRBuilder.h24
-rw-r--r--polly/lib/CodeGen/IRBuilder.cpp61
-rw-r--r--polly/lib/CodeGen/IslNodeBuilder.cpp4
-rw-r--r--polly/lib/Transform/ScheduleOptimizer.cpp16
-rw-r--r--polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll1
-rw-r--r--polly/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll70
-rw-r--r--polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll2
-rw-r--r--polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll1
8 files changed, 174 insertions, 5 deletions
diff --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h
index 97adbdf49ba..23a12f45dd2 100644
--- a/polly/include/polly/CodeGen/IRBuilder.h
+++ b/polly/include/polly/CodeGen/IRBuilder.h
@@ -80,7 +80,20 @@ public:
/// Delete the set of alternative alias bases
void resetAlternativeAliasBases() { AlternativeAliasBases.clear(); }
+ /// Add inter iteration alias-free base pointer @p BasePtr.
+ void addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr);
+
private:
+ /// Annotate with the second level alias metadata
+ ///
+ /// Annotate the instruction @p I with the second level alias metadata
+ /// to distinguish the individual non-aliasing accesses that have inter
+ /// iteration alias-free base pointers.
+ ///
+ /// @param I The instruction to be annotated.
+ /// @param BasePtr The base pointer of @p I.
+ void annotateSecondLevel(llvm::Instruction *I, llvm::Value *BasePtr);
+
/// The ScalarEvolution analysis we use to find base pointers.
llvm::ScalarEvolution *SE;
@@ -100,6 +113,17 @@ private:
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
OtherAliasScopeListMap;
+ /// A map from pointers to second level alias scopes.
+ llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+ SecondLevelAliasScopeMap;
+
+ /// A map from pointers to second level alias scope list of other pointers.
+ llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+ SecondLevelOtherAliasScopeListMap;
+
+ /// Inter iteration alias-free base pointers.
+ llvm::SmallPtrSet<llvm::Value *, 4> InterIterationAliasFreeBasePtrs;
+
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::AssertingVH<llvm::Value>>
AlternativeAliasBases;
};
diff --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp
index 9b3602d0138..10775156595 100644
--- a/polly/lib/CodeGen/IRBuilder.cpp
+++ b/polly/lib/CodeGen/IRBuilder.cpp
@@ -115,6 +115,49 @@ void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L,
B->setMetadata("llvm.loop", Id);
}
+/// Get the pointer operand
+///
+/// @param Inst The instruction to be analyzed.
+/// @return the pointer operand in case @p Inst is a memory access
+/// instruction and nullptr otherwise.
+static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {
+ auto MemInst = MemAccInst::dyn_cast(Inst);
+ if (!MemInst)
+ return nullptr;
+
+ return MemInst.getPointerOperand();
+}
+
+void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
+ llvm::Value *BasePtr) {
+ auto *Ptr = getMemAccInstPointerOperand(Inst);
+ if (!Ptr)
+ return;
+ auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(Ptr);
+ auto SecondLevelOtherAliasScopeList =
+ SecondLevelOtherAliasScopeListMap.lookup(Ptr);
+ if (!SecondLevelAliasScope) {
+ auto AliasScope = AliasScopeMap.lookup(BasePtr);
+ if (!AliasScope)
+ return;
+ LLVMContext &Ctx = SE->getContext();
+ SecondLevelAliasScope = getID(
+ Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
+ SecondLevelAliasScopeMap[Ptr] = SecondLevelAliasScope;
+ Metadata *Args = {SecondLevelAliasScope};
+ auto SecondLevelBasePtrAliasScopeList =
+ SecondLevelAliasScopeMap.lookup(BasePtr);
+ SecondLevelAliasScopeMap[BasePtr] = MDNode::concatenate(
+ SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));
+ auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
+ SecondLevelOtherAliasScopeList = MDNode::concatenate(
+ OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
+ SecondLevelOtherAliasScopeListMap[Ptr] = SecondLevelOtherAliasScopeList;
+ }
+ Inst->setMetadata("alias.scope", SecondLevelAliasScope);
+ Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
+}
+
void ScopAnnotator::annotate(Instruction *Inst) {
if (!Inst->mayReadOrWriteMemory())
return;
@@ -126,11 +169,7 @@ void ScopAnnotator::annotate(Instruction *Inst) {
if (!AliasScopeDomain)
return;
- auto MemInst = MemAccInst::dyn_cast(Inst);
- if (!MemInst)
- return;
-
- auto *Ptr = MemInst.getPointerOperand();
+ auto *Ptr = getMemAccInstPointerOperand(Inst);
if (!Ptr)
return;
@@ -162,6 +201,18 @@ void ScopAnnotator::annotate(Instruction *Inst) {
"BasePtr either expected in AliasScopeMap and OtherAlias...Map");
auto *OtherAliasScopeList = OtherAliasScopeListMap[BasePtr];
+ if (InterIterationAliasFreeBasePtrs.count(BasePtr)) {
+ annotateSecondLevel(Inst, BasePtr);
+ return;
+ }
+
Inst->setMetadata("alias.scope", AliasScope);
Inst->setMetadata("noalias", OtherAliasScopeList);
}
+
+void ScopAnnotator::addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr) {
+ if (!BasePtr)
+ return;
+
+ InterIterationAliasFreeBasePtrs.insert(BasePtr);
+}
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 44f831e871d..1d2f546c46b 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -381,6 +381,10 @@ void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
isl_id_free(Id);
return;
}
+ if (!strcmp(isl_id_get_name(Id), "Inter iteration alias-free")) {
+ auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
+ Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
+ }
create(Child);
isl_id_free(Id);
}
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index a880aa8238a..e1d8fd37f92 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -1248,10 +1248,26 @@ isolateAndUnrollMatMulInnerLoops(__isl_take isl_schedule_node *Node,
return Node;
}
+/// Mark @p BasePtr with "Inter iteration alias-free" mark node.
+///
+/// @param Node The child of the mark node to be inserted.
+/// @param BasePtr The pointer to be marked.
+/// @return The modified isl_schedule_node.
+static isl_schedule_node *markInterIterationAliasFree(isl_schedule_node *Node,
+ llvm::Value *BasePtr) {
+ if (!BasePtr)
+ return Node;
+
+ auto *Ctx = isl_schedule_node_get_ctx(Node);
+ auto *Id = isl_id_alloc(Ctx, "Inter iteration alias-free", BasePtr);
+ return isl_schedule_node_child(isl_schedule_node_insert_mark(Node, Id), 0);
+}
+
__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
__isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI,
MatMulInfoTy &MMI) {
assert(TTI && "The target transform info should be provided.");
+ Node = markInterIterationAliasFree(Node, MMI.WriteToC->getLatestBaseAddr());
int DimOutNum = isl_schedule_node_band_n_member(Node);
assert(DimOutNum > 2 && "In case of the matrix multiplication the loop nest "
"and, consequently, the corresponding scheduling "
diff --git a/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll b/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
index a78e8b6621d..91a32b072f5 100644
--- a/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
+++ b/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
@@ -27,6 +27,7 @@
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
; CHECK-NEXT: }
+; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // 1st level tiling - Tiles
; CHECK-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) {
; CHECK-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1)
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
new file mode 100644
index 00000000000..a640daf00ab
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
@@ -0,0 +1,70 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-invariant-load-hoisting=true \
+; RUN: -polly-pattern-matching-based-opts=true \
+; RUN: -polly-target-throughput-vector-fma=1 \
+; RUN: -polly-target-latency-vector-fma=1 \
+; RUN: -polly-codegen -polly-target-1st-cache-level-associativity=8 \
+; RUN: -polly-target-2nd-cache-level-associativity=8 \
+; RUN: -polly-target-1st-cache-level-size=32768 \
+; RUN: -polly-target-vector-register-bitwidth=256 \
+; RUN: -polly-target-2nd-cache-level-size=262144 -S < %s \
+; RUN: | FileCheck %s
+;
+; This test case checks whether Polly generates second level alias metadata
+; to distinguish the specific accesses in case of the ublas gemm kernel.
+;
+; CHECK: %tmp22_p_scalar_ = load double, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
+; CHECK: store double %p_tmp23, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
+; CHECK: %tmp22_p_scalar_188 = load double, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
+; CHECK: store double %p_tmp23189, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
+; CHECK: %tmp22_p_scalar_209 = load double, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
+; CHECK: store double %p_tmp23210, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
+; CHECK: %tmp22_p_scalar_230 = load double, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
+; CHECK: store double %p_tmp23231, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) {
+bb:
+ br label %bb8
+
+bb8: ; preds = %bb29, %bb
+ %tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
+ br label %bb9
+
+bb9: ; preds = %bb26, %bb8
+ %tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
+ %tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
+ %tmp12 = load double, double* %tmp11, align 8
+ %tmp13 = fmul double %tmp12, %arg4
+ store double %tmp13, double* %tmp11, align 8
+ br label %Copy_0
+
+Copy_0: ; preds = %Copy_0, %bb9
+ %tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
+ %tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
+ %tmp17 = load double, double* %tmp16, align 8
+ %tmp18 = fmul double %tmp17, %arg3
+ %tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
+ %tmp20 = load double, double* %tmp19, align 8
+ %tmp21 = fmul double %tmp18, %tmp20
+ %tmp22 = load double, double* %tmp11, align 8
+ %tmp23 = fadd double %tmp22, %tmp21
+ store double %tmp23, double* %tmp11, align 8
+ %tmp24 = add nuw nsw i64 %tmp15, 1
+ %tmp25 = icmp ne i64 %tmp24, 1024
+ br i1 %tmp25, label %Copy_0, label %bb26
+
+bb26: ; preds = %Copy_0
+ %tmp27 = add nuw nsw i64 %tmp10, 1
+ %tmp28 = icmp ne i64 %tmp27, 1056
+ br i1 %tmp28, label %bb9, label %bb29
+
+bb29: ; preds = %bb26
+ %tmp30 = add nuw nsw i64 %tmp, 1
+ %tmp31 = icmp ne i64 %tmp30, 1056
+ br i1 %tmp31, label %bb8, label %bb32
+
+bb32: ; preds = %bb29
+ ret void
+}
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
index a605508c9b2..169eae3bc5d 100644
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
@@ -33,6 +33,7 @@
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
; CHECK-NEXT: }
+; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // Register tiling - Tiles
; CHECK-NEXT: for (int c0 = 0; c0 <= 131; c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= 263; c1 += 1)
@@ -84,6 +85,7 @@
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
; EXTRACTION-OF-MACRO-KERNEL-NEXT: }
+; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Inter iteration alias-free
; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Tiles
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) {
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1)
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
index a5cb9fece1f..9d07b509f70 100644
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
@@ -37,6 +37,7 @@
; C[i][j] += A[i][k] * B[k][j];
;
; CHECK: if (ni >= 1) {
+; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // 1st level tiling - Tiles
; CHECK-NEXT: for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) {
OpenPOWER on IntegriCloud