diff options
author | Roman Gareev <gareevroman@gmail.com> | 2016-07-25 09:42:53 +0000 |
---|---|---|
committer | Roman Gareev <gareevroman@gmail.com> | 2016-07-25 09:42:53 +0000 |
commit | 3a18a931a8f47879c4b246600a2bd59ca4f61c82 (patch) | |
tree | c145b0d3c80f4f6acb23c00b91a3af4964fbc4b1 /polly/test/ScheduleOptimizer | |
parent | 73f5c785c103edb05a5619c30eff040511816561 (diff) | |
download | bcm5719-llvm-3a18a931a8f47879c4b246600a2bd59ca4f61c82.tar.gz bcm5719-llvm-3a18a931a8f47879c4b246600a2bd59ca4f61c82.zip |
Apply all necessary tilings and interchangings to get a macro-kernel
This is the second patch to apply the BLIS matmul optimization pattern
on matmul kernels
(http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf).
BLIS implements gemm as three nested loops around a macro-kernel, plus
two packing routines. The macro-kernel is implemented in terms
of two additional loops around a micro-kernel. The micro-kernel
is a loop around a rank-1 (i.e., outer product) update. In this change
we create the BLIS macro-kernel by applying a combination of tiling
and interchanging. In subsequent changes we will implement the packing
transformation.
Reviewed-by: Tobias Grosser <tobias@grosser.es>
Differential Revision: http://reviews.llvm.org/D21491
llvm-svn: 276627
Diffstat (limited to 'polly/test/ScheduleOptimizer')
-rw-r--r-- | polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll index 51c9da2978c..45a352c8d37 100644 --- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll +++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll @@ -1,4 +1,5 @@ ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-througput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast < %s 2>&1 | FileCheck %s +; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-througput-vector-fma=1 -polly-target-latency-vector-fma=8 -analyze -polly-ast -polly-target-cache-level-associativity=8,8 -polly-target-cache-level-sizes=32768,262144 < %s 2>&1 | FileCheck %s --check-prefix=EXTRACTION-OF-MACRO-KERNEL ; ; /* C := alpha*A*B + beta*C */ ; for (i = 0; i < _PB_NI; i++) @@ -62,6 +63,56 @@ ; CHECK: } ; CHECK: } ; +; EXTRACTION-OF-MACRO-KERNEL: // 1st level tiling - Tiles +; EXTRACTION-OF-MACRO-KERNEL: for (int c0 = 0; c0 <= 65; c0 += 1) +; EXTRACTION-OF-MACRO-KERNEL: for (int c1 = 0; c1 <= 3; c1 += 1) +; EXTRACTION-OF-MACRO-KERNEL: for (int c2 = 0; c2 <= 10; c2 += 1) { +; EXTRACTION-OF-MACRO-KERNEL: // 1st level tiling - Points +; EXTRACTION-OF-MACRO-KERNEL: // Register tiling - Tiles +; EXTRACTION-OF-MACRO-KERNEL: for (int c3 = 0; c3 <= 3; c3 += 1) +; EXTRACTION-OF-MACRO-KERNEL: for (int c4 = 0; c4 <= 11; c4 += 1) +; EXTRACTION-OF-MACRO-KERNEL: for (int c5 = 0; c5 <= 255; c5 += 1) { +; EXTRACTION-OF-MACRO-KERNEL: // Register tiling - Points +; EXTRACTION-OF-MACRO-KERNEL: // 1st level tiling - Tiles +; EXTRACTION-OF-MACRO-KERNEL: // 1st level tiling - Points +; EXTRACTION-OF-MACRO-KERNEL: { +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4 + 5, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3, 96 * c2 + 8 * c4 + 7, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4 + 5, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 1, 96 * c2 + 8 * c4 + 7, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4 + 5, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 2, 96 * c2 + 8 * c4 + 7, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4 + 5, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: Stmt_bb24(16 * c0 + 4 * c3 + 3, 96 * c2 + 8 * c4 + 7, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL: } +; EXTRACTION-OF-MACRO-KERNEL: } +; EXTRACTION-OF-MACRO-KERNEL: } +; EXTRACTION-OF-MACRO-KERNEL: } +; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" |