summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTobias Grosser <grosser@fim.uni-passau.de>2013-05-20 14:01:54 +0000
committerTobias Grosser <grosser@fim.uni-passau.de>2013-05-20 14:01:54 +0000
commit95935c5de142edf90273dc76ebafb7d09561821e (patch)
tree6d377acfd280854b7ed516f4463bdb843b77ce53
parent4b5f4cbaf2ac2e760c181a56d3a7bbd5a7d233af (diff)
downloadbcm5719-llvm-95935c5de142edf90273dc76ebafb7d09561821e.tar.gz
bcm5719-llvm-95935c5de142edf90273dc76ebafb7d09561821e.zip
Update matmul example to the latest polly version
As the naming of the SCoPs has changed, Polly was no longer able to read the user-provided .jscop files. By renaming the provided files, Polly now finds them again and can use them to optimize the matmul function. We also update the generated files to reflect the very latest version of Polly. llvm-svn: 182265
-rw-r--r--polly/www/example_manual_matmul.html50
-rw-r--r--polly/www/experiments/matmul/init_array___%1---%19.jscop21
-rw-r--r--polly/www/experiments/matmul/init_array___%for.cond---%for.end19.jscop21
-rw-r--r--polly/www/experiments/matmul/main___%1---%17.jscop40
-rw-r--r--polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop40
-rw-r--r--polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged (renamed from polly/www/experiments/matmul/main___%1---%17.jscop.interchanged)0
-rw-r--r--polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged+tiled (renamed from polly/www/experiments/matmul/main___%1---%17.jscop.interchanged+tiled)0
-rw-r--r--polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged+tiled+vector (renamed from polly/www/experiments/matmul/main___%1---%17.jscop.interchanged+tiled+vector)0
-rwxr-xr-xpolly/www/experiments/matmul/matmul.normalopt.exebin8644 -> 8849 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.normalopt.llbin1744 -> 2620 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.normalopt.s289
-rwxr-xr-xpolly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exebin13208 -> 13413 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.llbin4784 -> 5980 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s946
-rwxr-xr-xpolly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.exebin8718 -> 8923 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.llbin2820 -> 3724 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.s490
-rwxr-xr-xpolly/www/experiments/matmul/matmul.polly.interchanged+tiled.exebin8711 -> 8916 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged+tiled.llbin2748 -> 3612 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged+tiled.s491
-rwxr-xr-xpolly/www/experiments/matmul/matmul.polly.interchanged.exebin8705 -> 8910 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged.llbin2308 -> 3128 bytes
-rw-r--r--polly/www/experiments/matmul/matmul.polly.interchanged.s311
-rw-r--r--polly/www/experiments/matmul/matmul.preopt.ll276
-rw-r--r--polly/www/experiments/matmul/matmul.s415
-rw-r--r--polly/www/experiments/matmul/scops.init_array.dot62
-rw-r--r--polly/www/experiments/matmul/scops.init_array.dot.pngbin144639 -> 154236 bytes
-rw-r--r--polly/www/experiments/matmul/scops.main.dot90
-rw-r--r--polly/www/experiments/matmul/scops.main.dot.pngbin175453 -> 190505 bytes
-rw-r--r--polly/www/experiments/matmul/scops.print_array.dot84
-rw-r--r--polly/www/experiments/matmul/scops.print_array.dot.pngbin181300 -> 200271 bytes
-rw-r--r--polly/www/experiments/matmul/scopsonly.init_array.dot62
-rw-r--r--polly/www/experiments/matmul/scopsonly.init_array.dot.pngbin24332 -> 28261 bytes
-rw-r--r--polly/www/experiments/matmul/scopsonly.main.dot90
-rw-r--r--polly/www/experiments/matmul/scopsonly.main.dot.pngbin35645 -> 43325 bytes
-rw-r--r--polly/www/experiments/matmul/scopsonly.print_array.dot84
-rw-r--r--polly/www/experiments/matmul/scopsonly.print_array.dot.pngbin30645 -> 52118 bytes
37 files changed, 2144 insertions, 1718 deletions
diff --git a/polly/www/example_manual_matmul.html b/polly/www/example_manual_matmul.html
index 7e352a26802..2636e1d6098 100644
--- a/polly/www/example_manual_matmul.html
+++ b/polly/www/example_manual_matmul.html
@@ -110,7 +110,7 @@ view-scops-only:
<pre>
[...]
Printing analysis 'Polly - Create polyhedral description of Scops' for region:
-'%1 =&gt;&nbsp;%17' in function 'init_array':
+'for.cond =&gt; for.end19' in function 'init_array':
Context:
{ [] }
Statements {
@@ -135,7 +135,7 @@ Printing analysis 'Polly - Create polyhedral description of Scops' for region:
}
[...]
Printing analysis 'Polly - Create polyhedral description of Scops' for region:
-'%1 =&gt;&nbsp;%17' in function 'main':
+'for.cond =&gt; for.end30' in function 'main':
Context:
{ [] }
Statements {
@@ -178,7 +178,7 @@ Printing analysis 'Polly - Create polyhedral description of Scops' for region:
<li><h4>Show the dependences for the SCoPs</h4>
<pre class="code">opt -basicaa -polly-dependences -analyze matmul.preopt.ll</pre>
<pre>Printing analysis 'Polly - Calculate dependences for SCoP' for region:
-'for.cond =&gt; for.end28' in function 'init_array':
+'for.cond =&gt; for.end19' in function 'init_array':
Must dependences:
{ }
May dependences:
@@ -188,7 +188,7 @@ Printing analysis 'Polly - Create polyhedral description of Scops' for region:
May no source:
{ }
Printing analysis 'Polly - Calculate dependences for SCoP' for region:
-'for.cond =&gt; for.end48' in function 'main':
+'for.cond =&gt; for.end30' in function 'main':
Must dependences:
{ Stmt_4[i0, i1] -&gt; Stmt_6[i0, i1, 0]&nbsp;:
i0 &gt;= 0 and i0 &lt;= 1023 and i1 &gt;= 0 and i1 &lt;= 1023;
@@ -221,8 +221,8 @@ Printing analysis 'Polly - Calculate dependences for SCoP' for region:
Polly can export the polyhedral representation in so called jscop files. Jscop
files contain the polyhedral representation stored in a JSON file.
<pre class="code">opt -basicaa -polly-export-jscop matmul.preopt.ll</pre>
-<pre>Writing SCoP 'for.cond =&gt; for.end28' in function 'init_array' to './init_array___%for.cond---%for.end28.jscop'.
-Writing SCoP 'for.cond =&gt; for.end48' in function 'main' to './main___%for.cond---%for.end48.jscop'.
+<pre>Writing SCoP 'for.cond =&gt; for.end19' in function 'init_array' to './init_array___%for.cond---%for.end19.jscop'.
+Writing SCoP 'for.cond =&gt; for.end30' in function 'main' to './main___%for.cond---%for.end30.jscop'.
</pre></li>
<li><h4>Import the changed jscop files and print the updated SCoP structure
@@ -268,7 +268,7 @@ opt matmul.preopt.ll -basicaa \
</pre>
<pre>
[...]
-Reading JScop '%1 =&gt; %17' in function 'main' from './main___%1---%17.jscop.interchanged'.
+Reading JScop 'for.cond =&gt; for.end30' in function 'main' from './main___%for.cond---%for.end30.jscop.interchanged'.
[...]
main():
for (c2=0;c2&lt;=1535;c2++) {
@@ -295,7 +295,7 @@ opt matmul.preopt.ll -basicaa \
</pre>
<pre>
[...]
-Reading JScop '%1 =&gt; %17' in function 'main' from './main___%1---%17.jscop.interchanged+tiled'.
+Reading JScop 'for.cond =&gt; for.end30' in function 'main' from './main___%for.cond---%for.end30.jscop.interchanged+tiled'.
[...]
main():
for (c2=0;c2&lt;=1535;c2++) {
@@ -329,7 +329,7 @@ opt matmul.preopt.ll -basicaa \
<pre>
[...]
-Reading JScop '%1 =&gt; %17' in function 'main' from './main___%1---%17.jscop.interchanged+tiled+vector'.
+Reading JScop 'for.cond =&gt; for.end30' in function 'main' from './main___%for.cond---%for.end30.jscop.interchanged+tiled+vector'.
[...]
main():
for (c2=0;c2&lt;=1535;c2++) {
@@ -369,11 +369,11 @@ opt -basicaa \
-polly-codegen matmul.preopt.ll \
| opt -O3 &gt; matmul.polly.interchanged.ll</pre>
<pre>
-Reading JScop '%1 =&gt; %19' in function 'init_array' from
- './init_array___%1---%19.jscop.interchanged'.
+Reading JScop 'for.cond =&gt; for.end19' in function 'init_array' from
+ './init_array___%for.cond---%for.end19.jscop.interchanged'.
File could not be read: No such file or directory
-Reading JScop '%1 =&gt; %17' in function 'main' from
- './main___%1---%17.jscop.interchanged'.
+Reading JScop 'for.cond =&gt; for.end30' in function 'main' from
+ './main___%for.cond---%for.end30.jscop.interchanged'.
</pre>
<pre class="code">
opt -basicaa \
@@ -381,11 +381,11 @@ opt -basicaa \
-polly-codegen matmul.preopt.ll \
| opt -O3 &gt; matmul.polly.interchanged+tiled.ll</pre>
<pre>
-Reading JScop '%1 =&gt; %19' in function 'init_array' from
- './init_array___%1---%19.jscop.interchanged+tiled'.
+Reading JScop 'for.cond =&gt; for.end19' in function 'init_array' from
+ './init_array___%for.cond---%for.end19.jscop.interchanged+tiled'.
File could not be read: No such file or directory
-Reading JScop '%1 =&gt; %17' in function 'main' from
- './main___%1---%17.jscop.interchanged+tiled'.
+Reading JScop 'for.cond =&gt; for.end30' in function 'main' from
+ './main___%for.cond---%for.end30.jscop.interchanged+tiled'.
</pre>
<pre class="code">
opt -basicaa \
@@ -393,11 +393,11 @@ opt -basicaa \
-polly-codegen -polly-vectorizer=polly matmul.preopt.ll \
| opt -O3 &gt; matmul.polly.interchanged+tiled+vector.ll</pre>
<pre>
-Reading JScop '%1 =&gt; %19' in function 'init_array' from
- './init_array___%1---%19.jscop.interchanged+tiled+vector'.
+Reading JScop 'for.cond =&gt; for.end19' in function 'init_array' from
+ './init_array___%for.cond---%for.end19.jscop.interchanged+tiled+vector'.
File could not be read: No such file or directory
-Reading JScop '%1 =&gt; %17' in function 'main' from
- './main___%1---%17.jscop.interchanged+tiled+vector'.
+Reading JScop 'for.cond =&gt; for.end30' in function 'main' from
+ './main___%for.cond---%for.end30.jscop.interchanged+tiled+vector'.
</pre>
<pre class="code">
opt -basicaa \
@@ -405,11 +405,11 @@ opt -basicaa \
-polly-codegen -polly-vectorizer=polly -enable-polly-openmp matmul.preopt.ll \
| opt -O3 &gt; matmul.polly.interchanged+tiled+vector+openmp.ll</pre>
<pre>
-Reading JScop '%1 =&gt; %19' in function 'init_array' from
- './init_array___%1---%19.jscop.interchanged+tiled+vector'.
+Reading JScop 'for.cond =&gt; for.end19' in function 'init_array' from
+ './init_array___%for.cond---%for.end19.jscop.interchanged+tiled+vector'.
File could not be read: No such file or directory
-Reading JScop '%1 =&gt; %17' in function 'main' from
- './main___%1---%17.jscop.interchanged+tiled+vector'.
+Reading JScop 'for.cond =&gt; for.end30' in function 'main' from
+ './main___%for.cond---%for.end30.jscop.interchanged+tiled+vector'.
</pre>
<li><h4>Create the executables</h4>
diff --git a/polly/www/experiments/matmul/init_array___%1---%19.jscop b/polly/www/experiments/matmul/init_array___%1---%19.jscop
deleted file mode 100644
index c7f9bb8c87a..00000000000
--- a/polly/www/experiments/matmul/init_array___%1---%19.jscop
+++ /dev/null
@@ -1,21 +0,0 @@
-{
- "context" : "{ [] }",
- "name" : "%1 => %19",
- "statements" : [
- {
- "accesses" : [
- {
- "kind" : "write",
- "relation" : "{ Stmt_5[i0, i1] -> MemRef_A[1536i0 + i1] }"
- },
- {
- "kind" : "write",
- "relation" : "{ Stmt_5[i0, i1] -> MemRef_B[1536i0 + i1] }"
- }
- ],
- "domain" : "{ Stmt_5[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
- "name" : "Stmt_5",
- "schedule" : "{ Stmt_5[i0, i1] -> scattering[0, i0, 0, i1, 0] }"
- }
- ]
-}
diff --git a/polly/www/experiments/matmul/init_array___%for.cond---%for.end19.jscop b/polly/www/experiments/matmul/init_array___%for.cond---%for.end19.jscop
new file mode 100644
index 00000000000..dfd10935988
--- /dev/null
+++ b/polly/www/experiments/matmul/init_array___%for.cond---%for.end19.jscop
@@ -0,0 +1,21 @@
+{
+ "context" : "{ : }",
+ "name" : "for.cond => for.end19",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[1536i0 + i1] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_B[1536i0 + i1] }"
+ }
+ ],
+ "domain" : "{ Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
+ "name" : "Stmt_for_body3",
+ "schedule" : "{ Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0] }"
+ }
+ ]
+}
diff --git a/polly/www/experiments/matmul/main___%1---%17.jscop b/polly/www/experiments/matmul/main___%1---%17.jscop
deleted file mode 100644
index c37839525ad..00000000000
--- a/polly/www/experiments/matmul/main___%1---%17.jscop
+++ /dev/null
@@ -1,40 +0,0 @@
-{
- "context" : "{ : }",
- "name" : "%1 => %17",
- "statements" : [
- {
- "accesses" : [
- {
- "kind" : "write",
- "relation" : "{ Stmt_4[i0, i1] -> MemRef_C[1536i0 + i1] }"
- }
- ],
- "domain" : "{ Stmt_4[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
- "name" : "Stmt_4",
- "schedule" : "{ Stmt_4[i0, i1] -> scattering[0, i0, 0, i1, 0, 0, 0] }"
- },
- {
- "accesses" : [
- {
- "kind" : "read",
- "relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
- },
- {
- "kind" : "read",
- "relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_A[1536i0 + i2] }"
- },
- {
- "kind" : "read",
- "relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_B[i1 + 1536i2] }"
- },
- {
- "kind" : "write",
- "relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
- }
- ],
- "domain" : "{ Stmt_6[i0, i1, i2] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 and i2 >= 0 and i2 <= 1535 }",
- "name" : "Stmt_6",
- "schedule" : "{ Stmt_6[i0, i1, i2] -> scattering[0, i0, 0, i1, 1, i2, 0] }"
- }
- ]
-}
diff --git a/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop
new file mode 100644
index 00000000000..4d6e463a7db
--- /dev/null
+++ b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop
@@ -0,0 +1,40 @@
+{
+ "context" : "{ : }",
+ "name" : "for.cond => for.end30",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[1536i0 + i1] }"
+ }
+ ],
+ "domain" : "{ Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
+ "name" : "Stmt_for_body3",
+ "schedule" : "{ Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0, 0, 0] }"
+ },
+ {
+ "accesses" : [
+ {
+ "kind" : "read",
+ "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
+ },
+ {
+ "kind" : "read",
+ "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[1536i0 + i2] }"
+ },
+ {
+ "kind" : "read",
+ "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i1 + 1536i2] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
+ }
+ ],
+ "domain" : "{ Stmt_for_body8[i0, i1, i2] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 and i2 >= 0 and i2 <= 1535 }",
+ "name" : "Stmt_for_body8",
+ "schedule" : "{ Stmt_for_body8[i0, i1, i2] -> scattering[0, i0, 0, i1, 1, i2, 0] }"
+ }
+ ]
+}
diff --git a/polly/www/experiments/matmul/main___%1---%17.jscop.interchanged b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged
index 1d73c8a9fdf..1d73c8a9fdf 100644
--- a/polly/www/experiments/matmul/main___%1---%17.jscop.interchanged
+++ b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged
diff --git a/polly/www/experiments/matmul/main___%1---%17.jscop.interchanged+tiled b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged+tiled
index ab68b5ebda2..ab68b5ebda2 100644
--- a/polly/www/experiments/matmul/main___%1---%17.jscop.interchanged+tiled
+++ b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged+tiled
diff --git a/polly/www/experiments/matmul/main___%1---%17.jscop.interchanged+tiled+vector b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged+tiled+vector
index 67a1cd9c3f0..67a1cd9c3f0 100644
--- a/polly/www/experiments/matmul/main___%1---%17.jscop.interchanged+tiled+vector
+++ b/polly/www/experiments/matmul/main___%for.cond---%for.end30.jscop.interchanged+tiled+vector
diff --git a/polly/www/experiments/matmul/matmul.normalopt.exe b/polly/www/experiments/matmul/matmul.normalopt.exe
index 73b94752d8e..cdb9e67af45 100755
--- a/polly/www/experiments/matmul/matmul.normalopt.exe
+++ b/polly/www/experiments/matmul/matmul.normalopt.exe
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.normalopt.ll b/polly/www/experiments/matmul/matmul.normalopt.ll
index 182ed9aa221..ba792c29f70 100644
--- a/polly/www/experiments/matmul/matmul.normalopt.ll
+++ b/polly/www/experiments/matmul/matmul.normalopt.ll
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.normalopt.s b/polly/www/experiments/matmul/matmul.normalopt.s
index f10f6441182..079af702a14 100644
--- a/polly/www/experiments/matmul/matmul.normalopt.s
+++ b/polly/www/experiments/matmul/matmul.normalopt.s
@@ -2,74 +2,112 @@
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
-# BB#0:
- xorl %eax, %eax
- movsd .LCPI0_0(%rip), %xmm0
- movq %rax, %rcx
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp2:
+ .cfi_def_cfa_offset 16
+.Ltmp3:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp4:
+ .cfi_def_cfa_register %rbp
+ xorl %r8d, %r8d
+ vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
-.LBB0_1: # %.preheader
+.LBB0_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB0_2 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB0_2: # Parent Loop BB0_1 Depth=1
+.LBB0_2: # %for.body3
+ # Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
+ movl %ecx, %edx
+ imull %r8d, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %r8, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB0_2
-# BB#3: # in Loop: Header=BB0_1 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- cmpq $1536, %rcx # imm = 0x600
+# BB#3: # %for.inc17
+ # in Loop: Header=BB0_1 Depth=1
+ incq %r8
+ cmpq $1536, %r8 # imm = 0x600
jne .LBB0_1
-# BB#4:
+# BB#4: # %for.end19
+ popq %rbp
ret
-.Ltmp0:
- .size init_array, .Ltmp0-init_array
+.Ltmp5:
+ .size init_array, .Ltmp5-init_array
+ .cfi_endproc
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
-# BB#0:
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp9:
+ .cfi_def_cfa_offset 16
+.Ltmp10:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp11:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
pushq %r14
+ pushq %r12
pushq %rbx
- pushq %rax
- movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
+.Ltmp12:
+ .cfi_offset %rbx, -48
+.Ltmp13:
+ .cfi_offset %r12, -40
+.Ltmp14:
+ .cfi_offset %r14, -32
+.Ltmp15:
+ .cfi_offset %r15, -24
+ xorl %r14d, %r14d
+ movl $C, %r15d
.align 16, 0x90
-.LBB1_1: # %.preheader
+.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- xorl %r14d, %r14d
- movq stdout(%rip), %rdi
+ movq stdout(%rip), %rax
+ movq %r15, %r12
+ xorl %ebx, %ebx
.align 16, 0x90
-.LBB1_2: # Parent Loop BB1_1 Depth=1
+.LBB1_2: # %for.body3
+ # Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- movss C+9437184(%rbx,%r14,4), %xmm0
- cvtss2sd %xmm0, %xmm0
+ vmovss (%r12), %xmm0
+ vcvtss2sd %xmm0, %xmm0, %xmm0
+ movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
- movslq %r14d, %rax
+ movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
@@ -79,113 +117,146 @@ print_array: # @print_array
subl %ecx, %eax
cmpl $79, %eax
jne .LBB1_4
-# BB#3: # in Loop: Header=BB1_2 Depth=2
+# BB#3: # %if.then
+ # in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
-.LBB1_4: # in Loop: Header=BB1_2 Depth=2
- incq %r14
- movq stdout(%rip), %rsi
- cmpq $1536, %r14 # imm = 0x600
- movq %rsi, %rdi
+.LBB1_4: # %for.inc
+ # in Loop: Header=BB1_2 Depth=2
+ addq $4, %r12
+ incq %rbx
+ movq stdout(%rip), %rax
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # in Loop: Header=BB1_1 Depth=1
+# BB#5: # %for.end
+ # in Loop: Header=BB1_1 Depth=1
movl $10, %edi
+ movq %rax, %rsi
callq fputc
- addq $6144, %rbx # imm = 0x1800
+ addq $6144, %r15 # imm = 0x1800
+ incq %r14
+ cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
-# BB#6:
- addq $8, %rsp
+# BB#6: # %for.end12
popq %rbx
+ popq %r12
popq %r14
+ popq %r15
+ popq %rbp
ret
-.Ltmp1:
- .size print_array, .Ltmp1-print_array
+.Ltmp16:
+ .size print_array, .Ltmp16-print_array
+ .cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
.type main,@function
main: # @main
-# BB#0:
- xorl %eax, %eax
- movsd .LCPI2_0(%rip), %xmm0
- movq %rax, %rcx
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp19:
+ .cfi_def_cfa_offset 16
+.Ltmp20:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp21:
+ .cfi_def_cfa_register %rbp
+ xorl %r8d, %r8d
+ vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
-.LBB2_1: # %.preheader.i
+.LBB2_1: # %for.cond1.preheader.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB2_2: # Parent Loop BB2_1 Depth=1
+.LBB2_2: # %for.body3.i
+ # Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
+ movl %ecx, %edx
+ imull %r8d, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %r8, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB2_2
-# BB#3: # in Loop: Header=BB2_1 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- xorl %edx, %edx
- cmpq $1536, %rcx # imm = 0x600
+# BB#3: # %for.inc17.i
+ # in Loop: Header=BB2_1 Depth=1
+ incq %r8
+ cmpq $1536, %r8 # imm = 0x600
jne .LBB2_1
+# BB#4:
+ xorl %r8d, %r8d
+ movl $A, %r9d
.align 16, 0x90
-.LBB2_4: # %.preheader
+.LBB2_5: # %for.cond1.preheader
# =>This Loop Header: Depth=1
- # Child Loop BB2_5 Depth 2
- # Child Loop BB2_6 Depth 3
- xorl %eax, %eax
- xorl %ecx, %ecx
+ # Child Loop BB2_6 Depth 2
+ # Child Loop BB2_7 Depth 3
+ leaq (%r8,%r8,2), %rdx
+ shlq $11, %rdx
+ leaq C(%rdx), %rsi
+ xorl %edi, %edi
.align 16, 0x90
-.LBB2_5: # Parent Loop BB2_4 Depth=1
+.LBB2_6: # %for.body3
+ # Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB2_6 Depth 3
- movl $0, C(%rcx,%rdx)
- leaq B(%rcx), %rsi
- pxor %xmm0, %xmm0
- movq %rax, %rdi
+ # Child Loop BB2_7 Depth 3
+ movl $0, (%rsi)
+ vxorps %xmm0, %xmm0, %xmm0
+ movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000
+ movq %r9, %rcx
.align 16, 0x90
-.LBB2_6: # Parent Loop BB2_4 Depth=1
- # Parent Loop BB2_5 Depth=2
+.LBB2_7: # %for.body8
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_6 Depth=2
# => This Inner Loop Header: Depth=3
- movss A(%rdx,%rdi,4), %xmm1
- mulss (%rsi), %xmm1
- addss %xmm1, %xmm0
- addq $6144, %rsi # imm = 0x1800
+ vmovss (%rcx), %xmm1
+ vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1
+ vaddss %xmm1, %xmm0, %xmm0
+ addq $4, %rcx
+ addq $6144, %rax # imm = 0x1800
+ jne .LBB2_7
+# BB#8: # %for.inc25
+ # in Loop: Header=BB2_6 Depth=2
+ vmovss %xmm0, (%rsi)
+ leaq C+4(%rdx,%rdi,4), %rsi
incq %rdi
cmpq $1536, %rdi # imm = 0x600
jne .LBB2_6
-# BB#7: # in Loop: Header=BB2_5 Depth=2
- movss %xmm0, C(%rcx,%rdx)
- addq $4, %rcx
- cmpq $6144, %rcx # imm = 0x1800
+# BB#9: # %for.inc28
+ # in Loop: Header=BB2_5 Depth=1
+ addq $6144, %r9 # imm = 0x1800
+ incq %r8
+ cmpq $1536, %r8 # imm = 0x600
jne .LBB2_5
-# BB#8: # %init_array.exit
- # in Loop: Header=BB2_4 Depth=1
- addq $6144, %rdx # imm = 0x1800
- cmpq $9437184, %rdx # imm = 0x900000
- jne .LBB2_4
-# BB#9:
+# BB#10: # %for.end30
xorl %eax, %eax
+ popq %rbp
ret
-.Ltmp2:
- .size main, .Ltmp2-main
+.Ltmp22:
+ .size main, .Ltmp22-main
+ .cfi_endproc
.type A,@object # @A
.comm A,9437184,16
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe
index 7a2e6de6138..feb24366d73 100755
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.exe
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
index 710f706f68e..593794ef380 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
index 04dc0656c06..ca87de11704 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
@@ -1,55 +1,166 @@
.file "matmul.polly.interchanged+tiled+vector+openmp.ll"
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 8
+.LCPI0_0:
+ .quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
-# BB#0: # %pollyBB
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp3:
+ .cfi_def_cfa_offset 16
+.Ltmp4:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp5:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
+ pushq %r14
pushq %rbx
- subq $16, %rsp
- movq $A, (%rsp)
- movq $B, 8(%rsp)
+ subq $24, %rsp
+.Ltmp6:
+ .cfi_offset %rbx, -40
+.Ltmp7:
+ .cfi_offset %r14, -32
+.Ltmp8:
+ .cfi_offset %r15, -24
+ leaq -32(%rbp), %rsi
movl $init_array.omp_subfn, %edi
- leaq (%rsp), %rbx
xorl %edx, %edx
xorl %ecx, %ecx
movl $1536, %r8d # imm = 0x600
movl $1, %r9d
- movq %rbx, %rsi
callq GOMP_parallel_loop_runtime_start
- movq %rbx, %rdi
- callq init_array.omp_subfn
+ leaq -40(%rbp), %rdi
+ leaq -48(%rbp), %rsi
+ callq GOMP_loop_runtime_next
+ testb %al, %al
+ je .LBB0_4
+# BB#1:
+ leaq -40(%rbp), %r14
+ leaq -48(%rbp), %r15
+ vmovsd .LCPI0_0(%rip), %xmm1
+ .align 16, 0x90
+.LBB0_2: # %omp.loadIVBounds.i
+ # =>This Loop Header: Depth=1
+ # Child Loop BB0_8 Depth 2
+ # Child Loop BB0_5 Depth 3
+ movq -48(%rbp), %r8
+ leaq -1(%r8), %rcx
+ movq -40(%rbp), %rax
+ cmpq %rcx, %rax
+ jg .LBB0_3
+# BB#7: # %polly.loop_preheader4.preheader.i
+ # in Loop: Header=BB0_2 Depth=1
+ addq $-2, %r8
+ .align 16, 0x90
+.LBB0_8: # %polly.loop_preheader4.i
+ # Parent Loop BB0_2 Depth=1
+ # => This Loop Header: Depth=2
+ # Child Loop BB0_5 Depth 3
+ xorl %edx, %edx
+ .align 16, 0x90
+.LBB0_5: # %polly.loop_header3.i
+ # Parent Loop BB0_2 Depth=1
+ # Parent Loop BB0_8 Depth=2
+ # => This Inner Loop Header: Depth=3
+ movl %edx, %esi
+ imull %eax, %esi
+ movl %esi, %edi
+ sarl $31, %edi
+ shrl $22, %edi
+ addl %esi, %edi
+ andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
+ negl %edi
+ movq %rax, %rcx
+ shlq $11, %rcx
+ leal 1(%rsi,%rdi), %ebx
+ leaq (%rcx,%rcx,2), %rdi
+ leaq 1(%rdx), %rsi
+ cmpq $1536, %rsi # imm = 0x600
+ vcvtsi2sdl %ebx, %xmm0, %xmm0
+ vmulsd %xmm1, %xmm0, %xmm0
+ vcvtsd2ss %xmm0, %xmm0, %xmm0
+ vmovss %xmm0, A(%rdi,%rdx,4)
+ vmovss %xmm0, B(%rdi,%rdx,4)
+ movq %rsi, %rdx
+ jne .LBB0_5
+# BB#6: # %polly.loop_exit5.i
+ # in Loop: Header=BB0_8 Depth=2
+ cmpq %r8, %rax
+ leaq 1(%rax), %rax
+ jle .LBB0_8
+.LBB0_3: # %omp.checkNext.backedge.i
+ # in Loop: Header=BB0_2 Depth=1
+ movq %r14, %rdi
+ movq %r15, %rsi
+ callq GOMP_loop_runtime_next
+ vmovsd .LCPI0_0(%rip), %xmm1
+ testb %al, %al
+ jne .LBB0_2
+.LBB0_4: # %init_array.omp_subfn.exit
+ callq GOMP_loop_end_nowait
callq GOMP_parallel_end
- addq $16, %rsp
+ addq $24, %rsp
popq %rbx
+ popq %r14
+ popq %r15
+ popq %rbp
ret
-.Ltmp0:
- .size init_array, .Ltmp0-init_array
+.Ltmp9:
+ .size init_array, .Ltmp9-init_array
+ .cfi_endproc
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
-# BB#0:
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp13:
+ .cfi_def_cfa_offset 16
+.Ltmp14:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp15:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
pushq %r14
+ pushq %r12
pushq %rbx
- pushq %rax
- movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
+.Ltmp16:
+ .cfi_offset %rbx, -48
+.Ltmp17:
+ .cfi_offset %r12, -40
+.Ltmp18:
+ .cfi_offset %r14, -32
+.Ltmp19:
+ .cfi_offset %r15, -24
+ xorl %r14d, %r14d
+ movl $C, %r15d
.align 16, 0x90
-.LBB1_1: # %.preheader
+.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- xorl %r14d, %r14d
- movq stdout(%rip), %rdi
+ movq stdout(%rip), %rax
+ movq %r15, %r12
+ xorl %ebx, %ebx
.align 16, 0x90
-.LBB1_2: # Parent Loop BB1_1 Depth=1
+.LBB1_2: # %for.body3
+ # Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- movss C+9437184(%rbx,%r14,4), %xmm0
- cvtss2sd %xmm0, %xmm0
+ vmovss (%r12), %xmm0
+ vcvtss2sd %xmm0, %xmm0, %xmm0
+ movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
- movslq %r14d, %rax
+ movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
@@ -59,127 +170,135 @@ print_array: # @print_array
subl %ecx, %eax
cmpl $79, %eax
jne .LBB1_4
-# BB#3: # in Loop: Header=BB1_2 Depth=2
+# BB#3: # %if.then
+ # in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
-.LBB1_4: # in Loop: Header=BB1_2 Depth=2
- incq %r14
- movq stdout(%rip), %rsi
- cmpq $1536, %r14 # imm = 0x600
- movq %rsi, %rdi
+.LBB1_4: # %for.inc
+ # in Loop: Header=BB1_2 Depth=2
+ addq $4, %r12
+ incq %rbx
+ movq stdout(%rip), %rax
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # in Loop: Header=BB1_1 Depth=1
+# BB#5: # %for.end
+ # in Loop: Header=BB1_1 Depth=1
movl $10, %edi
+ movq %rax, %rsi
callq fputc
- addq $6144, %rbx # imm = 0x1800
+ addq $6144, %r15 # imm = 0x1800
+ incq %r14
+ cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
-# BB#6:
- addq $8, %rsp
+# BB#6: # %for.end12
popq %rbx
+ popq %r12
popq %r14
+ popq %r15
+ popq %rbp
ret
-.Ltmp1:
- .size print_array, .Ltmp1-print_array
+.Ltmp20:
+ .size print_array, .Ltmp20-print_array
+ .cfi_endproc
.globl main
.align 16, 0x90
.type main,@function
main: # @main
-# BB#0: # %pollyBB
+ .cfi_startproc
+# BB#0: # %entry
pushq %rbp
+.Ltmp24:
+ .cfi_def_cfa_offset 16
+.Ltmp25:
+ .cfi_offset %rbp, -16
movq %rsp, %rbp
+.Ltmp26:
+ .cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
- subq $56, %rsp
- movq $A, -72(%rbp)
- movq $B, -64(%rbp)
- movl $init_array.omp_subfn, %edi
- leaq -72(%rbp), %rbx
- movq %rbx, %rsi
- xorl %edx, %edx
- xorl %ecx, %ecx
- movl $1536, %r8d # imm = 0x600
- movl $1, %r9d
- callq GOMP_parallel_loop_runtime_start
- movq %rbx, %rdi
- callq init_array.omp_subfn
- callq GOMP_parallel_end
+ subq $24, %rsp
+.Ltmp27:
+ .cfi_offset %rbx, -56
+.Ltmp28:
+ .cfi_offset %r12, -48
+.Ltmp29:
+ .cfi_offset %r13, -40
+.Ltmp30:
+ .cfi_offset %r14, -32
+.Ltmp31:
+ .cfi_offset %r15, -24
+ callq init_array
+ leaq -48(%rbp), %rsi
movl $main.omp_subfn, %edi
- leaq -96(%rbp), %rsi
- movq $C, -96(%rbp)
- movq $A, -88(%rbp)
- movq $B, -80(%rbp)
xorl %edx, %edx
xorl %ecx, %ecx
movl $1536, %r8d # imm = 0x600
movl $1, %r9d
callq GOMP_parallel_loop_runtime_start
- leaq -48(%rbp), %rdi
- leaq -56(%rbp), %rsi
+ leaq -56(%rbp), %rdi
+ leaq -64(%rbp), %rsi
callq GOMP_loop_runtime_next
- testb $1, %al
- je .LBB2_6
+ testb %al, %al
+ je .LBB2_4
# BB#1:
- leaq -48(%rbp), %rbx
leaq -56(%rbp), %r14
+ leaq -64(%rbp), %r15
.align 16, 0x90
-.LBB2_3: # %omp.loadIVBounds.i
+.LBB2_2: # %omp.loadIVBounds.i
# =>This Loop Header: Depth=1
- # Child Loop BB2_5 Depth 2
- movq -56(%rbp), %r15
- decq %r15
- movq -48(%rbp), %r12
- cmpq %r15, %r12
- jg .LBB2_2
-# BB#4: # %polly.loop_header2.preheader.lr.ph.i
- # in Loop: Header=BB2_3 Depth=1
- leaq (%r12,%r12,2), %rax
- shlq $11, %rax
- leaq C(%rax), %r13
+ # Child Loop BB2_6 Depth 2
+ movq -64(%rbp), %r12
+ leaq -1(%r12), %rcx
+ movq -56(%rbp), %rax
+ cmpq %rcx, %rax
+ jg .LBB2_3
+# BB#5: # %polly.loop_preheader4.preheader.i
+ # in Loop: Header=BB2_2 Depth=1
+ addq $-2, %r12
+ leaq (%rax,%rax,2), %rcx
+ leaq -1(%rax), %r13
+ shlq $11, %rcx
+ leaq C(%rcx), %rbx
.align 16, 0x90
-.LBB2_5: # %polly.loop_header2.preheader.i
- # Parent Loop BB2_3 Depth=1
+.LBB2_6: # %polly.loop_preheader4.i
+ # Parent Loop BB2_2 Depth=1
# => This Inner Loop Header: Depth=2
- movq %r13, %rdi
+ movq %rbx, %rdi
xorl %esi, %esi
movl $6144, %edx # imm = 0x1800
callq memset
- addq $6144, %r13 # imm = 0x1800
- incq %r12
- cmpq %r15, %r12
- jle .LBB2_5
-.LBB2_2: # %omp.checkNext.loopexit.i
- # in Loop: Header=BB2_3 Depth=1
- movq %rbx, %rdi
- movq %r14, %rsi
+ addq $6144, %rbx # imm = 0x1800
+ incq %r13
+ cmpq %r12, %r13
+ jle .LBB2_6
+.LBB2_3: # %omp.checkNext.backedge.i
+ # in Loop: Header=BB2_2 Depth=1
+ movq %r14, %rdi
+ movq %r15, %rsi
callq GOMP_loop_runtime_next
- testb $1, %al
- jne .LBB2_3
-.LBB2_6: # %main.omp_subfn.exit
+ testb %al, %al
+ jne .LBB2_2
+.LBB2_4: # %main.omp_subfn.exit
callq GOMP_loop_end_nowait
callq GOMP_parallel_end
- movq %rsp, %rax
- leaq -32(%rax), %rbx
+ leaq -48(%rbp), %rbx
movl $main.omp_subfn1, %edi
+ movq %rbx, %rsi
+ xorl %edx, %edx
xorl %ecx, %ecx
movl $1536, %r8d # imm = 0x600
movl $64, %r9d
- movq %rbx, %rsp
- movq $C, -32(%rax)
- movq $A, -24(%rax)
- movq $B, -16(%rax)
- movq %rbx, %rsi
- xorl %edx, %edx
callq GOMP_parallel_loop_runtime_start
movq %rbx, %rdi
callq main.omp_subfn1
callq GOMP_parallel_end
xorl %eax, %eax
- leaq -40(%rbp), %rsp
+ addq $24, %rsp
popq %rbx
popq %r12
popq %r13
@@ -187,418 +306,427 @@ main: # @main
popq %r15
popq %rbp
ret
-.Ltmp2:
- .size main, .Ltmp2-main
+.Ltmp32:
+ .size main, .Ltmp32-main
+ .cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI3_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.align 16, 0x90
.type init_array.omp_subfn,@function
init_array.omp_subfn: # @init_array.omp_subfn
-.Leh_func_begin3:
-.Ltmp6:
.cfi_startproc
# BB#0: # %omp.setup
- pushq %r14
-.Ltmp7:
+ pushq %rbp
+.Ltmp36:
.cfi_def_cfa_offset 16
+.Ltmp37:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp38:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
+ pushq %r14
pushq %rbx
-.Ltmp8:
- .cfi_def_cfa_offset 24
subq $24, %rsp
-.Ltmp9:
- .cfi_def_cfa_offset 48
-.Ltmp10:
- .cfi_offset 3, -24
-.Ltmp11:
- .cfi_offset 14, -16
- leaq 16(%rsp), %rdi
- leaq 8(%rsp), %rsi
+.Ltmp39:
+ .cfi_offset %rbx, -40
+.Ltmp40:
+ .cfi_offset %r14, -32
+.Ltmp41:
+ .cfi_offset %r15, -24
+ leaq -32(%rbp), %rdi
+ leaq -40(%rbp), %rsi
callq GOMP_loop_runtime_next
- testb $1, %al
- je .LBB3_2
+ testb %al, %al
+ je .LBB3_4
# BB#1:
- leaq 16(%rsp), %rbx
- leaq 8(%rsp), %r14
- jmp .LBB3_4
-.LBB3_2: # %omp.exit
- callq GOMP_loop_end_nowait
- addq $24, %rsp
- popq %rbx
- popq %r14
- ret
+ leaq -32(%rbp), %r14
+ leaq -40(%rbp), %r15
+ vmovsd .LCPI3_0(%rip), %xmm1
.align 16, 0x90
-.LBB3_3: # %omp.checkNext.loopexit
- # in Loop: Header=BB3_4 Depth=1
- movq %rbx, %rdi
- movq %r14, %rsi
- callq GOMP_loop_runtime_next
- testb $1, %al
- je .LBB3_2
-.LBB3_4: # %omp.loadIVBounds
+.LBB3_2: # %omp.loadIVBounds
# =>This Loop Header: Depth=1
- # Child Loop BB3_7 Depth 2
- # Child Loop BB3_8 Depth 3
- movq 8(%rsp), %rax
- decq %rax
- movq 16(%rsp), %rcx
- cmpq %rax, %rcx
+ # Child Loop BB3_8 Depth 2
+ # Child Loop BB3_5 Depth 3
+ movq -40(%rbp), %r8
+ leaq -1(%r8), %rcx
+ movq -32(%rbp), %rax
+ cmpq %rcx, %rax
jg .LBB3_3
-# BB#5: # %polly.loop_header2.preheader.lr.ph
- # in Loop: Header=BB3_4 Depth=1
- movq %rcx, %rdx
- shlq $11, %rdx
- leaq (%rdx,%rdx,2), %rdx
- jmp .LBB3_7
+# BB#7: # %polly.loop_preheader4.preheader
+ # in Loop: Header=BB3_2 Depth=1
+ addq $-2, %r8
.align 16, 0x90
-.LBB3_6: # %polly.loop_header.loopexit
- # in Loop: Header=BB3_7 Depth=2
- addq $6144, %rdx # imm = 0x1800
- incq %rcx
- cmpq %rax, %rcx
- jg .LBB3_3
-.LBB3_7: # %polly.loop_header2.preheader
- # Parent Loop BB3_4 Depth=1
+.LBB3_8: # %polly.loop_preheader4
+ # Parent Loop BB3_2 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB3_8 Depth 3
- movq $-1536, %rsi # imm = 0xFFFFFFFFFFFFFA00
- xorl %edi, %edi
+ # Child Loop BB3_5 Depth 3
+ xorl %edx, %edx
.align 16, 0x90
-.LBB3_8: # %polly.loop_body3
- # Parent Loop BB3_4 Depth=1
- # Parent Loop BB3_7 Depth=2
+.LBB3_5: # %polly.loop_header3
+ # Parent Loop BB3_2 Depth=1
+ # Parent Loop BB3_8 Depth=2
# => This Inner Loop Header: Depth=3
- movl %edi, %r8d
- sarl $31, %r8d
- shrl $22, %r8d
- addl %edi, %r8d
- andl $-1024, %r8d # imm = 0xFFFFFFFFFFFFFC00
- negl %r8d
- leal 1(%rdi,%r8), %r8d
- cvtsi2sd %r8d, %xmm0
- mulsd .LCPI3_0(%rip), %xmm0
- cvtsd2ss %xmm0, %xmm0
- movss %xmm0, A+6144(%rdx,%rsi,4)
- movss %xmm0, B+6144(%rdx,%rsi,4)
- addl %ecx, %edi
- incq %rsi
- jne .LBB3_8
- jmp .LBB3_6
-.Ltmp12:
- .size init_array.omp_subfn, .Ltmp12-init_array.omp_subfn
-.Ltmp13:
+ movl %edx, %esi
+ imull %eax, %esi
+ movl %esi, %edi
+ sarl $31, %edi
+ shrl $22, %edi
+ addl %esi, %edi
+ andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
+ negl %edi
+ movq %rax, %rcx
+ shlq $11, %rcx
+ leal 1(%rsi,%rdi), %ebx
+ leaq (%rcx,%rcx,2), %rdi
+ leaq 1(%rdx), %rsi
+ cmpq $1536, %rsi # imm = 0x600
+ vcvtsi2sdl %ebx, %xmm0, %xmm0
+ vmulsd %xmm1, %xmm0, %xmm0
+ vcvtsd2ss %xmm0, %xmm0, %xmm0
+ vmovss %xmm0, A(%rdi,%rdx,4)
+ vmovss %xmm0, B(%rdi,%rdx,4)
+ movq %rsi, %rdx
+ jne .LBB3_5
+# BB#6: # %polly.loop_exit5
+ # in Loop: Header=BB3_8 Depth=2
+ cmpq %r8, %rax
+ leaq 1(%rax), %rax
+ jle .LBB3_8
+.LBB3_3: # %omp.checkNext.backedge
+ # in Loop: Header=BB3_2 Depth=1
+ movq %r14, %rdi
+ movq %r15, %rsi
+ callq GOMP_loop_runtime_next
+ vmovsd .LCPI3_0(%rip), %xmm1
+ testb %al, %al
+ jne .LBB3_2
+.LBB3_4: # %omp.exit
+ callq GOMP_loop_end_nowait
+ addq $24, %rsp
+ popq %rbx
+ popq %r14
+ popq %r15
+ popq %rbp
+ ret
+.Ltmp42:
+ .size init_array.omp_subfn, .Ltmp42-init_array.omp_subfn
.cfi_endproc
-.Leh_func_end3:
.align 16, 0x90
.type main.omp_subfn,@function
main.omp_subfn: # @main.omp_subfn
-.Leh_func_begin4:
-.Ltmp20:
.cfi_startproc
# BB#0: # %omp.setup
- pushq %r15
-.Ltmp21:
+ pushq %rbp
+.Ltmp46:
.cfi_def_cfa_offset 16
+.Ltmp47:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp48:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
pushq %r14
-.Ltmp22:
- .cfi_def_cfa_offset 24
pushq %r13
-.Ltmp23:
- .cfi_def_cfa_offset 32
pushq %r12
-.Ltmp24:
- .cfi_def_cfa_offset 40
pushq %rbx
-.Ltmp25:
- .cfi_def_cfa_offset 48
- subq $16, %rsp
-.Ltmp26:
- .cfi_def_cfa_offset 64
-.Ltmp27:
- .cfi_offset 3, -48
-.Ltmp28:
- .cfi_offset 12, -40
-.Ltmp29:
- .cfi_offset 13, -32
-.Ltmp30:
- .cfi_offset 14, -24
-.Ltmp31:
- .cfi_offset 15, -16
- leaq 8(%rsp), %rdi
- leaq (%rsp), %rsi
+ subq $24, %rsp
+.Ltmp49:
+ .cfi_offset %rbx, -56
+.Ltmp50:
+ .cfi_offset %r12, -48
+.Ltmp51:
+ .cfi_offset %r13, -40
+.Ltmp52:
+ .cfi_offset %r14, -32
+.Ltmp53:
+ .cfi_offset %r15, -24
+ leaq -48(%rbp), %rdi
+ leaq -56(%rbp), %rsi
callq GOMP_loop_runtime_next
- testb $1, %al
- je .LBB4_2
+ testb %al, %al
+ je .LBB4_4
# BB#1:
- leaq 8(%rsp), %rbx
- leaq (%rsp), %r14
- jmp .LBB4_4
-.LBB4_2: # %omp.exit
- callq GOMP_loop_end_nowait
- addq $16, %rsp
- popq %rbx
- popq %r12
- popq %r13
- popq %r14
- popq %r15
- ret
+ leaq -48(%rbp), %r14
+ leaq -56(%rbp), %r15
.align 16, 0x90
-.LBB4_3: # %omp.checkNext.loopexit
- # in Loop: Header=BB4_4 Depth=1
- movq %rbx, %rdi
- movq %r14, %rsi
- callq GOMP_loop_runtime_next
- testb $1, %al
- je .LBB4_2
-.LBB4_4: # %omp.loadIVBounds
+.LBB4_2: # %omp.loadIVBounds
# =>This Loop Header: Depth=1
# Child Loop BB4_6 Depth 2
- movq (%rsp), %r15
- decq %r15
- movq 8(%rsp), %r12
- cmpq %r15, %r12
+ movq -56(%rbp), %r12
+ leaq -1(%r12), %rcx
+ movq -48(%rbp), %rax
+ cmpq %rcx, %rax
jg .LBB4_3
-# BB#5: # %polly.loop_header2.preheader.lr.ph
- # in Loop: Header=BB4_4 Depth=1
- leaq (%r12,%r12,2), %rax
- shlq $11, %rax
- leaq C(%rax), %r13
+# BB#5: # %polly.loop_preheader4.preheader
+ # in Loop: Header=BB4_2 Depth=1
+ addq $-2, %r12
+ leaq (%rax,%rax,2), %rcx
+ leaq -1(%rax), %r13
+ shlq $11, %rcx
+ leaq C(%rcx), %rbx
.align 16, 0x90
-.LBB4_6: # %polly.loop_header2.preheader
- # Parent Loop BB4_4 Depth=1
+.LBB4_6: # %polly.loop_preheader4
+ # Parent Loop BB4_2 Depth=1
# => This Inner Loop Header: Depth=2
- movq %r13, %rdi
+ movq %rbx, %rdi
xorl %esi, %esi
movl $6144, %edx # imm = 0x1800
callq memset
- addq $6144, %r13 # imm = 0x1800
- incq %r12
- cmpq %r15, %r12
+ addq $6144, %rbx # imm = 0x1800
+ incq %r13
+ cmpq %r12, %r13
jle .LBB4_6
- jmp .LBB4_3
-.Ltmp32:
- .size main.omp_subfn, .Ltmp32-main.omp_subfn
-.Ltmp33:
+.LBB4_3: # %omp.checkNext.backedge
+ # in Loop: Header=BB4_2 Depth=1
+ movq %r14, %rdi
+ movq %r15, %rsi
+ callq GOMP_loop_runtime_next
+ testb %al, %al
+ jne .LBB4_2
+.LBB4_4: # %omp.exit
+ callq GOMP_loop_end_nowait
+ addq $24, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ popq %rbp
+ ret
+.Ltmp54:
+ .size main.omp_subfn, .Ltmp54-main.omp_subfn
.cfi_endproc
-.Leh_func_end4:
.align 16, 0x90
.type main.omp_subfn1,@function
main.omp_subfn1: # @main.omp_subfn1
-.Leh_func_begin5:
-.Ltmp41:
.cfi_startproc
# BB#0: # %omp.setup
pushq %rbp
-.Ltmp42:
+.Ltmp58:
.cfi_def_cfa_offset 16
+.Ltmp59:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp60:
+ .cfi_def_cfa_register %rbp
pushq %r15
-.Ltmp43:
- .cfi_def_cfa_offset 24
pushq %r14
-.Ltmp44:
- .cfi_def_cfa_offset 32
pushq %r13
-.Ltmp45:
- .cfi_def_cfa_offset 40
pushq %r12
-.Ltmp46:
- .cfi_def_cfa_offset 48
pushq %rbx
-.Ltmp47:
- .cfi_def_cfa_offset 56
- subq $40, %rsp
-.Ltmp48:
- .cfi_def_cfa_offset 96
-.Ltmp49:
- .cfi_offset 3, -56
-.Ltmp50:
- .cfi_offset 12, -48
-.Ltmp51:
- .cfi_offset 13, -40
-.Ltmp52:
- .cfi_offset 14, -32
-.Ltmp53:
- .cfi_offset 15, -24
-.Ltmp54:
- .cfi_offset 6, -16
- leaq 32(%rsp), %rdi
- leaq 24(%rsp), %rsi
+ subq $72, %rsp
+.Ltmp61:
+ .cfi_offset %rbx, -56
+.Ltmp62:
+ .cfi_offset %r12, -48
+.Ltmp63:
+ .cfi_offset %r13, -40
+.Ltmp64:
+ .cfi_offset %r14, -32
+.Ltmp65:
+ .cfi_offset %r15, -24
jmp .LBB5_1
.align 16, 0x90
-.LBB5_4: # %omp.loadIVBounds
+.LBB5_2: # %omp.loadIVBounds
# in Loop: Header=BB5_1 Depth=1
- movq 24(%rsp), %rax
- decq %rax
- movq %rax, (%rsp) # 8-byte Spill
- movq 32(%rsp), %rcx
+ movq -56(%rbp), %rax
+ movq %rax, -112(%rbp) # 8-byte Spill
+ leaq -1(%rax), %rax
+ movq -48(%rbp), %rcx
cmpq %rax, %rcx
- jg .LBB5_3
-# BB#5: # %polly.loop_header2.preheader.lr.ph
+ jg .LBB5_1
+# BB#3: # %polly.loop_preheader4.preheader
# in Loop: Header=BB5_1 Depth=1
- leaq (%rcx,%rcx,2), %rax
- movq %rcx, %rdx
- shlq $9, %rdx
- leaq (%rdx,%rdx,2), %rdx
- movq %rdx, 16(%rsp) # 8-byte Spill
- shlq $11, %rax
- leaq A(%rax), %rax
- movq %rax, 8(%rsp) # 8-byte Spill
- jmp .LBB5_7
+ leaq -1(%rcx), %rax
+ movq %rax, -88(%rbp) # 8-byte Spill
+ addq $-65, -112(%rbp) # 8-byte Folded Spill
+ movq %rcx, %rax
+ shlq $9, %rax
+ leaq (%rax,%rax,2), %rax
+ leaq C+16(,%rax,4), %rax
+ movq %rax, -104(%rbp) # 8-byte Spill
.align 16, 0x90
-.LBB5_6: # %polly.loop_header.loopexit
- # in Loop: Header=BB5_7 Depth=2
- addq $98304, 16(%rsp) # 8-byte Folded Spill
- # imm = 0x18000
- addq $393216, 8(%rsp) # 8-byte Folded Spill
- # imm = 0x60000
- addq $64, %rcx
- cmpq (%rsp), %rcx # 8-byte Folded Reload
- jg .LBB5_3
-.LBB5_7: # %polly.loop_header2.preheader
+.LBB5_7: # %polly.loop_preheader4
# Parent Loop BB5_1 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB5_9 Depth 3
- # Child Loop BB5_11 Depth 4
+ # Child Loop BB5_8 Depth 3
+ # Child Loop BB5_9 Depth 4
+ # Child Loop BB5_12 Depth 5
+ # Child Loop BB5_17 Depth 6
+ # Child Loop BB5_18 Depth 7
# Child Loop BB5_14 Depth 5
- # Child Loop BB5_18 Depth 6
- # Child Loop BB5_19 Depth 7
- leaq 63(%rcx), %rax
+ movq %rcx, -72(%rbp) # 8-byte Spill
+ leaq 62(%rcx), %rdi
xorl %edx, %edx
- jmp .LBB5_9
.align 16, 0x90
-.LBB5_8: # %polly.loop_header2.loopexit
- # in Loop: Header=BB5_9 Depth=3
- addq $64, %rdx
- cmpq $1536, %rdx # imm = 0x600
- je .LBB5_6
-.LBB5_9: # %polly.loop_header7.preheader
+.LBB5_8: # %polly.loop_preheader11
# Parent Loop BB5_1 Depth=1
# Parent Loop BB5_7 Depth=2
# => This Loop Header: Depth=3
- # Child Loop BB5_11 Depth 4
+ # Child Loop BB5_9 Depth 4
+ # Child Loop BB5_12 Depth 5
+ # Child Loop BB5_17 Depth 6
+ # Child Loop BB5_18 Depth 7
# Child Loop BB5_14 Depth 5
- # Child Loop BB5_18 Depth 6
- # Child Loop BB5_19 Depth 7
- movq 16(%rsp), %rsi # 8-byte Reload
- leaq (%rsi,%rdx), %rsi
- leaq 63(%rdx), %rdi
- xorl %r8d, %r8d
- movq 8(%rsp), %r9 # 8-byte Reload
- movq %rdx, %r10
- jmp .LBB5_11
+ movq %rdx, -96(%rbp) # 8-byte Spill
+ leaq -4(%rdx), %rcx
+ movq %rdx, %rax
+ decq %rax
+ cmovsq %rcx, %rax
+ movq %rax, %r14
+ sarq $63, %r14
+ shrq $62, %r14
+ addq %rax, %r14
+ andq $-4, %r14
+ movq %rdx, %rax
+ orq $63, %rax
+ leaq -4(%rax), %rdx
+ movq -104(%rbp), %rcx # 8-byte Reload
+ leaq (%rcx,%r14,4), %rcx
+ movq %rcx, -80(%rbp) # 8-byte Spill
+ leaq B+16(,%r14,4), %rbx
+ leaq 4(%r14), %rcx
+ movq %rcx, -64(%rbp) # 8-byte Spill
+ xorl %r11d, %r11d
.align 16, 0x90
-.LBB5_10: # %polly.loop_header7.loopexit
- # in Loop: Header=BB5_11 Depth=4
- addq $256, %r9 # imm = 0x100
- addq $98304, %r10 # imm = 0x18000
- addq $64, %r8
- cmpq $1536, %r8 # imm = 0x600
- je .LBB5_8
-.LBB5_11: # %polly.loop_body8
+.LBB5_9: # %polly.loop_header10
# Parent Loop BB5_1 Depth=1
# Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_9 Depth=3
+ # Parent Loop BB5_8 Depth=3
# => This Loop Header: Depth=4
+ # Child Loop BB5_12 Depth 5
+ # Child Loop BB5_17 Depth 6
+ # Child Loop BB5_18 Depth 7
# Child Loop BB5_14 Depth 5
- # Child Loop BB5_18 Depth 6
- # Child Loop BB5_19 Depth 7
- movabsq $9223372036854775744, %r11 # imm = 0x7FFFFFFFFFFFFFC0
- cmpq %r11, %rcx
- jg .LBB5_10
-# BB#12: # %polly.loop_body13.lr.ph
- # in Loop: Header=BB5_11 Depth=4
- leaq 63(%r8), %r11
- movq %rcx, %rbx
- movq %rsi, %r14
- movq %r9, %r15
- jmp .LBB5_14
+ movabsq $9223372036854775744, %rcx # imm = 0x7FFFFFFFFFFFFFC0
+ cmpq %rcx, -72(%rbp) # 8-byte Folded Reload
+ jg .LBB5_15
+# BB#10: # %polly.loop_header17.preheader
+ # in Loop: Header=BB5_9 Depth=4
+ movq %r11, %r15
+ orq $63, %r15
+ cmpq %r15, %r11
+ movq -88(%rbp), %rcx # 8-byte Reload
+ jle .LBB5_11
.align 16, 0x90
-.LBB5_13: # %polly.loop_header12.loopexit
- # in Loop: Header=BB5_14 Depth=5
- addq $1536, %r14 # imm = 0x600
- addq $6144, %r15 # imm = 0x1800
- incq %rbx
- cmpq %rax, %rbx
- jg .LBB5_10
-.LBB5_14: # %polly.loop_body13
+.LBB5_14: # %polly.loop_exit28.us
+ # Parent Loop BB5_1 Depth=1
+ # Parent Loop BB5_7 Depth=2
+ # Parent Loop BB5_8 Depth=3
+ # Parent Loop BB5_9 Depth=4
+ # => This Inner Loop Header: Depth=5
+ incq %rcx
+ cmpq %rdi, %rcx
+ jle .LBB5_14
+ jmp .LBB5_15
+ .align 16, 0x90
+.LBB5_11: # in Loop: Header=BB5_9 Depth=4
+ decq %r15
+ movq -80(%rbp), %r13 # 8-byte Reload
+ movq -72(%rbp), %rcx # 8-byte Reload
+ .align 16, 0x90
+.LBB5_12: # %polly.loop_header26.preheader
# Parent Loop BB5_1 Depth=1
# Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_9 Depth=3
- # Parent Loop BB5_11 Depth=4
+ # Parent Loop BB5_8 Depth=3
+ # Parent Loop BB5_9 Depth=4
# => This Loop Header: Depth=5
- # Child Loop BB5_18 Depth 6
- # Child Loop BB5_19 Depth 7
- cmpq %r11, %r8
+ # Child Loop BB5_17 Depth 6
+ # Child Loop BB5_18 Depth 7
+ cmpq %rax, -64(%rbp) # 8-byte Folded Reload
+ movq %rbx, %r12
+ movq %r11, %r8
jg .LBB5_13
-# BB#15: # %polly.loop_body13
- # in Loop: Header=BB5_14 Depth=5
- cmpq %rdi, %rdx
- jg .LBB5_13
-# BB#16: # %polly.loop_body23.lr.ph.preheader
- # in Loop: Header=BB5_14 Depth=5
- xorl %r12d, %r12d
- movq %r10, %r13
- jmp .LBB5_18
.align 16, 0x90
-.LBB5_17: # %polly.loop_header17.loopexit
- # in Loop: Header=BB5_18 Depth=6
- addq $1536, %r13 # imm = 0x600
- incq %r12
- cmpq $64, %r12
- je .LBB5_13
-.LBB5_18: # %polly.loop_body23.lr.ph
+.LBB5_17: # %polly.loop_header35.preheader
# Parent Loop BB5_1 Depth=1
# Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_9 Depth=3
- # Parent Loop BB5_11 Depth=4
- # Parent Loop BB5_14 Depth=5
+ # Parent Loop BB5_8 Depth=3
+ # Parent Loop BB5_9 Depth=4
+ # Parent Loop BB5_12 Depth=5
# => This Loop Header: Depth=6
- # Child Loop BB5_19 Depth 7
- movss (%r15,%r12,4), %xmm0
- pshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
- xorl %ebp, %ebp
- .align 16, 0x90
-.LBB5_19: # %polly.loop_body23
+ # Child Loop BB5_18 Depth 7
+ leaq (%rcx,%rcx,2), %rsi
+ shlq $11, %rsi
+ vbroadcastss A(%rsi,%r8,4), %xmm0
+ movq %r13, %r9
+ movq %r12, %r10
+ movq %r14, %rsi
+.LBB5_18: # %polly.loop_header35
# Parent Loop BB5_1 Depth=1
# Parent Loop BB5_7 Depth=2
- # Parent Loop BB5_9 Depth=3
- # Parent Loop BB5_11 Depth=4
- # Parent Loop BB5_14 Depth=5
- # Parent Loop BB5_18 Depth=6
+ # Parent Loop BB5_8 Depth=3
+ # Parent Loop BB5_9 Depth=4
+ # Parent Loop BB5_12 Depth=5
+ # Parent Loop BB5_17 Depth=6
# => This Inner Loop Header: Depth=7
- movaps B(%rbp,%r13,4), %xmm1
- mulps %xmm0, %xmm1
- addps C(%rbp,%r14,4), %xmm1
- movaps %xmm1, C(%rbp,%r14,4)
- addq $16, %rbp
- cmpq $256, %rbp # imm = 0x100
- jne .LBB5_19
- jmp .LBB5_17
-.LBB5_3: # %omp.checkNext.loopexit
- # in Loop: Header=BB5_1 Depth=1
- leaq 32(%rsp), %rax
- movq %rax, %rdi
- leaq 24(%rsp), %rax
- movq %rax, %rsi
+ vmulps (%r10), %xmm0, %xmm1
+ vaddps (%r9), %xmm1, %xmm1
+ vmovaps %xmm1, (%r9)
+ addq $16, %r9
+ addq $16, %r10
+ addq $4, %rsi
+ cmpq %rdx, %rsi
+ jle .LBB5_18
+# BB#16: # %polly.loop_exit37
+ # in Loop: Header=BB5_17 Depth=6
+ addq $6144, %r12 # imm = 0x1800
+ cmpq %r15, %r8
+ leaq 1(%r8), %r8
+ jle .LBB5_17
+ .align 16, 0x90
+.LBB5_13: # %polly.loop_exit28
+ # in Loop: Header=BB5_12 Depth=5
+ addq $6144, %r13 # imm = 0x1800
+ cmpq %rdi, %rcx
+ leaq 1(%rcx), %rcx
+ jle .LBB5_12
+ .align 16, 0x90
+.LBB5_15: # %polly.loop_exit19
+ # in Loop: Header=BB5_9 Depth=4
+ addq $393216, %rbx # imm = 0x60000
+ cmpq $1472, %r11 # imm = 0x5C0
+ leaq 64(%r11), %r11
+ jl .LBB5_9
+# BB#5: # %polly.loop_exit12
+ # in Loop: Header=BB5_8 Depth=3
+ movq -96(%rbp), %rdx # 8-byte Reload
+ cmpq $1472, %rdx # imm = 0x5C0
+ leaq 64(%rdx), %rdx
+ jl .LBB5_8
+# BB#6: # %polly.loop_exit5
+ # in Loop: Header=BB5_7 Depth=2
+ addq $64, -88(%rbp) # 8-byte Folded Spill
+ addq $393216, -104(%rbp) # 8-byte Folded Spill
+ # imm = 0x60000
+ movq -72(%rbp), %rcx # 8-byte Reload
+ cmpq -112(%rbp), %rcx # 8-byte Folded Reload
+ leaq 64(%rcx), %rcx
+ jle .LBB5_7
.LBB5_1: # %omp.setup
# =>This Loop Header: Depth=1
# Child Loop BB5_7 Depth 2
- # Child Loop BB5_9 Depth 3
- # Child Loop BB5_11 Depth 4
+ # Child Loop BB5_8 Depth 3
+ # Child Loop BB5_9 Depth 4
+ # Child Loop BB5_12 Depth 5
+ # Child Loop BB5_17 Depth 6
+ # Child Loop BB5_18 Depth 7
# Child Loop BB5_14 Depth 5
- # Child Loop BB5_18 Depth 6
- # Child Loop BB5_19 Depth 7
+ leaq -48(%rbp), %rdi
+ leaq -56(%rbp), %rsi
callq GOMP_loop_runtime_next
- testb $1, %al
- jne .LBB5_4
-# BB#2: # %omp.exit
+ testb %al, %al
+ jne .LBB5_2
+# BB#4: # %omp.exit
callq GOMP_loop_end_nowait
- addq $40, %rsp
+ addq $72, %rsp
popq %rbx
popq %r12
popq %r13
@@ -606,11 +734,9 @@ main.omp_subfn1: # @main.omp_subfn1
popq %r15
popq %rbp
ret
-.Ltmp55:
- .size main.omp_subfn1, .Ltmp55-main.omp_subfn1
-.Ltmp56:
+.Ltmp66:
+ .size main.omp_subfn1, .Ltmp66-main.omp_subfn1
.cfi_endproc
-.Leh_func_end5:
.type A,@object # @A
.comm A,9437184,16
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe
index fac17e21685..36b788ea9ac 100755
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.exe
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
index 7217bc92c80..9d1f9ad098f 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.ll
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.s b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
index a1d6f0bf9b0..485d230bc39 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector.s
@@ -2,76 +2,112 @@
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
-# BB#0: # %pollyBB
- xorl %eax, %eax
- movsd .LCPI0_0(%rip), %xmm0
- movq %rax, %rcx
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp2:
+ .cfi_def_cfa_offset 16
+.Ltmp3:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp4:
+ .cfi_def_cfa_register %rbp
+ xorl %r8d, %r8d
+ vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
-.LBB0_2: # %polly.loop_header1.preheader
+.LBB0_1: # %polly.loop_preheader3
# =>This Loop Header: Depth=1
- # Child Loop BB0_3 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ # Child Loop BB0_2 Depth 2
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB0_3: # %polly.loop_body2
- # Parent Loop BB0_2 Depth=1
+.LBB0_2: # %polly.loop_header2
+ # Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
- jne .LBB0_3
-# BB#1: # %polly.loop_header.loopexit
- # in Loop: Header=BB0_2 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- cmpq $1536, %rcx # imm = 0x600
+ movl %ecx, %edx
+ imull %r8d, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %r8, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB0_2
-# BB#4: # %polly.after_loop
+# BB#3: # %polly.loop_exit4
+ # in Loop: Header=BB0_1 Depth=1
+ incq %r8
+ cmpq $1536, %r8 # imm = 0x600
+ jne .LBB0_1
+# BB#4: # %polly.loop_exit
+ popq %rbp
ret
-.Ltmp0:
- .size init_array, .Ltmp0-init_array
+.Ltmp5:
+ .size init_array, .Ltmp5-init_array
+ .cfi_endproc
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
-# BB#0:
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp9:
+ .cfi_def_cfa_offset 16
+.Ltmp10:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp11:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
pushq %r14
+ pushq %r12
pushq %rbx
- pushq %rax
- movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
+.Ltmp12:
+ .cfi_offset %rbx, -48
+.Ltmp13:
+ .cfi_offset %r12, -40
+.Ltmp14:
+ .cfi_offset %r14, -32
+.Ltmp15:
+ .cfi_offset %r15, -24
+ xorl %r14d, %r14d
+ movl $C, %r15d
.align 16, 0x90
-.LBB1_1: # %.preheader
+.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- xorl %r14d, %r14d
- movq stdout(%rip), %rdi
+ movq stdout(%rip), %rax
+ movq %r15, %r12
+ xorl %ebx, %ebx
.align 16, 0x90
-.LBB1_2: # Parent Loop BB1_1 Depth=1
+.LBB1_2: # %for.body3
+ # Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- movss C+9437184(%rbx,%r14,4), %xmm0
- cvtss2sd %xmm0, %xmm0
+ vmovss (%r12), %xmm0
+ vcvtss2sd %xmm0, %xmm0, %xmm0
+ movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
- movslq %r14d, %rax
+ movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
@@ -81,217 +117,258 @@ print_array: # @print_array
subl %ecx, %eax
cmpl $79, %eax
jne .LBB1_4
-# BB#3: # in Loop: Header=BB1_2 Depth=2
+# BB#3: # %if.then
+ # in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
-.LBB1_4: # in Loop: Header=BB1_2 Depth=2
- incq %r14
- movq stdout(%rip), %rsi
- cmpq $1536, %r14 # imm = 0x600
- movq %rsi, %rdi
+.LBB1_4: # %for.inc
+ # in Loop: Header=BB1_2 Depth=2
+ addq $4, %r12
+ incq %rbx
+ movq stdout(%rip), %rax
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # in Loop: Header=BB1_1 Depth=1
+# BB#5: # %for.end
+ # in Loop: Header=BB1_1 Depth=1
movl $10, %edi
+ movq %rax, %rsi
callq fputc
- addq $6144, %rbx # imm = 0x1800
+ addq $6144, %r15 # imm = 0x1800
+ incq %r14
+ cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
-# BB#6:
- addq $8, %rsp
+# BB#6: # %for.end12
popq %rbx
+ popq %r12
popq %r14
+ popq %r15
+ popq %rbp
ret
-.Ltmp1:
- .size print_array, .Ltmp1-print_array
+.Ltmp16:
+ .size print_array, .Ltmp16-print_array
+ .cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
.type main,@function
main: # @main
-# BB#0: # %pollyBB
+ .cfi_startproc
+# BB#0: # %entry
pushq %rbp
+.Ltmp20:
+ .cfi_def_cfa_offset 16
+.Ltmp21:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp22:
+ .cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
- subq $24, %rsp
- xorl %eax, %eax
- movsd .LCPI2_0(%rip), %xmm0
- movq %rax, %rcx
+ subq $56, %rsp
+.Ltmp23:
+ .cfi_offset %rbx, -56
+.Ltmp24:
+ .cfi_offset %r12, -48
+.Ltmp25:
+ .cfi_offset %r13, -40
+.Ltmp26:
+ .cfi_offset %r14, -32
+.Ltmp27:
+ .cfi_offset %r15, -24
+ xorl %ebx, %ebx
+ vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
-.LBB2_1: # %polly.loop_header1.preheader.i
+.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB2_2: # %polly.loop_body2.i
+.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
+ movl %ecx, %edx
+ imull %ebx, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %rbx, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB2_2
-# BB#3: # %polly.loop_header.loopexit.i
+# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- cmpq $1536, %rcx # imm = 0x600
+ incq %rbx
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
-# BB#4: # %polly.loop_header.preheader
+# BB#4: # %polly.loop_preheader3.preheader
movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
- xorl %eax, %eax
- movq %rax, 16(%rsp) # 8-byte Spill
- movq %rax, (%rsp) # 8-byte Spill
- jmp .LBB2_6
+ xorl %esi, %esi
+ movl $C+16, %eax
+ movq %rax, -88(%rbp) # 8-byte Spill
.align 16, 0x90
-.LBB2_5: # %polly.loop_header7.loopexit
- # in Loop: Header=BB2_6 Depth=1
- addq $393216, (%rsp) # 8-byte Folded Spill
- # imm = 0x60000
- movq 16(%rsp), %rax # 8-byte Reload
- addq $64, %rax
- movq %rax, 16(%rsp) # 8-byte Spill
- cmpq $1536, %rax # imm = 0x600
- je .LBB2_7
-.LBB2_6: # %polly.loop_header12.preheader
+.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
- # Child Loop BB2_9 Depth 2
- # Child Loop BB2_11 Depth 3
- # Child Loop BB2_14 Depth 4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- movq 16(%rsp), %rax # 8-byte Reload
- leaq 63(%rax), %rax
- movq (%rsp), %rcx # 8-byte Reload
- leaq A(%rcx), %rdx
- movq %rdx, 8(%rsp) # 8-byte Spill
+ # Child Loop BB2_15 Depth 2
+ # Child Loop BB2_8 Depth 3
+ # Child Loop BB2_11 Depth 4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ movq %rsi, -56(%rbp) # 8-byte Spill
+ movq %rsi, %rax
+ orq $63, %rax
+ movq %rax, -72(%rbp) # 8-byte Spill
+ leaq -1(%rax), %rax
+ movq %rax, -48(%rbp) # 8-byte Spill
xorl %edx, %edx
- jmp .LBB2_9
.align 16, 0x90
-.LBB2_8: # %polly.loop_header12.loopexit
- # in Loop: Header=BB2_9 Depth=2
- addq $256, %rcx # imm = 0x100
- addq $64, %rdx
- cmpq $1536, %rdx # imm = 0x600
- je .LBB2_5
-.LBB2_9: # %polly.loop_header17.preheader
- # Parent Loop BB2_6 Depth=1
+.LBB2_15: # %polly.loop_preheader24
+ # Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB2_11 Depth 3
- # Child Loop BB2_14 Depth 4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- leaq 63(%rdx), %rsi
- xorl %edi, %edi
- movq 8(%rsp), %r8 # 8-byte Reload
- movq %rdx, %r9
- jmp .LBB2_11
+ # Child Loop BB2_8 Depth 3
+ # Child Loop BB2_11 Depth 4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ movq %rdx, -80(%rbp) # 8-byte Spill
+ leaq -4(%rdx), %rcx
+ movq %rdx, %rax
+ decq %rax
+ cmovsq %rcx, %rax
+ movq %rax, %r15
+ sarq $63, %r15
+ shrq $62, %r15
+ addq %rax, %r15
+ andq $-4, %r15
+ movq %rdx, %r13
+ orq $63, %r13
+ leaq -4(%r13), %rdx
+ xorl %r10d, %r10d
+ movq -88(%rbp), %rax # 8-byte Reload
+ leaq (%rax,%r15,4), %rax
+ movq %rax, -64(%rbp) # 8-byte Spill
+ leaq B+16(,%r15,4), %rbx
+ leaq 4(%r15), %r12
.align 16, 0x90
-.LBB2_10: # %polly.loop_header17.loopexit
- # in Loop: Header=BB2_11 Depth=3
- addq $256, %r8 # imm = 0x100
- addq $98304, %r9 # imm = 0x18000
- addq $64, %rdi
- cmpq $1536, %rdi # imm = 0x600
- je .LBB2_8
-.LBB2_11: # %polly.loop_body18
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
+.LBB2_8: # %polly.loop_header23
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
# => This Loop Header: Depth=3
- # Child Loop BB2_14 Depth 4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- cmpq %rax, 16(%rsp) # 8-byte Folded Reload
- jg .LBB2_10
-# BB#12: # %polly.loop_body23.lr.ph
- # in Loop: Header=BB2_11 Depth=3
- leaq 63(%rdi), %r10
- xorl %r11d, %r11d
- jmp .LBB2_14
- .align 16, 0x90
-.LBB2_13: # %polly.loop_header22.loopexit
- # in Loop: Header=BB2_14 Depth=4
- addq $6144, %r11 # imm = 0x1800
- cmpq $393216, %r11 # imm = 0x60000
- je .LBB2_10
-.LBB2_14: # %polly.loop_body23
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
- # Parent Loop BB2_11 Depth=3
- # => This Loop Header: Depth=4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- cmpq %r10, %rdi
+ # Child Loop BB2_11 Depth 4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ cmpq -72(%rbp), %rsi # 8-byte Folded Reload
jg .LBB2_13
-# BB#15: # %polly.loop_body23
- # in Loop: Header=BB2_14 Depth=4
- cmpq %rsi, %rdx
+# BB#9: # %polly.loop_header30.preheader
+ # in Loop: Header=BB2_8 Depth=3
+ movq %r10, %rax
+ orq $63, %rax
+ cmpq %rax, %r10
jg .LBB2_13
-# BB#16: # %polly.loop_body33.lr.ph.preheader
- # in Loop: Header=BB2_14 Depth=4
- leaq (%r8,%r11), %rbx
- xorl %r14d, %r14d
- movq %r9, %r15
- movq %r14, %r12
- jmp .LBB2_18
+# BB#10: # in Loop: Header=BB2_8 Depth=3
+ decq %rax
+ movq -64(%rbp), %r14 # 8-byte Reload
+ movq -56(%rbp), %r11 # 8-byte Reload
.align 16, 0x90
-.LBB2_17: # %polly.loop_header27.loopexit
- # in Loop: Header=BB2_18 Depth=5
- addq $1536, %r15 # imm = 0x600
- incq %r12
- cmpq $64, %r12
- je .LBB2_13
-.LBB2_18: # %polly.loop_body33.lr.ph
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
- # Parent Loop BB2_11 Depth=3
- # Parent Loop BB2_14 Depth=4
- # => This Loop Header: Depth=5
- # Child Loop BB2_19 Depth 6
- movss (%rbx,%r12,4), %xmm0
- pshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
- movq %r14, %r13
+.LBB2_11: # %polly.loop_header37.preheader
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
+ # Parent Loop BB2_8 Depth=3
+ # => This Loop Header: Depth=4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ cmpq %r13, %r12
+ movq %rbx, %r8
+ movq %r10, %rsi
+ jg .LBB2_12
.align 16, 0x90
-.LBB2_19: # %polly.loop_body33
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
- # Parent Loop BB2_11 Depth=3
- # Parent Loop BB2_14 Depth=4
- # Parent Loop BB2_18 Depth=5
+.LBB2_17: # %polly.loop_header46.preheader
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
+ # Parent Loop BB2_8 Depth=3
+ # Parent Loop BB2_11 Depth=4
+ # => This Loop Header: Depth=5
+ # Child Loop BB2_18 Depth 6
+ leaq (%r11,%r11,2), %rcx
+ shlq $11, %rcx
+ vbroadcastss A(%rcx,%rsi,4), %xmm0
+ movq %r14, %rdi
+ movq %r8, %r9
+ movq %r15, %rcx
+.LBB2_18: # %polly.loop_header46
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
+ # Parent Loop BB2_8 Depth=3
+ # Parent Loop BB2_11 Depth=4
+ # Parent Loop BB2_17 Depth=5
# => This Inner Loop Header: Depth=6
- movaps B(%r13,%r15,4), %xmm1
- mulps %xmm0, %xmm1
- leaq (%r11,%r13), %rbp
- addps C(%rcx,%rbp), %xmm1
- movaps %xmm1, C(%rcx,%rbp)
- addq $16, %r13
- cmpq $256, %r13 # imm = 0x100
- jne .LBB2_19
- jmp .LBB2_17
-.LBB2_7: # %polly.after_loop9
+ vmulps (%r9), %xmm0, %xmm1
+ vaddps (%rdi), %xmm1, %xmm1
+ vmovaps %xmm1, (%rdi)
+ addq $16, %rdi
+ addq $16, %r9
+ addq $4, %rcx
+ cmpq %rdx, %rcx
+ jle .LBB2_18
+# BB#16: # %polly.loop_exit48
+ # in Loop: Header=BB2_17 Depth=5
+ addq $6144, %r8 # imm = 0x1800
+ cmpq %rax, %rsi
+ leaq 1(%rsi), %rsi
+ jle .LBB2_17
+ .align 16, 0x90
+.LBB2_12: # %polly.loop_exit39
+ # in Loop: Header=BB2_11 Depth=4
+ addq $6144, %r14 # imm = 0x1800
+ cmpq -48(%rbp), %r11 # 8-byte Folded Reload
+ leaq 1(%r11), %r11
+ jle .LBB2_11
+ .align 16, 0x90
+.LBB2_13: # %polly.loop_exit32
+ # in Loop: Header=BB2_8 Depth=3
+ addq $393216, %rbx # imm = 0x60000
+ cmpq $1472, %r10 # imm = 0x5C0
+ leaq 64(%r10), %r10
+ movq -56(%rbp), %rsi # 8-byte Reload
+ jl .LBB2_8
+# BB#14: # %polly.loop_exit25
+ # in Loop: Header=BB2_15 Depth=2
+ movq -80(%rbp), %rdx # 8-byte Reload
+ cmpq $1472, %rdx # imm = 0x5C0
+ leaq 64(%rdx), %rdx
+ jl .LBB2_15
+# BB#6: # %polly.loop_exit18
+ # in Loop: Header=BB2_5 Depth=1
+ addq $393216, -88(%rbp) # 8-byte Folded Spill
+ # imm = 0x60000
+ cmpq $1472, %rsi # imm = 0x5C0
+ leaq 64(%rsi), %rsi
+ jl .LBB2_5
+# BB#7: # %polly.loop_exit11
xorl %eax, %eax
- addq $24, %rsp
+ addq $56, %rsp
popq %rbx
popq %r12
popq %r13
@@ -299,8 +376,9 @@ main: # @main
popq %r15
popq %rbp
ret
-.Ltmp2:
- .size main, .Ltmp2-main
+.Ltmp28:
+ .size main, .Ltmp28-main
+ .cfi_endproc
.type A,@object # @A
.comm A,9437184,16
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.exe b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.exe
index 4334522f458..fbd8b128fd8 100755
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.exe
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.exe
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.ll b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.ll
index fa301cfa5eb..acdd95f3bc4 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.ll
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.ll
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.s b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.s
index 0f86df25d35..f7ab7fdd59c 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.s
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled.s
@@ -2,76 +2,112 @@
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
-# BB#0: # %pollyBB
- xorl %eax, %eax
- movsd .LCPI0_0(%rip), %xmm0
- movq %rax, %rcx
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp2:
+ .cfi_def_cfa_offset 16
+.Ltmp3:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp4:
+ .cfi_def_cfa_register %rbp
+ xorl %r8d, %r8d
+ vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
-.LBB0_2: # %polly.loop_header1.preheader
+.LBB0_1: # %polly.loop_preheader3
# =>This Loop Header: Depth=1
- # Child Loop BB0_3 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ # Child Loop BB0_2 Depth 2
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB0_3: # %polly.loop_body2
- # Parent Loop BB0_2 Depth=1
+.LBB0_2: # %polly.loop_header2
+ # Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
- jne .LBB0_3
-# BB#1: # %polly.loop_header.loopexit
- # in Loop: Header=BB0_2 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- cmpq $1536, %rcx # imm = 0x600
+ movl %ecx, %edx
+ imull %r8d, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %r8, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB0_2
-# BB#4: # %polly.after_loop
+# BB#3: # %polly.loop_exit4
+ # in Loop: Header=BB0_1 Depth=1
+ incq %r8
+ cmpq $1536, %r8 # imm = 0x600
+ jne .LBB0_1
+# BB#4: # %polly.loop_exit
+ popq %rbp
ret
-.Ltmp0:
- .size init_array, .Ltmp0-init_array
+.Ltmp5:
+ .size init_array, .Ltmp5-init_array
+ .cfi_endproc
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
-# BB#0:
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp9:
+ .cfi_def_cfa_offset 16
+.Ltmp10:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp11:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
pushq %r14
+ pushq %r12
pushq %rbx
- pushq %rax
- movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
+.Ltmp12:
+ .cfi_offset %rbx, -48
+.Ltmp13:
+ .cfi_offset %r12, -40
+.Ltmp14:
+ .cfi_offset %r14, -32
+.Ltmp15:
+ .cfi_offset %r15, -24
+ xorl %r14d, %r14d
+ movl $C, %r15d
.align 16, 0x90
-.LBB1_1: # %.preheader
+.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- xorl %r14d, %r14d
- movq stdout(%rip), %rdi
+ movq stdout(%rip), %rax
+ movq %r15, %r12
+ xorl %ebx, %ebx
.align 16, 0x90
-.LBB1_2: # Parent Loop BB1_1 Depth=1
+.LBB1_2: # %for.body3
+ # Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- movss C+9437184(%rbx,%r14,4), %xmm0
- cvtss2sd %xmm0, %xmm0
+ vmovss (%r12), %xmm0
+ vcvtss2sd %xmm0, %xmm0, %xmm0
+ movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
- movslq %r14d, %rax
+ movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
@@ -81,222 +117,252 @@ print_array: # @print_array
subl %ecx, %eax
cmpl $79, %eax
jne .LBB1_4
-# BB#3: # in Loop: Header=BB1_2 Depth=2
+# BB#3: # %if.then
+ # in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
-.LBB1_4: # in Loop: Header=BB1_2 Depth=2
- incq %r14
- movq stdout(%rip), %rsi
- cmpq $1536, %r14 # imm = 0x600
- movq %rsi, %rdi
+.LBB1_4: # %for.inc
+ # in Loop: Header=BB1_2 Depth=2
+ addq $4, %r12
+ incq %rbx
+ movq stdout(%rip), %rax
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # in Loop: Header=BB1_1 Depth=1
+# BB#5: # %for.end
+ # in Loop: Header=BB1_1 Depth=1
movl $10, %edi
+ movq %rax, %rsi
callq fputc
- addq $6144, %rbx # imm = 0x1800
+ addq $6144, %r15 # imm = 0x1800
+ incq %r14
+ cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
-# BB#6:
- addq $8, %rsp
+# BB#6: # %for.end12
popq %rbx
+ popq %r12
popq %r14
+ popq %r15
+ popq %rbp
ret
-.Ltmp1:
- .size print_array, .Ltmp1-print_array
+.Ltmp16:
+ .size print_array, .Ltmp16-print_array
+ .cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
.type main,@function
main: # @main
-# BB#0: # %pollyBB
+ .cfi_startproc
+# BB#0: # %entry
pushq %rbp
+.Ltmp20:
+ .cfi_def_cfa_offset 16
+.Ltmp21:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp22:
+ .cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
- subq $40, %rsp
- xorl %eax, %eax
- movsd .LCPI2_0(%rip), %xmm0
- movq %rax, %rcx
+ subq $56, %rsp
+.Ltmp23:
+ .cfi_offset %rbx, -56
+.Ltmp24:
+ .cfi_offset %r12, -48
+.Ltmp25:
+ .cfi_offset %r13, -40
+.Ltmp26:
+ .cfi_offset %r14, -32
+.Ltmp27:
+ .cfi_offset %r15, -24
+ xorl %ebx, %ebx
+ vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
-.LBB2_1: # %polly.loop_header1.preheader.i
+.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB2_2: # %polly.loop_body2.i
+.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
+ movl %ecx, %edx
+ imull %ebx, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %rbx, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB2_2
-# BB#3: # %polly.loop_header.loopexit.i
+# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- cmpq $1536, %rcx # imm = 0x600
+ incq %rbx
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
-# BB#4: # %polly.loop_header.preheader
- movl $C, %eax
- movq %rax, 8(%rsp) # 8-byte Spill
+# BB#4: # %polly.loop_preheader3.preheader
+ movl $C, %ebx
+ movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
- movl $C, %edi
callq memset
- movl $A, %eax
- movq %rax, 16(%rsp) # 8-byte Spill
- movq $0, 32(%rsp) # 8-byte Folded Spill
- jmp .LBB2_6
+ xorl %eax, %eax
.align 16, 0x90
-.LBB2_5: # %polly.loop_header7.loopexit
- # in Loop: Header=BB2_6 Depth=1
- addq $393216, 16(%rsp) # 8-byte Folded Spill
- # imm = 0x60000
- addq $393216, 8(%rsp) # 8-byte Folded Spill
- # imm = 0x60000
- movq 32(%rsp), %rax # 8-byte Reload
- addq $64, %rax
- movq %rax, 32(%rsp) # 8-byte Spill
- cmpq $1536, %rax # imm = 0x600
- je .LBB2_7
-.LBB2_6: # %polly.loop_header12.preheader
+.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
- # Child Loop BB2_9 Depth 2
- # Child Loop BB2_11 Depth 3
- # Child Loop BB2_14 Depth 4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- movq 32(%rsp), %rax # 8-byte Reload
- leaq 63(%rax), %rax
+ # Child Loop BB2_15 Depth 2
+ # Child Loop BB2_8 Depth 3
+ # Child Loop BB2_11 Depth 4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ movq %rax, -56(%rbp) # 8-byte Spill
+ movq %rbx, -88(%rbp) # 8-byte Spill
+ movq %rax, %rcx
+ orq $63, %rcx
+ movq %rcx, -72(%rbp) # 8-byte Spill
+ leaq -1(%rcx), %rcx
+ movq %rcx, -48(%rbp) # 8-byte Spill
+ movq $-1, %r15
movl $B, %ecx
- movq %rcx, 24(%rsp) # 8-byte Spill
- xorl %ecx, %ecx
- movq 8(%rsp), %rdx # 8-byte Reload
- jmp .LBB2_9
+ movq %rbx, -64(%rbp) # 8-byte Spill
+ xorl %r12d, %r12d
.align 16, 0x90
-.LBB2_8: # %polly.loop_header12.loopexit
- # in Loop: Header=BB2_9 Depth=2
- addq $256, %rdx # imm = 0x100
- addq $256, 24(%rsp) # 8-byte Folded Spill
- # imm = 0x100
- addq $64, %rcx
- cmpq $1536, %rcx # imm = 0x600
- je .LBB2_5
-.LBB2_9: # %polly.loop_header17.preheader
- # Parent Loop BB2_6 Depth=1
+.LBB2_15: # %polly.loop_preheader24
+ # Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB2_11 Depth 3
- # Child Loop BB2_14 Depth 4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- leaq 63(%rcx), %rsi
- xorl %edi, %edi
- movq 16(%rsp), %r8 # 8-byte Reload
- movq 24(%rsp), %r9 # 8-byte Reload
- jmp .LBB2_11
+ # Child Loop BB2_8 Depth 3
+ # Child Loop BB2_11 Depth 4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ movq %rcx, -80(%rbp) # 8-byte Spill
+ movq %r12, %r13
+ orq $63, %r13
+ leaq -1(%r13), %rbx
+ xorl %r9d, %r9d
+ movq %rcx, %rdx
.align 16, 0x90
-.LBB2_10: # %polly.loop_header17.loopexit
- # in Loop: Header=BB2_11 Depth=3
- addq $256, %r8 # imm = 0x100
- addq $393216, %r9 # imm = 0x60000
- addq $64, %rdi
- cmpq $1536, %rdi # imm = 0x600
- je .LBB2_8
-.LBB2_11: # %polly.loop_body18
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
+.LBB2_8: # %polly.loop_header23
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
# => This Loop Header: Depth=3
- # Child Loop BB2_14 Depth 4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- cmpq %rax, 32(%rsp) # 8-byte Folded Reload
- jg .LBB2_10
-# BB#12: # %polly.loop_body23.lr.ph
- # in Loop: Header=BB2_11 Depth=3
- leaq 63(%rdi), %r10
- xorl %r11d, %r11d
- jmp .LBB2_14
- .align 16, 0x90
-.LBB2_13: # %polly.loop_header22.loopexit
- # in Loop: Header=BB2_14 Depth=4
- addq $6144, %r11 # imm = 0x1800
- cmpq $393216, %r11 # imm = 0x60000
- je .LBB2_10
-.LBB2_14: # %polly.loop_body23
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
- # Parent Loop BB2_11 Depth=3
- # => This Loop Header: Depth=4
- # Child Loop BB2_18 Depth 5
- # Child Loop BB2_19 Depth 6
- cmpq %r10, %rdi
+ # Child Loop BB2_11 Depth 4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ cmpq -72(%rbp), %rax # 8-byte Folded Reload
jg .LBB2_13
-# BB#15: # %polly.loop_body23
- # in Loop: Header=BB2_14 Depth=4
- cmpq %rsi, %rcx
+# BB#9: # %polly.loop_header30.preheader
+ # in Loop: Header=BB2_8 Depth=3
+ movq %r9, %rax
+ orq $63, %rax
+ cmpq %rax, %r9
jg .LBB2_13
-# BB#16: # %polly.loop_body33.lr.ph.preheader
- # in Loop: Header=BB2_14 Depth=4
- leaq (%rdx,%r11), %rbx
- leaq (%r8,%r11), %r14
- xorl %r15d, %r15d
- movq %r9, %r12
- movq %r15, %r13
- jmp .LBB2_18
+# BB#10: # in Loop: Header=BB2_8 Depth=3
+ decq %rax
+ movq -64(%rbp), %r10 # 8-byte Reload
+ movq -56(%rbp), %r11 # 8-byte Reload
.align 16, 0x90
-.LBB2_17: # %polly.loop_header27.loopexit
- # in Loop: Header=BB2_18 Depth=5
- addq $6144, %r12 # imm = 0x1800
- incq %r13
- cmpq $64, %r13
- je .LBB2_13
-.LBB2_18: # %polly.loop_body33.lr.ph
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
- # Parent Loop BB2_11 Depth=3
- # Parent Loop BB2_14 Depth=4
- # => This Loop Header: Depth=5
- # Child Loop BB2_19 Depth 6
- movss (%r14,%r13,4), %xmm0
- movq %r15, %rbp
+.LBB2_11: # %polly.loop_header37.preheader
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
+ # Parent Loop BB2_8 Depth=3
+ # => This Loop Header: Depth=4
+ # Child Loop BB2_17 Depth 5
+ # Child Loop BB2_18 Depth 6
+ cmpq %r13, %r12
+ movq %rdx, %r14
+ movq %r9, %rcx
+ jg .LBB2_12
.align 16, 0x90
-.LBB2_19: # %polly.loop_body33
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
- # Parent Loop BB2_11 Depth=3
- # Parent Loop BB2_14 Depth=4
- # Parent Loop BB2_18 Depth=5
+.LBB2_17: # %polly.loop_header46.preheader
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
+ # Parent Loop BB2_8 Depth=3
+ # Parent Loop BB2_11 Depth=4
+ # => This Loop Header: Depth=5
+ # Child Loop BB2_18 Depth 6
+ leaq (%r11,%r11,2), %rsi
+ shlq $11, %rsi
+ vmovss A(%rsi,%rcx,4), %xmm0
+ movq %r10, %rdi
+ movq %r14, %r8
+ movq %r15, %rsi
+.LBB2_18: # %polly.loop_header46
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_15 Depth=2
+ # Parent Loop BB2_8 Depth=3
+ # Parent Loop BB2_11 Depth=4
+ # Parent Loop BB2_17 Depth=5
# => This Inner Loop Header: Depth=6
- movss (%r12,%rbp,4), %xmm1
- mulss %xmm0, %xmm1
- addss (%rbx,%rbp,4), %xmm1
- movss %xmm1, (%rbx,%rbp,4)
- incq %rbp
- cmpq $64, %rbp
- jne .LBB2_19
- jmp .LBB2_17
-.LBB2_7: # %polly.after_loop9
+ vmulss (%r8), %xmm0, %xmm1
+ vaddss (%rdi), %xmm1, %xmm1
+ vmovss %xmm1, (%rdi)
+ addq $4, %rdi
+ addq $4, %r8
+ incq %rsi
+ cmpq %rbx, %rsi
+ jle .LBB2_18
+# BB#16: # %polly.loop_exit48
+ # in Loop: Header=BB2_17 Depth=5
+ addq $6144, %r14 # imm = 0x1800
+ cmpq %rax, %rcx
+ leaq 1(%rcx), %rcx
+ jle .LBB2_17
+ .align 16, 0x90
+.LBB2_12: # %polly.loop_exit39
+ # in Loop: Header=BB2_11 Depth=4
+ addq $6144, %r10 # imm = 0x1800
+ cmpq -48(%rbp), %r11 # 8-byte Folded Reload
+ leaq 1(%r11), %r11
+ jle .LBB2_11
+ .align 16, 0x90
+.LBB2_13: # %polly.loop_exit32
+ # in Loop: Header=BB2_8 Depth=3
+ addq $393216, %rdx # imm = 0x60000
+ cmpq $1472, %r9 # imm = 0x5C0
+ leaq 64(%r9), %r9
+ movq -56(%rbp), %rax # 8-byte Reload
+ jl .LBB2_8
+# BB#14: # %polly.loop_exit25
+ # in Loop: Header=BB2_15 Depth=2
+ addq $256, -64(%rbp) # 8-byte Folded Spill
+ # imm = 0x100
+ movq -80(%rbp), %rcx # 8-byte Reload
+ addq $256, %rcx # imm = 0x100
+ addq $64, %r15
+ cmpq $1472, %r12 # imm = 0x5C0
+ leaq 64(%r12), %r12
+ jl .LBB2_15
+# BB#6: # %polly.loop_exit18
+ # in Loop: Header=BB2_5 Depth=1
+ movq -88(%rbp), %rbx # 8-byte Reload
+ addq $393216, %rbx # imm = 0x60000
+ cmpq $1472, %rax # imm = 0x5C0
+ leaq 64(%rax), %rax
+ jl .LBB2_5
+# BB#7: # %polly.loop_exit11
xorl %eax, %eax
- addq $40, %rsp
+ addq $56, %rsp
popq %rbx
popq %r12
popq %r13
@@ -304,8 +370,9 @@ main: # @main
popq %r15
popq %rbp
ret
-.Ltmp2:
- .size main, .Ltmp2-main
+.Ltmp28:
+ .size main, .Ltmp28-main
+ .cfi_endproc
.type A,@object # @A
.comm A,9437184,16
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged.exe b/polly/www/experiments/matmul/matmul.polly.interchanged.exe
index cc125c4b2b1..240c95a7f79 100755
--- a/polly/www/experiments/matmul/matmul.polly.interchanged.exe
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged.exe
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged.ll b/polly/www/experiments/matmul/matmul.polly.interchanged.ll
index c0a54bb64f4..52fbccc7ed5 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged.ll
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged.ll
Binary files differ
diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged.s b/polly/www/experiments/matmul/matmul.polly.interchanged.s
index 8bbc523f764..a764da0b3f2 100644
--- a/polly/www/experiments/matmul/matmul.polly.interchanged.s
+++ b/polly/www/experiments/matmul/matmul.polly.interchanged.s
@@ -2,76 +2,112 @@
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI0_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl init_array
.align 16, 0x90
.type init_array,@function
init_array: # @init_array
-# BB#0: # %pollyBB
- xorl %eax, %eax
- movsd .LCPI0_0(%rip), %xmm0
- movq %rax, %rcx
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp2:
+ .cfi_def_cfa_offset 16
+.Ltmp3:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp4:
+ .cfi_def_cfa_register %rbp
+ xorl %r8d, %r8d
+ vmovsd .LCPI0_0(%rip), %xmm0
.align 16, 0x90
-.LBB0_2: # %polly.loop_header1.preheader
+.LBB0_1: # %polly.loop_preheader3
# =>This Loop Header: Depth=1
- # Child Loop BB0_3 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ # Child Loop BB0_2 Depth 2
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB0_3: # %polly.loop_body2
- # Parent Loop BB0_2 Depth=1
+.LBB0_2: # %polly.loop_header2
+ # Parent Loop BB0_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
- jne .LBB0_3
-# BB#1: # %polly.loop_header.loopexit
- # in Loop: Header=BB0_2 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- cmpq $1536, %rcx # imm = 0x600
+ movl %ecx, %edx
+ imull %r8d, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %r8, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB0_2
-# BB#4: # %polly.after_loop
+# BB#3: # %polly.loop_exit4
+ # in Loop: Header=BB0_1 Depth=1
+ incq %r8
+ cmpq $1536, %r8 # imm = 0x600
+ jne .LBB0_1
+# BB#4: # %polly.loop_exit
+ popq %rbp
ret
-.Ltmp0:
- .size init_array, .Ltmp0-init_array
+.Ltmp5:
+ .size init_array, .Ltmp5-init_array
+ .cfi_endproc
.globl print_array
.align 16, 0x90
.type print_array,@function
print_array: # @print_array
-# BB#0:
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp9:
+ .cfi_def_cfa_offset 16
+.Ltmp10:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp11:
+ .cfi_def_cfa_register %rbp
+ pushq %r15
pushq %r14
+ pushq %r12
pushq %rbx
- pushq %rax
- movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
+.Ltmp12:
+ .cfi_offset %rbx, -48
+.Ltmp13:
+ .cfi_offset %r12, -40
+.Ltmp14:
+ .cfi_offset %r14, -32
+.Ltmp15:
+ .cfi_offset %r15, -24
+ xorl %r14d, %r14d
+ movl $C, %r15d
.align 16, 0x90
-.LBB1_1: # %.preheader
+.LBB1_1: # %for.cond1.preheader
# =>This Loop Header: Depth=1
# Child Loop BB1_2 Depth 2
- xorl %r14d, %r14d
- movq stdout(%rip), %rdi
+ movq stdout(%rip), %rax
+ movq %r15, %r12
+ xorl %ebx, %ebx
.align 16, 0x90
-.LBB1_2: # Parent Loop BB1_1 Depth=1
+.LBB1_2: # %for.body3
+ # Parent Loop BB1_1 Depth=1
# => This Inner Loop Header: Depth=2
- movss C+9437184(%rbx,%r14,4), %xmm0
- cvtss2sd %xmm0, %xmm0
+ vmovss (%r12), %xmm0
+ vcvtss2sd %xmm0, %xmm0, %xmm0
+ movq %rax, %rdi
movl $.L.str, %esi
movb $1, %al
callq fprintf
- movslq %r14d, %rax
+ movslq %ebx, %rax
imulq $1717986919, %rax, %rcx # imm = 0x66666667
movq %rcx, %rdx
shrq $63, %rdx
@@ -81,125 +117,158 @@ print_array: # @print_array
subl %ecx, %eax
cmpl $79, %eax
jne .LBB1_4
-# BB#3: # in Loop: Header=BB1_2 Depth=2
+# BB#3: # %if.then
+ # in Loop: Header=BB1_2 Depth=2
movq stdout(%rip), %rsi
movl $10, %edi
callq fputc
-.LBB1_4: # in Loop: Header=BB1_2 Depth=2
- incq %r14
- movq stdout(%rip), %rsi
- cmpq $1536, %r14 # imm = 0x600
- movq %rsi, %rdi
+.LBB1_4: # %for.inc
+ # in Loop: Header=BB1_2 Depth=2
+ addq $4, %r12
+ incq %rbx
+ movq stdout(%rip), %rax
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB1_2
-# BB#5: # in Loop: Header=BB1_1 Depth=1
+# BB#5: # %for.end
+ # in Loop: Header=BB1_1 Depth=1
movl $10, %edi
+ movq %rax, %rsi
callq fputc
- addq $6144, %rbx # imm = 0x1800
+ addq $6144, %r15 # imm = 0x1800
+ incq %r14
+ cmpq $1536, %r14 # imm = 0x600
jne .LBB1_1
-# BB#6:
- addq $8, %rsp
+# BB#6: # %for.end12
popq %rbx
+ popq %r12
popq %r14
+ popq %r15
+ popq %rbp
ret
-.Ltmp1:
- .size print_array, .Ltmp1-print_array
+.Ltmp16:
+ .size print_array, .Ltmp16-print_array
+ .cfi_endproc
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LCPI2_0:
- .quad 4602678819172646912 # double 5.000000e-01
+ .quad 4602678819172646912 # double 0.5
.text
.globl main
.align 16, 0x90
.type main,@function
main: # @main
-# BB#0: # %pollyBB
- pushq %rax
- xorl %eax, %eax
- movsd .LCPI2_0(%rip), %xmm0
- movq %rax, %rcx
+ .cfi_startproc
+# BB#0: # %entry
+ pushq %rbp
+.Ltmp20:
+ .cfi_def_cfa_offset 16
+.Ltmp21:
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+.Ltmp22:
+ .cfi_def_cfa_register %rbp
+ pushq %r14
+ pushq %rbx
+.Ltmp23:
+ .cfi_offset %rbx, -32
+.Ltmp24:
+ .cfi_offset %r14, -24
+ xorl %ebx, %ebx
+ vmovsd .LCPI2_0(%rip), %xmm0
.align 16, 0x90
-.LBB2_1: # %polly.loop_header1.preheader.i
+.LBB2_1: # %polly.loop_preheader3.i
# =>This Loop Header: Depth=1
# Child Loop BB2_2 Depth 2
- movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
- xorl %esi, %esi
+ xorl %ecx, %ecx
.align 16, 0x90
-.LBB2_2: # %polly.loop_body2.i
+.LBB2_2: # %polly.loop_header2.i
# Parent Loop BB2_1 Depth=1
# => This Inner Loop Header: Depth=2
- movl %esi, %edi
- sarl $31, %edi
- shrl $22, %edi
- addl %esi, %edi
- andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
- negl %edi
- leal 1(%rsi,%rdi), %edi
- cvtsi2sd %edi, %xmm1
- mulsd %xmm0, %xmm1
- cvtsd2ss %xmm1, %xmm1
- movss %xmm1, A+6144(%rax,%rdx,4)
- movss %xmm1, B+6144(%rax,%rdx,4)
- addl %ecx, %esi
- incq %rdx
+ movl %ecx, %edx
+ imull %ebx, %edx
+ movl %edx, %esi
+ sarl $31, %esi
+ shrl $22, %esi
+ addl %edx, %esi
+ andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
+ negl %esi
+ movq %rbx, %rax
+ shlq $11, %rax
+ leal 1(%rdx,%rsi), %edi
+ leaq (%rax,%rax,2), %rsi
+ leaq 1(%rcx), %rdx
+ cmpq $1536, %rdx # imm = 0x600
+ vcvtsi2sdl %edi, %xmm0, %xmm1
+ vmulsd %xmm0, %xmm1, %xmm1
+ vcvtsd2ss %xmm1, %xmm1, %xmm1
+ vmovss %xmm1, A(%rsi,%rcx,4)
+ vmovss %xmm1, B(%rsi,%rcx,4)
+ movq %rdx, %rcx
jne .LBB2_2
-# BB#3: # %polly.loop_header.loopexit.i
+# BB#3: # %polly.loop_exit4.i
# in Loop: Header=BB2_1 Depth=1
- addq $6144, %rax # imm = 0x1800
- incq %rcx
- cmpq $1536, %rcx # imm = 0x600
+ incq %rbx
+ cmpq $1536, %rbx # imm = 0x600
jne .LBB2_1
-# BB#4: # %polly.loop_header.preheader
+# BB#4: # %polly.loop_preheader3.preheader
+ movl $C, %r14d
movl $C, %edi
xorl %esi, %esi
movl $9437184, %edx # imm = 0x900000
callq memset
xorl %eax, %eax
- jmp .LBB2_6
.align 16, 0x90
-.LBB2_5: # %polly.loop_header7.loopexit
- # in Loop: Header=BB2_6 Depth=1
- addq $6144, %rax # imm = 0x1800
- cmpq $9437184, %rax # imm = 0x900000
- je .LBB2_7
-.LBB2_6: # %polly.loop_header12.preheader
+.LBB2_5: # %polly.loop_preheader17
# =>This Loop Header: Depth=1
- # Child Loop BB2_9 Depth 2
- # Child Loop BB2_10 Depth 3
- leaq A(%rax), %rcx
- movq $-9437184, %rdx # imm = 0xFFFFFFFFFF700000
- jmp .LBB2_9
+ # Child Loop BB2_10 Depth 2
+ # Child Loop BB2_8 Depth 3
+ movl $B, %ebx
+ xorl %edx, %edx
.align 16, 0x90
-.LBB2_8: # %polly.loop_header12.loopexit
- # in Loop: Header=BB2_9 Depth=2
- addq $4, %rcx
- addq $6144, %rdx # imm = 0x1800
- je .LBB2_5
-.LBB2_9: # %polly.loop_header17.preheader
- # Parent Loop BB2_6 Depth=1
+.LBB2_10: # %polly.loop_preheader24
+ # Parent Loop BB2_5 Depth=1
# => This Loop Header: Depth=2
- # Child Loop BB2_10 Depth 3
- movss (%rcx), %xmm0
- xorl %esi, %esi
+ # Child Loop BB2_8 Depth 3
+ leaq (%rax,%rax,2), %rcx
+ shlq $11, %rcx
+ vmovss A(%rcx,%rdx,4), %xmm0
+ movl $1536, %esi # imm = 0x600
+ movq %r14, %rdi
+ movq %rbx, %rcx
.align 16, 0x90
-.LBB2_10: # %polly.loop_body18
- # Parent Loop BB2_6 Depth=1
- # Parent Loop BB2_9 Depth=2
+.LBB2_8: # %polly.loop_header23
+ # Parent Loop BB2_5 Depth=1
+ # Parent Loop BB2_10 Depth=2
# => This Inner Loop Header: Depth=3
- movss B+9437184(%rdx,%rsi,4), %xmm1
- mulss %xmm0, %xmm1
- addss C(%rax,%rsi,4), %xmm1
- movss %xmm1, C(%rax,%rsi,4)
- incq %rsi
- cmpq $1536, %rsi # imm = 0x600
+ vmulss (%rcx), %xmm0, %xmm1
+ vaddss (%rdi), %xmm1, %xmm1
+ vmovss %xmm1, (%rdi)
+ addq $4, %rdi
+ addq $4, %rcx
+ decq %rsi
+ jne .LBB2_8
+# BB#9: # %polly.loop_exit25
+ # in Loop: Header=BB2_10 Depth=2
+ addq $6144, %rbx # imm = 0x1800
+ incq %rdx
+ cmpq $1536, %rdx # imm = 0x600
jne .LBB2_10
- jmp .LBB2_8
-.LBB2_7: # %polly.after_loop9
+# BB#6: # %polly.loop_exit18
+ # in Loop: Header=BB2_5 Depth=1
+ addq $6144, %r14 # imm = 0x1800
+ incq %rax
+ cmpq $1536, %rax # imm = 0x600
+ jne .LBB2_5
+# BB#7: # %polly.loop_exit11
xorl %eax, %eax
- popq %rdx
+ popq %rbx
+ popq %r14
+ popq %rbp
ret
-.Ltmp2:
- .size main, .Ltmp2-main
+.Ltmp25:
+ .size main, .Ltmp25-main
+ .cfi_endproc
.type A,@object # @A
.comm A,9437184,16
diff --git a/polly/www/experiments/matmul/matmul.preopt.ll b/polly/www/experiments/matmul/matmul.preopt.ll
index 9287d7e141b..3931716619b 100644
--- a/polly/www/experiments/matmul/matmul.preopt.ll
+++ b/polly/www/experiments/matmul/matmul.preopt.ll
@@ -1,5 +1,5 @@
; ModuleID = 'matmul.s'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
@@ -8,173 +8,179 @@ target triple = "x86_64-unknown-linux-gnu"
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external global %struct._IO_FILE*
-@.str = private unnamed_addr constant [5 x i8] c"%lf \00"
+@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00"
-
-define void @init_array() nounwind {
-; <label>:0
- br label %1
-
-; <label>:1 ; preds = %18, %0
- %2 = phi i64 [ %indvar.next2, %18 ], [ 0, %0 ]
- %exitcond5 = icmp ne i64 %2, 1536
- br i1 %exitcond5, label %3, label %19
-
-; <label>:3 ; preds = %1
- br label %4
-
-; <label>:4 ; preds = %16, %3
- %indvar = phi i64 [ %indvar.next, %16 ], [ 0, %3 ]
- %scevgep4 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %2, i64 %indvar
- %scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %2, i64 %indvar
- %tmp = mul i64 %2, %indvar
- %tmp3 = trunc i64 %tmp to i32
+@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define void @init_array() #0 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc17, %entry
+ %0 = phi i64 [ %indvar.next2, %for.inc17 ], [ 0, %entry ]
+ %exitcond3 = icmp ne i64 %0, 1536
+ br i1 %exitcond3, label %for.body, label %for.end19
+
+for.body: ; preds = %for.cond
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]
+ %arrayidx6 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %0, i64 %indvar
+ %arrayidx16 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %0, i64 %indvar
+ %1 = mul i64 %0, %indvar
+ %mul = trunc i64 %1 to i32
%exitcond = icmp ne i64 %indvar, 1536
- br i1 %exitcond, label %5, label %17
-
-; <label>:5 ; preds = %4
- %6 = srem i32 %tmp3, 1024
- %7 = add nsw i32 1, %6
- %8 = sitofp i32 %7 to double
- %9 = fdiv double %8, 2.000000e+00
- %10 = fptrunc double %9 to float
- store float %10, float* %scevgep4
- %11 = srem i32 %tmp3, 1024
- %12 = add nsw i32 1, %11
- %13 = sitofp i32 %12 to double
- %14 = fdiv double %13, 2.000000e+00
- %15 = fptrunc double %14 to float
- store float %15, float* %scevgep
- br label %16
-
-; <label>:16 ; preds = %5
+ br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ %rem = srem i32 %mul, 1024
+ %add = add nsw i32 1, %rem
+ %conv = sitofp i32 %add to double
+ %div = fdiv double %conv, 2.000000e+00
+ %conv4 = fptrunc double %div to float
+ store float %conv4, float* %arrayidx6, align 4
+ %rem8 = srem i32 %mul, 1024
+ %add9 = add nsw i32 1, %rem8
+ %conv10 = sitofp i32 %add9 to double
+ %div11 = fdiv double %conv10, 2.000000e+00
+ %conv12 = fptrunc double %div11 to float
+ store float %conv12, float* %arrayidx16, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
%indvar.next = add i64 %indvar, 1
- br label %4
+ br label %for.cond1
-; <label>:17 ; preds = %4
- br label %18
+for.end: ; preds = %for.cond1
+ br label %for.inc17
-; <label>:18 ; preds = %17
- %indvar.next2 = add i64 %2, 1
- br label %1
+for.inc17: ; preds = %for.end
+ %indvar.next2 = add i64 %0, 1
+ br label %for.cond
-; <label>:19 ; preds = %1
+for.end19: ; preds = %for.cond
ret void
}
-define void @print_array() nounwind {
-; <label>:0
- br label %1
+; Function Attrs: nounwind uwtable
+define void @print_array() #0 {
+entry:
+ br label %for.cond
-; <label>:1 ; preds = %19, %0
- %indvar1 = phi i64 [ %indvar.next2, %19 ], [ 0, %0 ]
+for.cond: ; preds = %for.inc10, %entry
+ %indvar1 = phi i64 [ %indvar.next2, %for.inc10 ], [ 0, %entry ]
%exitcond3 = icmp ne i64 %indvar1, 1536
- br i1 %exitcond3, label %2, label %20
+ br i1 %exitcond3, label %for.body, label %for.end12
-; <label>:2 ; preds = %1
- br label %3
+for.body: ; preds = %for.cond
+ br label %for.cond1
-; <label>:3 ; preds = %15, %2
- %indvar = phi i64 [ %indvar.next, %15 ], [ 0, %2 ]
- %scevgep = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar
+for.cond1: ; preds = %for.inc, %for.body
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]
+ %arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar
%j.0 = trunc i64 %indvar to i32
%exitcond = icmp ne i64 %indvar, 1536
- br i1 %exitcond, label %4, label %16
-
-; <label>:4 ; preds = %3
- %5 = load %struct._IO_FILE** @stdout, align 8
- %6 = load float* %scevgep
- %7 = fpext float %6 to double
- %8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %7)
- %9 = srem i32 %j.0, 80
- %10 = icmp eq i32 %9, 79
- br i1 %10, label %11, label %14
-
-; <label>:11 ; preds = %4
- %12 = load %struct._IO_FILE** @stdout, align 8
- %13 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
- br label %14
-
-; <label>:14 ; preds = %11, %4
- br label %15
-
-; <label>:15 ; preds = %14
+ br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ %0 = load %struct._IO_FILE** @stdout, align 8
+ %1 = load float* %arrayidx5, align 4
+ %conv = fpext float %1 to double
+ %call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %conv)
+ %rem = srem i32 %j.0, 80
+ %cmp6 = icmp eq i32 %rem, 79
+ br i1 %cmp6, label %if.then, label %if.end
+
+if.then: ; preds = %for.body3
+ %2 = load %struct._IO_FILE** @stdout, align 8
+ %call8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body3
+ br label %for.inc
+
+for.inc: ; preds = %if.end
%indvar.next = add i64 %indvar, 1
- br label %3
+ br label %for.cond1
-; <label>:16 ; preds = %3
- %17 = load %struct._IO_FILE** @stdout, align 8
- %18 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %17, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
- br label %19
+for.end: ; preds = %for.cond1
+ %3 = load %struct._IO_FILE** @stdout, align 8
+ %call9 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
+ br label %for.inc10
-; <label>:19 ; preds = %16
+for.inc10: ; preds = %for.end
%indvar.next2 = add i64 %indvar1, 1
- br label %1
+ br label %for.cond
-; <label>:20 ; preds = %1
+for.end12: ; preds = %for.cond
ret void
}
-declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
+declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
-define i32 @main() nounwind {
-; <label>:0
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
call void @init_array()
- br label %1
-
-; <label>:1 ; preds = %16, %0
- %indvar3 = phi i64 [ %indvar.next4, %16 ], [ 0, %0 ]
- %exitcond9 = icmp ne i64 %indvar3, 1536
- br i1 %exitcond9, label %2, label %17
-
-; <label>:2 ; preds = %1
- br label %3
-
-; <label>:3 ; preds = %14, %2
- %indvar1 = phi i64 [ %indvar.next2, %14 ], [ 0, %2 ]
- %scevgep8 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1
- %exitcond6 = icmp ne i64 %indvar1, 1536
- br i1 %exitcond6, label %4, label %15
-
-; <label>:4 ; preds = %3
- store float 0.000000e+00, float* %scevgep8
- br label %5
-
-; <label>:5 ; preds = %12, %4
- %indvar = phi i64 [ %indvar.next, %12 ], [ 0, %4 ]
- %scevgep5 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar
- %scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1
+ br label %for.cond
+
+for.cond: ; preds = %for.inc28, %entry
+ %indvar3 = phi i64 [ %indvar.next4, %for.inc28 ], [ 0, %entry ]
+ %exitcond6 = icmp ne i64 %indvar3, 1536
+ br i1 %exitcond6, label %for.body, label %for.end30
+
+for.body: ; preds = %for.cond
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc25, %for.body
+ %indvar1 = phi i64 [ %indvar.next2, %for.inc25 ], [ 0, %for.body ]
+ %arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1
+ %exitcond5 = icmp ne i64 %indvar1, 1536
+ br i1 %exitcond5, label %for.body3, label %for.end27
+
+for.body3: ; preds = %for.cond1
+ store float 0.000000e+00, float* %arrayidx5, align 4
+ br label %for.cond6
+
+for.cond6: ; preds = %for.inc, %for.body3
+ %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body3 ]
+ %arrayidx16 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar
+ %arrayidx20 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1
%exitcond = icmp ne i64 %indvar, 1536
- br i1 %exitcond, label %6, label %13
-
-; <label>:6 ; preds = %5
- %7 = load float* %scevgep8
- %8 = load float* %scevgep5
- %9 = load float* %scevgep
- %10 = fmul float %8, %9
- %11 = fadd float %7, %10
- store float %11, float* %scevgep8
- br label %12
-
-; <label>:12 ; preds = %6
+ br i1 %exitcond, label %for.body8, label %for.end
+
+for.body8: ; preds = %for.cond6
+ %0 = load float* %arrayidx5, align 4
+ %1 = load float* %arrayidx16, align 4
+ %2 = load float* %arrayidx20, align 4
+ %mul = fmul float %1, %2
+ %add = fadd float %0, %mul
+ store float %add, float* %arrayidx5, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body8
%indvar.next = add i64 %indvar, 1
- br label %5
+ br label %for.cond6
-; <label>:13 ; preds = %5
- br label %14
+for.end: ; preds = %for.cond6
+ br label %for.inc25
-; <label>:14 ; preds = %13
+for.inc25: ; preds = %for.end
%indvar.next2 = add i64 %indvar1, 1
- br label %3
+ br label %for.cond1
-; <label>:15 ; preds = %3
- br label %16
+for.end27: ; preds = %for.cond1
+ br label %for.inc28
-; <label>:16 ; preds = %15
+for.inc28: ; preds = %for.end27
%indvar.next4 = add i64 %indvar3, 1
- br label %1
+ br label %for.cond
-; <label>:17 ; preds = %1
+for.end30: ; preds = %for.cond
ret i32 0
}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/polly/www/experiments/matmul/matmul.s b/polly/www/experiments/matmul/matmul.s
index bec9d2a7504..22ea57b1687 100644
--- a/polly/www/experiments/matmul/matmul.s
+++ b/polly/www/experiments/matmul/matmul.s
@@ -1,5 +1,5 @@
; ModuleID = 'matmul.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
@@ -8,248 +8,257 @@ target triple = "x86_64-unknown-linux-gnu"
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external global %struct._IO_FILE*
-@.str = private unnamed_addr constant [5 x i8] c"%lf \00"
+@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00"
+@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
-define void @init_array() nounwind {
+; Function Attrs: nounwind uwtable
+define void @init_array() #0 {
+entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
store i32 0, i32* %i, align 4
- br label %1
+ br label %for.cond
-; <label>:1 ; preds = %41, %0
- %2 = load i32* %i, align 4
- %3 = icmp slt i32 %2, 1536
- br i1 %3, label %4, label %44
+for.cond: ; preds = %for.inc17, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 1536
+ br i1 %cmp, label %for.body, label %for.end19
-; <label>:4 ; preds = %1
+for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
- br label %5
+ br label %for.cond1
-; <label>:5 ; preds = %37, %4
- %6 = load i32* %j, align 4
- %7 = icmp slt i32 %6, 1536
- br i1 %7, label %8, label %40
+for.cond1: ; preds = %for.inc, %for.body
+ %1 = load i32* %j, align 4
+ %cmp2 = icmp slt i32 %1, 1536
+ br i1 %cmp2, label %for.body3, label %for.end
-; <label>:8 ; preds = %5
+for.body3: ; preds = %for.cond1
+ %2 = load i32* %i, align 4
+ %3 = load i32* %j, align 4
+ %mul = mul nsw i32 %2, %3
+ %rem = srem i32 %mul, 1024
+ %add = add nsw i32 1, %rem
+ %conv = sitofp i32 %add to double
+ %div = fdiv double %conv, 2.000000e+00
+ %conv4 = fptrunc double %div to float
+ %4 = load i32* %j, align 4
+ %idxprom = sext i32 %4 to i64
+ %5 = load i32* %i, align 4
+ %idxprom5 = sext i32 %5 to i64
+ %arrayidx = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %idxprom5
+ %arrayidx6 = getelementptr inbounds [1536 x float]* %arrayidx, i32 0, i64 %idxprom
+ store float %conv4, float* %arrayidx6, align 4
+ %6 = load i32* %i, align 4
+ %7 = load i32* %j, align 4
+ %mul7 = mul nsw i32 %6, %7
+ %rem8 = srem i32 %mul7, 1024
+ %add9 = add nsw i32 1, %rem8
+ %conv10 = sitofp i32 %add9 to double
+ %div11 = fdiv double %conv10, 2.000000e+00
+ %conv12 = fptrunc double %div11 to float
+ %8 = load i32* %j, align 4
+ %idxprom13 = sext i32 %8 to i64
%9 = load i32* %i, align 4
+ %idxprom14 = sext i32 %9 to i64
+ %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %idxprom14
+ %arrayidx16 = getelementptr inbounds [1536 x float]* %arrayidx15, i32 0, i64 %idxprom13
+ store float %conv12, float* %arrayidx16, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
%10 = load i32* %j, align 4
- %11 = mul nsw i32 %9, %10
- %12 = srem i32 %11, 1024
- %13 = add nsw i32 1, %12
- %14 = sitofp i32 %13 to double
- %15 = fdiv double %14, 2.000000e+00
- %16 = fptrunc double %15 to float
- %17 = load i32* %j, align 4
- %18 = sext i32 %17 to i64
- %19 = load i32* %i, align 4
- %20 = sext i32 %19 to i64
- %21 = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %20
- %22 = getelementptr inbounds [1536 x float]* %21, i32 0, i64 %18
- store float %16, float* %22
- %23 = load i32* %i, align 4
- %24 = load i32* %j, align 4
- %25 = mul nsw i32 %23, %24
- %26 = srem i32 %25, 1024
- %27 = add nsw i32 1, %26
- %28 = sitofp i32 %27 to double
- %29 = fdiv double %28, 2.000000e+00
- %30 = fptrunc double %29 to float
- %31 = load i32* %j, align 4
- %32 = sext i32 %31 to i64
- %33 = load i32* %i, align 4
- %34 = sext i32 %33 to i64
- %35 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %34
- %36 = getelementptr inbounds [1536 x float]* %35, i32 0, i64 %32
- store float %30, float* %36
- br label %37
-
-; <label>:37 ; preds = %8
- %38 = load i32* %j, align 4
- %39 = add nsw i32 %38, 1
- store i32 %39, i32* %j, align 4
- br label %5
-
-; <label>:40 ; preds = %5
- br label %41
-
-; <label>:41 ; preds = %40
- %42 = load i32* %i, align 4
- %43 = add nsw i32 %42, 1
- store i32 %43, i32* %i, align 4
- br label %1
-
-; <label>:44 ; preds = %1
+ %inc = add nsw i32 %10, 1
+ store i32 %inc, i32* %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc17
+
+for.inc17: ; preds = %for.end
+ %11 = load i32* %i, align 4
+ %inc18 = add nsw i32 %11, 1
+ store i32 %inc18, i32* %i, align 4
+ br label %for.cond
+
+for.end19: ; preds = %for.cond
ret void
}
-define void @print_array() nounwind {
+; Function Attrs: nounwind uwtable
+define void @print_array() #0 {
+entry:
%i = alloca i32, align 4
%j = alloca i32, align 4
store i32 0, i32* %i, align 4
- br label %1
+ br label %for.cond
-; <label>:1 ; preds = %32, %0
- %2 = load i32* %i, align 4
- %3 = icmp slt i32 %2, 1536
- br i1 %3, label %4, label %35
+for.cond: ; preds = %for.inc10, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 1536
+ br i1 %cmp, label %for.body, label %for.end12
-; <label>:4 ; preds = %1
+for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
- br label %5
-
-; <label>:5 ; preds = %26, %4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ %1 = load i32* %j, align 4
+ %cmp2 = icmp slt i32 %1, 1536
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ %2 = load %struct._IO_FILE** @stdout, align 8
+ %3 = load i32* %j, align 4
+ %idxprom = sext i32 %3 to i64
+ %4 = load i32* %i, align 4
+ %idxprom4 = sext i32 %4 to i64
+ %arrayidx = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom4
+ %arrayidx5 = getelementptr inbounds [1536 x float]* %arrayidx, i32 0, i64 %idxprom
+ %5 = load float* %arrayidx5, align 4
+ %conv = fpext float %5 to double
+ %call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %conv)
%6 = load i32* %j, align 4
- %7 = icmp slt i32 %6, 1536
- br i1 %7, label %8, label %29
+ %rem = srem i32 %6, 80
+ %cmp6 = icmp eq i32 %rem, 79
+ br i1 %cmp6, label %if.then, label %if.end
+
+if.then: ; preds = %for.body3
+ %7 = load %struct._IO_FILE** @stdout, align 8
+ %call8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body3
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %8 = load i32* %j, align 4
+ %inc = add nsw i32 %8, 1
+ store i32 %inc, i32* %j, align 4
+ br label %for.cond1
-; <label>:8 ; preds = %5
+for.end: ; preds = %for.cond1
%9 = load %struct._IO_FILE** @stdout, align 8
- %10 = load i32* %j, align 4
- %11 = sext i32 %10 to i64
- %12 = load i32* %i, align 4
- %13 = sext i32 %12 to i64
- %14 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %13
- %15 = getelementptr inbounds [1536 x float]* %14, i32 0, i64 %11
- %16 = load float* %15
- %17 = fpext float %16 to double
- %18 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %17)
- %19 = load i32* %j, align 4
- %20 = srem i32 %19, 80
- %21 = icmp eq i32 %20, 79
- br i1 %21, label %22, label %25
-
-; <label>:22 ; preds = %8
- %23 = load %struct._IO_FILE** @stdout, align 8
- %24 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %23, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
- br label %25
-
-; <label>:25 ; preds = %22, %8
- br label %26
-
-; <label>:26 ; preds = %25
- %27 = load i32* %j, align 4
- %28 = add nsw i32 %27, 1
- store i32 %28, i32* %j, align 4
- br label %5
-
-; <label>:29 ; preds = %5
- %30 = load %struct._IO_FILE** @stdout, align 8
- %31 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %30, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
- br label %32
-
-; <label>:32 ; preds = %29
- %33 = load i32* %i, align 4
- %34 = add nsw i32 %33, 1
- store i32 %34, i32* %i, align 4
- br label %1
-
-; <label>:35 ; preds = %1
+ %call9 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
+ br label %for.inc10
+
+for.inc10: ; preds = %for.end
+ %10 = load i32* %i, align 4
+ %inc11 = add nsw i32 %10, 1
+ store i32 %inc11, i32* %i, align 4
+ br label %for.cond
+
+for.end12: ; preds = %for.cond
ret void
}
-declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
+declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
-define i32 @main() nounwind {
- %1 = alloca i32, align 4
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
%i = alloca i32, align 4
%j = alloca i32, align 4
%k = alloca i32, align 4
%t_start = alloca double, align 8
%t_end = alloca double, align 8
- store i32 0, i32* %1
+ store i32 0, i32* %retval
call void @init_array()
store i32 0, i32* %i, align 4
- br label %2
+ br label %for.cond
-; <label>:2 ; preds = %57, %0
- %3 = load i32* %i, align 4
- %4 = icmp slt i32 %3, 1536
- br i1 %4, label %5, label %60
+for.cond: ; preds = %for.inc28, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 1536
+ br i1 %cmp, label %for.body, label %for.end30
-; <label>:5 ; preds = %2
+for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
- br label %6
+ br label %for.cond1
-; <label>:6 ; preds = %53, %5
- %7 = load i32* %j, align 4
- %8 = icmp slt i32 %7, 1536
- br i1 %8, label %9, label %56
+for.cond1: ; preds = %for.inc25, %for.body
+ %1 = load i32* %j, align 4
+ %cmp2 = icmp slt i32 %1, 1536
+ br i1 %cmp2, label %for.body3, label %for.end27
-; <label>:9 ; preds = %6
- %10 = load i32* %j, align 4
- %11 = sext i32 %10 to i64
- %12 = load i32* %i, align 4
- %13 = sext i32 %12 to i64
- %14 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %13
- %15 = getelementptr inbounds [1536 x float]* %14, i32 0, i64 %11
- store float 0.000000e+00, float* %15
+for.body3: ; preds = %for.cond1
+ %2 = load i32* %j, align 4
+ %idxprom = sext i32 %2 to i64
+ %3 = load i32* %i, align 4
+ %idxprom4 = sext i32 %3 to i64
+ %arrayidx = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom4
+ %arrayidx5 = getelementptr inbounds [1536 x float]* %arrayidx, i32 0, i64 %idxprom
+ store float 0.000000e+00, float* %arrayidx5, align 4
store i32 0, i32* %k, align 4
- br label %16
-
-; <label>:16 ; preds = %49, %9
- %17 = load i32* %k, align 4
- %18 = icmp slt i32 %17, 1536
- br i1 %18, label %19, label %52
-
-; <label>:19 ; preds = %16
- %20 = load i32* %j, align 4
- %21 = sext i32 %20 to i64
- %22 = load i32* %i, align 4
- %23 = sext i32 %22 to i64
- %24 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %23
- %25 = getelementptr inbounds [1536 x float]* %24, i32 0, i64 %21
- %26 = load float* %25
- %27 = load i32* %k, align 4
- %28 = sext i32 %27 to i64
- %29 = load i32* %i, align 4
- %30 = sext i32 %29 to i64
- %31 = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %30
- %32 = getelementptr inbounds [1536 x float]* %31, i32 0, i64 %28
- %33 = load float* %32
- %34 = load i32* %j, align 4
- %35 = sext i32 %34 to i64
- %36 = load i32* %k, align 4
- %37 = sext i32 %36 to i64
- %38 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %37
- %39 = getelementptr inbounds [1536 x float]* %38, i32 0, i64 %35
- %40 = load float* %39
- %41 = fmul float %33, %40
- %42 = fadd float %26, %41
- %43 = load i32* %j, align 4
- %44 = sext i32 %43 to i64
- %45 = load i32* %i, align 4
- %46 = sext i32 %45 to i64
- %47 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %46
- %48 = getelementptr inbounds [1536 x float]* %47, i32 0, i64 %44
- store float %42, float* %48
- br label %49
-
-; <label>:49 ; preds = %19
- %50 = load i32* %k, align 4
- %51 = add nsw i32 %50, 1
- store i32 %51, i32* %k, align 4
- br label %16
-
-; <label>:52 ; preds = %16
- br label %53
-
-; <label>:53 ; preds = %52
- %54 = load i32* %j, align 4
- %55 = add nsw i32 %54, 1
- store i32 %55, i32* %j, align 4
- br label %6
-
-; <label>:56 ; preds = %6
- br label %57
-
-; <label>:57 ; preds = %56
- %58 = load i32* %i, align 4
- %59 = add nsw i32 %58, 1
- store i32 %59, i32* %i, align 4
- br label %2
-
-; <label>:60 ; preds = %2
+ br label %for.cond6
+
+for.cond6: ; preds = %for.inc, %for.body3
+ %4 = load i32* %k, align 4
+ %cmp7 = icmp slt i32 %4, 1536
+ br i1 %cmp7, label %for.body8, label %for.end
+
+for.body8: ; preds = %for.cond6
+ %5 = load i32* %j, align 4
+ %idxprom9 = sext i32 %5 to i64
+ %6 = load i32* %i, align 4
+ %idxprom10 = sext i32 %6 to i64
+ %arrayidx11 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom10
+ %arrayidx12 = getelementptr inbounds [1536 x float]* %arrayidx11, i32 0, i64 %idxprom9
+ %7 = load float* %arrayidx12, align 4
+ %8 = load i32* %k, align 4
+ %idxprom13 = sext i32 %8 to i64
+ %9 = load i32* %i, align 4
+ %idxprom14 = sext i32 %9 to i64
+ %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %idxprom14
+ %arrayidx16 = getelementptr inbounds [1536 x float]* %arrayidx15, i32 0, i64 %idxprom13
+ %10 = load float* %arrayidx16, align 4
+ %11 = load i32* %j, align 4
+ %idxprom17 = sext i32 %11 to i64
+ %12 = load i32* %k, align 4
+ %idxprom18 = sext i32 %12 to i64
+ %arrayidx19 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %idxprom18
+ %arrayidx20 = getelementptr inbounds [1536 x float]* %arrayidx19, i32 0, i64 %idxprom17
+ %13 = load float* %arrayidx20, align 4
+ %mul = fmul float %10, %13
+ %add = fadd float %7, %mul
+ %14 = load i32* %j, align 4
+ %idxprom21 = sext i32 %14 to i64
+ %15 = load i32* %i, align 4
+ %idxprom22 = sext i32 %15 to i64
+ %arrayidx23 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom22
+ %arrayidx24 = getelementptr inbounds [1536 x float]* %arrayidx23, i32 0, i64 %idxprom21
+ store float %add, float* %arrayidx24, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body8
+ %16 = load i32* %k, align 4
+ %inc = add nsw i32 %16, 1
+ store i32 %inc, i32* %k, align 4
+ br label %for.cond6
+
+for.end: ; preds = %for.cond6
+ br label %for.inc25
+
+for.inc25: ; preds = %for.end
+ %17 = load i32* %j, align 4
+ %inc26 = add nsw i32 %17, 1
+ store i32 %inc26, i32* %j, align 4
+ br label %for.cond1
+
+for.end27: ; preds = %for.cond1
+ br label %for.inc28
+
+for.inc28: ; preds = %for.end27
+ %18 = load i32* %i, align 4
+ %inc29 = add nsw i32 %18, 1
+ store i32 %inc29, i32* %i, align 4
+ br label %for.cond
+
+for.end30: ; preds = %for.cond
ret i32 0
}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/polly/www/experiments/matmul/scops.init_array.dot b/polly/www/experiments/matmul/scops.init_array.dot
index 1b3f09284f9..10a0add0a61 100644
--- a/polly/www/experiments/matmul/scops.init_array.dot
+++ b/polly/www/experiments/matmul/scops.init_array.dot
@@ -1,47 +1,47 @@
digraph "Scop Graph for 'init_array' function" {
label="Scop Graph for 'init_array' function";
- Node0x26ade30 [shape=record,label="{%0:\l\l br label %1\l}"];
- Node0x26ade30 -> Node0x26acdd0;
- Node0x26acdd0 [shape=record,label="{%1:\l\l %2 = phi i64 [ %indvar.next2, %18 ], [ 0, %0 ]\l %exitcond5 = icmp ne i64 %2, 1536\l br i1 %exitcond5, label %3, label %19\l}"];
- Node0x26acdd0 -> Node0x26acdf0;
- Node0x26acdd0 -> Node0x26adce0;
- Node0x26acdf0 [shape=record,label="{%3:\l\l br label %4\l}"];
- Node0x26acdf0 -> Node0x26addc0;
- Node0x26addc0 [shape=record,label="{%4:\l\l %indvar = phi i64 [ %indvar.next, %16 ], [ 0, %3 ]\l %scevgep4 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %2, i64 %indvar\l %scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %2, i64 %indvar\l %tmp = mul i64 %2, %indvar\l %tmp3 = trunc i64 %tmp to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %5, label %17\l}"];
- Node0x26addc0 -> Node0x26ace70;
- Node0x26addc0 -> Node0x26ad010;
- Node0x26ace70 [shape=record,label="{%5:\l\l %6 = srem i32 %tmp3, 1024\l %7 = add nsw i32 1, %6\l %8 = sitofp i32 %7 to double\l %9 = fdiv double %8, 2.000000e+00\l %10 = fptrunc double %9 to float\l store float %10, float* %scevgep4\l %11 = srem i32 %tmp3, 1024\l %12 = add nsw i32 1, %11\l %13 = sitofp i32 %12 to double\l %14 = fdiv double %13, 2.000000e+00\l %15 = fptrunc double %14 to float\l store float %15, float* %scevgep\l br label %16\l}"];
- Node0x26ace70 -> Node0x26ace90;
- Node0x26ace90 [shape=record,label="{%16:\l\l %indvar.next = add i64 %indvar, 1\l br label %4\l}"];
- Node0x26ace90 -> Node0x26addc0[constraint=false];
- Node0x26ad010 [shape=record,label="{%17:\l\l br label %18\l}"];
- Node0x26ad010 -> Node0x26ad6c0;
- Node0x26ad6c0 [shape=record,label="{%18:\l\l %indvar.next2 = add i64 %2, 1\l br label %1\l}"];
- Node0x26ad6c0 -> Node0x26acdd0[constraint=false];
- Node0x26adce0 [shape=record,label="{%19:\l\l ret void\l}"];
+ Node0x17d4370 [shape=record,label="{entry:\l br label %for.cond\l}"];
+ Node0x17d4370 -> Node0x17da5d0;
+ Node0x17da5d0 [shape=record,label="{for.cond: \l %0 = phi i64 [ %indvar.next2, %for.inc17 ], [ 0, %entry ]\l %exitcond3 = icmp ne i64 %0, 1536\l br i1 %exitcond3, label %for.body, label %for.end19\l}"];
+ Node0x17da5d0 -> Node0x17da5f0;
+ Node0x17da5d0 -> Node0x17da650;
+ Node0x17da5f0 [shape=record,label="{for.body: \l br label %for.cond1\l}"];
+ Node0x17da5f0 -> Node0x17da900;
+ Node0x17da900 [shape=record,label="{for.cond1: \l %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]\l %arrayidx6 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %0, i64 %indvar\l %arrayidx16 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %0, i64 %indvar\l %1 = mul i64 %0, %indvar\l %mul = trunc i64 %1 to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
+ Node0x17da900 -> Node0x17da670;
+ Node0x17da900 -> Node0x17da9a0;
+ Node0x17da670 [shape=record,label="{for.body3: \l %rem = srem i32 %mul, 1024\l %add = add nsw i32 1, %rem\l %conv = sitofp i32 %add to double\l %div = fdiv double %conv, 2.000000e+00\l %conv4 = fptrunc double %div to float\l store float %conv4, float* %arrayidx6, align 4\l %rem8 = srem i32 %mul, 1024\l %add9 = add nsw i32 1, %rem8\l %conv10 = sitofp i32 %add9 to double\l %div11 = fdiv double %conv10, 2.000000e+00\l %conv12 = fptrunc double %div11 to float\l store float %conv12, float* %arrayidx16, align 4\l br label %for.inc\l}"];
+ Node0x17da670 -> Node0x17da8e0;
+ Node0x17da8e0 [shape=record,label="{for.inc: \l %indvar.next = add i64 %indvar, 1\l br label %for.cond1\l}"];
+ Node0x17da8e0 -> Node0x17da900[constraint=false];
+ Node0x17da9a0 [shape=record,label="{for.end: \l br label %for.inc17\l}"];
+ Node0x17da9a0 -> Node0x17d9e70;
+ Node0x17d9e70 [shape=record,label="{for.inc17: \l %indvar.next2 = add i64 %0, 1\l br label %for.cond\l}"];
+ Node0x17d9e70 -> Node0x17da5d0[constraint=false];
+ Node0x17da650 [shape=record,label="{for.end19: \l ret void\l}"];
colorscheme = "paired12"
- subgraph cluster_0x26a94c0 {
+ subgraph cluster_0x17d3a30 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x26aa4e0 {
+ subgraph cluster_0x17d4ec0 {
label = "";
style = filled;
- color = 3 subgraph cluster_0x26a9780 {
+ color = 3 subgraph cluster_0x17d4180 {
label = "";
style = solid;
color = 5
- Node0x26addc0;
- Node0x26ace70;
- Node0x26ace90;
+ Node0x17da900;
+ Node0x17da670;
+ Node0x17da8e0;
}
- Node0x26acdd0;
- Node0x26acdf0;
- Node0x26ad010;
- Node0x26ad6c0;
+ Node0x17da5d0;
+ Node0x17da5f0;
+ Node0x17da9a0;
+ Node0x17d9e70;
}
- Node0x26ade30;
- Node0x26adce0;
+ Node0x17d4370;
+ Node0x17da650;
}
}
diff --git a/polly/www/experiments/matmul/scops.init_array.dot.png b/polly/www/experiments/matmul/scops.init_array.dot.png
index ee04e8b7018..48a9f38946a 100644
--- a/polly/www/experiments/matmul/scops.init_array.dot.png
+++ b/polly/www/experiments/matmul/scops.init_array.dot.png
Binary files differ
diff --git a/polly/www/experiments/matmul/scops.main.dot b/polly/www/experiments/matmul/scops.main.dot
index 0459c48fb50..2a1fc50cf72 100644
--- a/polly/www/experiments/matmul/scops.main.dot
+++ b/polly/www/experiments/matmul/scops.main.dot
@@ -1,65 +1,65 @@
digraph "Scop Graph for 'main' function" {
label="Scop Graph for 'main' function";
- Node0x26ace10 [shape=record,label="{%0:\l\l call void @init_array()\l br label %1\l}"];
- Node0x26ace10 -> Node0x26acd60;
- Node0x26acd60 [shape=record,label="{%1:\l\l %indvar3 = phi i64 [ %indvar.next4, %16 ], [ 0, %0 ]\l %exitcond9 = icmp ne i64 %indvar3, 1536\l br i1 %exitcond9, label %2, label %17\l}"];
- Node0x26acd60 -> Node0x26acd80;
- Node0x26acd60 -> Node0x26af2e0;
- Node0x26acd80 [shape=record,label="{%2:\l\l br label %3\l}"];
- Node0x26acd80 -> Node0x26aee80;
- Node0x26aee80 [shape=record,label="{%3:\l\l %indvar1 = phi i64 [ %indvar.next2, %14 ], [ 0, %2 ]\l %scevgep8 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1\l %exitcond6 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond6, label %4, label %15\l}"];
- Node0x26aee80 -> Node0x26aeea0;
- Node0x26aee80 -> Node0x26aeec0;
- Node0x26aeea0 [shape=record,label="{%4:\l\l store float 0.000000e+00, float* %scevgep8\l br label %5\l}"];
- Node0x26aeea0 -> Node0x26aced0;
- Node0x26aced0 [shape=record,label="{%5:\l\l %indvar = phi i64 [ %indvar.next, %12 ], [ 0, %4 ]\l %scevgep5 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar\l %scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %6, label %13\l}"];
- Node0x26aced0 -> Node0x26ace60;
- Node0x26aced0 -> Node0x26af5e0;
- Node0x26ace60 [shape=record,label="{%6:\l\l %7 = load float* %scevgep8\l %8 = load float* %scevgep5\l %9 = load float* %scevgep\l %10 = fmul float %8, %9\l %11 = fadd float %7, %10\l store float %11, float* %scevgep8\l br label %12\l}"];
- Node0x26ace60 -> Node0x26af640;
- Node0x26af640 [shape=record,label="{%12:\l\l %indvar.next = add i64 %indvar, 1\l br label %5\l}"];
- Node0x26af640 -> Node0x26aced0[constraint=false];
- Node0x26af5e0 [shape=record,label="{%13:\l\l br label %14\l}"];
- Node0x26af5e0 -> Node0x26af6e0;
- Node0x26af6e0 [shape=record,label="{%14:\l\l %indvar.next2 = add i64 %indvar1, 1\l br label %3\l}"];
- Node0x26af6e0 -> Node0x26aee80[constraint=false];
- Node0x26aeec0 [shape=record,label="{%15:\l\l br label %16\l}"];
- Node0x26aeec0 -> Node0x26af740;
- Node0x26af740 [shape=record,label="{%16:\l\l %indvar.next4 = add i64 %indvar3, 1\l br label %1\l}"];
- Node0x26af740 -> Node0x26acd60[constraint=false];
- Node0x26af2e0 [shape=record,label="{%17:\l\l ret i32 0\l}"];
+ Node0x17d21a0 [shape=record,label="{entry:\l call void @init_array()\l br label %for.cond\l}"];
+ Node0x17d21a0 -> Node0x17d2020;
+ Node0x17d2020 [shape=record,label="{for.cond: \l %indvar3 = phi i64 [ %indvar.next4, %for.inc28 ], [ 0, %entry ]\l %exitcond6 = icmp ne i64 %indvar3, 1536\l br i1 %exitcond6, label %for.body, label %for.end30\l}"];
+ Node0x17d2020 -> Node0x17d3950;
+ Node0x17d2020 -> Node0x17da500;
+ Node0x17d3950 [shape=record,label="{for.body: \l br label %for.cond1\l}"];
+ Node0x17d3950 -> Node0x17da760;
+ Node0x17da760 [shape=record,label="{for.cond1: \l %indvar1 = phi i64 [ %indvar.next2, %for.inc25 ], [ 0, %for.body ]\l %arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1\l %exitcond5 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond5, label %for.body3, label %for.end27\l}"];
+ Node0x17da760 -> Node0x17db1e0;
+ Node0x17da760 -> Node0x17db250;
+ Node0x17db1e0 [shape=record,label="{for.body3: \l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.cond6\l}"];
+ Node0x17db1e0 -> Node0x17da740;
+ Node0x17da740 [shape=record,label="{for.cond6: \l %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body3 ]\l %arrayidx16 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar\l %arrayidx20 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %for.body8, label %for.end\l}"];
+ Node0x17da740 -> Node0x17da5a0;
+ Node0x17da740 -> Node0x17da800;
+ Node0x17da5a0 [shape=record,label="{for.body8: \l %0 = load float* %arrayidx5, align 4\l %1 = load float* %arrayidx16, align 4\l %2 = load float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l store float %add, float* %arrayidx5, align 4\l br label %for.inc\l}"];
+ Node0x17da5a0 -> Node0x17da5c0;
+ Node0x17da5c0 [shape=record,label="{for.inc: \l %indvar.next = add i64 %indvar, 1\l br label %for.cond6\l}"];
+ Node0x17da5c0 -> Node0x17da740[constraint=false];
+ Node0x17da800 [shape=record,label="{for.end: \l br label %for.inc25\l}"];
+ Node0x17da800 -> Node0x17dae20;
+ Node0x17dae20 [shape=record,label="{for.inc25: \l %indvar.next2 = add i64 %indvar1, 1\l br label %for.cond1\l}"];
+ Node0x17dae20 -> Node0x17da760[constraint=false];
+ Node0x17db250 [shape=record,label="{for.end27: \l br label %for.inc28\l}"];
+ Node0x17db250 -> Node0x17dae80;
+ Node0x17dae80 [shape=record,label="{for.inc28: \l %indvar.next4 = add i64 %indvar3, 1\l br label %for.cond\l}"];
+ Node0x17dae80 -> Node0x17d2020[constraint=false];
+ Node0x17da500 [shape=record,label="{for.end30: \l ret i32 0\l}"];
colorscheme = "paired12"
- subgraph cluster_0x26a8b20 {
+ subgraph cluster_0x17d3f30 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x26a9220 {
+ subgraph cluster_0x17d38d0 {
label = "";
style = filled;
- color = 3 subgraph cluster_0x26ad500 {
+ color = 3 subgraph cluster_0x17d3850 {
label = "";
style = solid;
color = 5
- subgraph cluster_0x26ad480 {
+ subgraph cluster_0x17d37d0 {
label = "";
style = solid;
color = 7
- Node0x26aced0;
- Node0x26ace60;
- Node0x26af640;
+ Node0x17da740;
+ Node0x17da5a0;
+ Node0x17da5c0;
}
- Node0x26aee80;
- Node0x26aeea0;
- Node0x26af5e0;
- Node0x26af6e0;
+ Node0x17da760;
+ Node0x17db1e0;
+ Node0x17da800;
+ Node0x17dae20;
}
- Node0x26acd60;
- Node0x26acd80;
- Node0x26aeec0;
- Node0x26af740;
+ Node0x17d2020;
+ Node0x17d3950;
+ Node0x17db250;
+ Node0x17dae80;
}
- Node0x26ace10;
- Node0x26af2e0;
+ Node0x17d21a0;
+ Node0x17da500;
}
}
diff --git a/polly/www/experiments/matmul/scops.main.dot.png b/polly/www/experiments/matmul/scops.main.dot.png
index 404d5f19f38..4e73701a08d 100644
--- a/polly/www/experiments/matmul/scops.main.dot.png
+++ b/polly/www/experiments/matmul/scops.main.dot.png
Binary files differ
diff --git a/polly/www/experiments/matmul/scops.print_array.dot b/polly/www/experiments/matmul/scops.print_array.dot
index 6aafb40d666..59f02e76910 100644
--- a/polly/www/experiments/matmul/scops.print_array.dot
+++ b/polly/www/experiments/matmul/scops.print_array.dot
@@ -1,60 +1,60 @@
digraph "Scop Graph for 'print_array' function" {
label="Scop Graph for 'print_array' function";
- Node0x26ac9a0 [shape=record,label="{%0:\l\l br label %1\l}"];
- Node0x26ac9a0 -> Node0x26acd00;
- Node0x26acd00 [shape=record,label="{%1:\l\l %indvar1 = phi i64 [ %indvar.next2, %19 ], [ 0, %0 ]\l %exitcond3 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond3, label %2, label %20\l}"];
- Node0x26acd00 -> Node0x26a8ac0;
- Node0x26acd00 -> Node0x26ac9c0;
- Node0x26a8ac0 [shape=record,label="{%2:\l\l br label %3\l}"];
- Node0x26a8ac0 -> Node0x26ad940;
- Node0x26ad940 [shape=record,label="{%3:\l\l %indvar = phi i64 [ %indvar.next, %15 ], [ 0, %2 ]\l %scevgep = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar\l %j.0 = trunc i64 %indvar to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %4, label %16\l}"];
- Node0x26ad940 -> Node0x26acde0;
- Node0x26ad940 -> Node0x26ad9e0;
- Node0x26acde0 [shape=record,label="{%4:\l\l %5 = load %struct._IO_FILE** @stdout, align 8\l %6 = load float* %scevgep\l %7 = fpext float %6 to double\l %8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %7)\l %9 = srem i32 %j.0, 80\l %10 = icmp eq i32 %9, 79\l br i1 %10, label %11, label %14\l}"];
- Node0x26acde0 -> Node0x26ada40;
- Node0x26acde0 -> Node0x26acfa0;
- Node0x26ada40 [shape=record,label="{%11:\l\l %12 = load %struct._IO_FILE** @stdout, align 8\l %13 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %14\l}"];
- Node0x26ada40 -> Node0x26acfa0;
- Node0x26acfa0 [shape=record,label="{%14:\l\l br label %15\l}"];
- Node0x26acfa0 -> Node0x26ad6c0;
- Node0x26ad6c0 [shape=record,label="{%15:\l\l %indvar.next = add i64 %indvar, 1\l br label %3\l}"];
- Node0x26ad6c0 -> Node0x26ad940[constraint=false];
- Node0x26ad9e0 [shape=record,label="{%16:\l\l %17 = load %struct._IO_FILE** @stdout, align 8\l %18 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %17, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %19\l}"];
- Node0x26ad9e0 -> Node0x26ace00;
- Node0x26ace00 [shape=record,label="{%19:\l\l %indvar.next2 = add i64 %indvar1, 1\l br label %1\l}"];
- Node0x26ace00 -> Node0x26acd00[constraint=false];
- Node0x26ac9c0 [shape=record,label="{%20:\l\l ret void\l}"];
+ Node0x17d2200 [shape=record,label="{entry:\l br label %for.cond\l}"];
+ Node0x17d2200 -> Node0x17d4f20;
+ Node0x17d4f20 [shape=record,label="{for.cond: \l %indvar1 = phi i64 [ %indvar.next2, %for.inc10 ], [ 0, %entry ]\l %exitcond3 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond3, label %for.body, label %for.end12\l}"];
+ Node0x17d4f20 -> Node0x17d3680;
+ Node0x17d4f20 -> Node0x17d9fc0;
+ Node0x17d3680 [shape=record,label="{for.body: \l br label %for.cond1\l}"];
+ Node0x17d3680 -> Node0x17da220;
+ Node0x17da220 [shape=record,label="{for.cond1: \l %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]\l %arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar\l %j.0 = trunc i64 %indvar to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
+ Node0x17da220 -> Node0x17d9ea0;
+ Node0x17da220 -> Node0x17da0f0;
+ Node0x17d9ea0 [shape=record,label="{for.body3: \l %0 = load %struct._IO_FILE** @stdout, align 8\l %1 = load float* %arrayidx5, align 4\l %conv = fpext float %1 to double\l %call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %conv)\l %rem = srem i32 %j.0, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %if.end\l}"];
+ Node0x17d9ea0 -> Node0x17d9ec0;
+ Node0x17d9ea0 -> Node0x17da060;
+ Node0x17d9ec0 [shape=record,label="{if.then: \l %2 = load %struct._IO_FILE** @stdout, align 8\l %call8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %if.end\l}"];
+ Node0x17d9ec0 -> Node0x17da060;
+ Node0x17da060 [shape=record,label="{if.end: \l br label %for.inc\l}"];
+ Node0x17da060 -> Node0x17da200;
+ Node0x17da200 [shape=record,label="{for.inc: \l %indvar.next = add i64 %indvar, 1\l br label %for.cond1\l}"];
+ Node0x17da200 -> Node0x17da220[constraint=false];
+ Node0x17da0f0 [shape=record,label="{for.end: \l %3 = load %struct._IO_FILE** @stdout, align 8\l %call9 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %for.inc10\l}"];
+ Node0x17da0f0 -> Node0x17da080;
+ Node0x17da080 [shape=record,label="{for.inc10: \l %indvar.next2 = add i64 %indvar1, 1\l br label %for.cond\l}"];
+ Node0x17da080 -> Node0x17d4f20[constraint=false];
+ Node0x17d9fc0 [shape=record,label="{for.end12: \l ret void\l}"];
colorscheme = "paired12"
- subgraph cluster_0x26adae0 {
+ subgraph cluster_0x17d38f0 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x26aa030 {
- label = "";
+ subgraph cluster_0x17d4030 {
+ label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
style = solid;
color = 6
- subgraph cluster_0x26a9fb0 {
- label = "";
+ subgraph cluster_0x17d3fb0 {
+ label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
style = solid;
color = 5
- subgraph cluster_0x26adb60 {
- label = "";
+ subgraph cluster_0x17d3f30 {
+ label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
style = solid;
color = 7
- Node0x26acde0;
- Node0x26ada40;
+ Node0x17d9ea0;
+ Node0x17d9ec0;
}
- Node0x26ad940;
- Node0x26acfa0;
- Node0x26ad6c0;
+ Node0x17da220;
+ Node0x17da060;
+ Node0x17da200;
}
- Node0x26acd00;
- Node0x26a8ac0;
- Node0x26ad9e0;
- Node0x26ace00;
+ Node0x17d4f20;
+ Node0x17d3680;
+ Node0x17da0f0;
+ Node0x17da080;
}
- Node0x26ac9a0;
- Node0x26ac9c0;
+ Node0x17d2200;
+ Node0x17d9fc0;
}
}
diff --git a/polly/www/experiments/matmul/scops.print_array.dot.png b/polly/www/experiments/matmul/scops.print_array.dot.png
index 5b1658a291f..e3b973b131a 100644
--- a/polly/www/experiments/matmul/scops.print_array.dot.png
+++ b/polly/www/experiments/matmul/scops.print_array.dot.png
Binary files differ
diff --git a/polly/www/experiments/matmul/scopsonly.init_array.dot b/polly/www/experiments/matmul/scopsonly.init_array.dot
index 7ef7b1397a5..4685c5b0af5 100644
--- a/polly/www/experiments/matmul/scopsonly.init_array.dot
+++ b/polly/www/experiments/matmul/scopsonly.init_array.dot
@@ -1,47 +1,47 @@
digraph "Scop Graph for 'init_array' function" {
label="Scop Graph for 'init_array' function";
- Node0x24dfca0 [shape=record,label="{%0}"];
- Node0x24dfca0 -> Node0x24dfdf0;
- Node0x24dfdf0 [shape=record,label="{%1}"];
- Node0x24dfdf0 -> Node0x24dee50;
- Node0x24dfdf0 -> Node0x24def50;
- Node0x24dee50 [shape=record,label="{%3}"];
- Node0x24dee50 -> Node0x24deec0;
- Node0x24deec0 [shape=record,label="{%4}"];
- Node0x24deec0 -> Node0x24dfdc0;
- Node0x24deec0 -> Node0x24df0c0;
- Node0x24dfdc0 [shape=record,label="{%5}"];
- Node0x24dfdc0 -> Node0x24defb0;
- Node0x24defb0 [shape=record,label="{%16}"];
- Node0x24defb0 -> Node0x24deec0[constraint=false];
- Node0x24df0c0 [shape=record,label="{%17}"];
- Node0x24df0c0 -> Node0x24deee0;
- Node0x24deee0 [shape=record,label="{%18}"];
- Node0x24deee0 -> Node0x24dfdf0[constraint=false];
- Node0x24def50 [shape=record,label="{%19}"];
+ Node0x17d4370 [shape=record,label="{entry}"];
+ Node0x17d4370 -> Node0x17d9de0;
+ Node0x17d9de0 [shape=record,label="{for.cond}"];
+ Node0x17d9de0 -> Node0x17d9e40;
+ Node0x17d9de0 -> Node0x17d9ea0;
+ Node0x17d9e40 [shape=record,label="{for.body}"];
+ Node0x17d9e40 -> Node0x17d9f90;
+ Node0x17d9f90 [shape=record,label="{for.cond1}"];
+ Node0x17d9f90 -> Node0x17d9ff0;
+ Node0x17d9f90 -> Node0x17da050;
+ Node0x17d9ff0 [shape=record,label="{for.body3}"];
+ Node0x17d9ff0 -> Node0x17d9f00;
+ Node0x17d9f00 [shape=record,label="{for.inc}"];
+ Node0x17d9f00 -> Node0x17d9f90[constraint=false];
+ Node0x17da050 [shape=record,label="{for.end}"];
+ Node0x17da050 -> Node0x17da200;
+ Node0x17da200 [shape=record,label="{for.inc17}"];
+ Node0x17da200 -> Node0x17d9de0[constraint=false];
+ Node0x17d9ea0 [shape=record,label="{for.end19}"];
colorscheme = "paired12"
- subgraph cluster_0x24db4c0 {
+ subgraph cluster_0x17d3a30 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x24dc4e0 {
+ subgraph cluster_0x17d4ec0 {
label = "";
style = filled;
- color = 3 subgraph cluster_0x24db780 {
+ color = 3 subgraph cluster_0x17d4180 {
label = "";
style = solid;
color = 5
- Node0x24deec0;
- Node0x24dfdc0;
- Node0x24defb0;
+ Node0x17d9f90;
+ Node0x17d9ff0;
+ Node0x17d9f00;
}
- Node0x24dfdf0;
- Node0x24dee50;
- Node0x24df0c0;
- Node0x24deee0;
+ Node0x17d9de0;
+ Node0x17d9e40;
+ Node0x17da050;
+ Node0x17da200;
}
- Node0x24dfca0;
- Node0x24def50;
+ Node0x17d4370;
+ Node0x17d9ea0;
}
}
diff --git a/polly/www/experiments/matmul/scopsonly.init_array.dot.png b/polly/www/experiments/matmul/scopsonly.init_array.dot.png
index 92c4f9882bd..f101d4d3081 100644
--- a/polly/www/experiments/matmul/scopsonly.init_array.dot.png
+++ b/polly/www/experiments/matmul/scopsonly.init_array.dot.png
Binary files differ
diff --git a/polly/www/experiments/matmul/scopsonly.main.dot b/polly/www/experiments/matmul/scopsonly.main.dot
index d375349730a..3b3f3288019 100644
--- a/polly/www/experiments/matmul/scopsonly.main.dot
+++ b/polly/www/experiments/matmul/scopsonly.main.dot
@@ -1,65 +1,65 @@
digraph "Scop Graph for 'main' function" {
label="Scop Graph for 'main' function";
- Node0x24deb60 [shape=record,label="{%0}"];
- Node0x24deb60 -> Node0x24deaa0;
- Node0x24deaa0 [shape=record,label="{%1}"];
- Node0x24deaa0 -> Node0x24e12a0;
- Node0x24deaa0 -> Node0x24e0e30;
- Node0x24e12a0 [shape=record,label="{%2}"];
- Node0x24e12a0 -> Node0x24e0e00;
- Node0x24e0e00 [shape=record,label="{%3}"];
- Node0x24e0e00 -> Node0x24e1410;
- Node0x24e0e00 -> Node0x24e1470;
- Node0x24e1410 [shape=record,label="{%4}"];
- Node0x24e1410 -> Node0x24e1380;
- Node0x24e1380 [shape=record,label="{%5}"];
- Node0x24e1380 -> Node0x24deaf0;
- Node0x24e1380 -> Node0x24e1620;
- Node0x24deaf0 [shape=record,label="{%6}"];
- Node0x24deaf0 -> Node0x24e1680;
- Node0x24e1680 [shape=record,label="{%12}"];
- Node0x24e1680 -> Node0x24e1380[constraint=false];
- Node0x24e1620 [shape=record,label="{%13}"];
- Node0x24e1620 -> Node0x24e16e0;
- Node0x24e16e0 [shape=record,label="{%14}"];
- Node0x24e16e0 -> Node0x24e0e00[constraint=false];
- Node0x24e1470 [shape=record,label="{%15}"];
- Node0x24e1470 -> Node0x24e01a0;
- Node0x24e01a0 [shape=record,label="{%16}"];
- Node0x24e01a0 -> Node0x24deaa0[constraint=false];
- Node0x24e0e30 [shape=record,label="{%17}"];
+ Node0x17d3950 [shape=record,label="{entry}"];
+ Node0x17d3950 -> Node0x17d21a0;
+ Node0x17d21a0 [shape=record,label="{for.cond}"];
+ Node0x17d21a0 -> Node0x17db9a0;
+ Node0x17d21a0 -> Node0x17da4f0;
+ Node0x17db9a0 [shape=record,label="{for.body}"];
+ Node0x17db9a0 -> Node0x17da5e0;
+ Node0x17da5e0 [shape=record,label="{for.cond1}"];
+ Node0x17da5e0 -> Node0x17da640;
+ Node0x17da5e0 -> Node0x17da6a0;
+ Node0x17da640 [shape=record,label="{for.body3}"];
+ Node0x17da640 -> Node0x17da550;
+ Node0x17da550 [shape=record,label="{for.cond6}"];
+ Node0x17da550 -> Node0x17da5b0;
+ Node0x17da550 -> Node0x17da850;
+ Node0x17da5b0 [shape=record,label="{for.body8}"];
+ Node0x17da5b0 -> Node0x17da8b0;
+ Node0x17da8b0 [shape=record,label="{for.inc}"];
+ Node0x17da8b0 -> Node0x17da550[constraint=false];
+ Node0x17da850 [shape=record,label="{for.end}"];
+ Node0x17da850 -> Node0x17db930;
+ Node0x17db930 [shape=record,label="{for.inc25}"];
+ Node0x17db930 -> Node0x17da5e0[constraint=false];
+ Node0x17da6a0 [shape=record,label="{for.end27}"];
+ Node0x17da6a0 -> Node0x17dada0;
+ Node0x17dada0 [shape=record,label="{for.inc28}"];
+ Node0x17dada0 -> Node0x17d21a0[constraint=false];
+ Node0x17da4f0 [shape=record,label="{for.end30}"];
colorscheme = "paired12"
- subgraph cluster_0x24dfc10 {
+ subgraph cluster_0x17d3f30 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x24de570 {
+ subgraph cluster_0x17d38d0 {
label = "";
style = filled;
- color = 3 subgraph cluster_0x24de7a0 {
+ color = 3 subgraph cluster_0x17d3850 {
label = "";
style = solid;
color = 5
- subgraph cluster_0x24de720 {
+ subgraph cluster_0x17d37d0 {
label = "";
style = solid;
color = 7
- Node0x24e1380;
- Node0x24deaf0;
- Node0x24e1680;
+ Node0x17da550;
+ Node0x17da5b0;
+ Node0x17da8b0;
}
- Node0x24e0e00;
- Node0x24e1410;
- Node0x24e1620;
- Node0x24e16e0;
+ Node0x17da5e0;
+ Node0x17da640;
+ Node0x17da850;
+ Node0x17db930;
}
- Node0x24deaa0;
- Node0x24e12a0;
- Node0x24e1470;
- Node0x24e01a0;
+ Node0x17d21a0;
+ Node0x17db9a0;
+ Node0x17da6a0;
+ Node0x17dada0;
}
- Node0x24deb60;
- Node0x24e0e30;
+ Node0x17d3950;
+ Node0x17da4f0;
}
}
diff --git a/polly/www/experiments/matmul/scopsonly.main.dot.png b/polly/www/experiments/matmul/scopsonly.main.dot.png
index f0cf154bc79..32634243888 100644
--- a/polly/www/experiments/matmul/scopsonly.main.dot.png
+++ b/polly/www/experiments/matmul/scopsonly.main.dot.png
Binary files differ
diff --git a/polly/www/experiments/matmul/scopsonly.print_array.dot b/polly/www/experiments/matmul/scopsonly.print_array.dot
index 7c46729e31d..c900d0f1be1 100644
--- a/polly/www/experiments/matmul/scopsonly.print_array.dot
+++ b/polly/www/experiments/matmul/scopsonly.print_array.dot
@@ -1,60 +1,60 @@
digraph "Scop Graph for 'print_array' function" {
label="Scop Graph for 'print_array' function";
- Node0x24df2c0 [shape=record,label="{%0}"];
- Node0x24df2c0 -> Node0x24df2a0;
- Node0x24df2a0 [shape=record,label="{%1}"];
- Node0x24df2a0 -> Node0x24dee90;
- Node0x24df2a0 -> Node0x24dee20;
- Node0x24dee90 [shape=record,label="{%2}"];
- Node0x24dee90 -> Node0x24debd0;
- Node0x24debd0 [shape=record,label="{%3}"];
- Node0x24debd0 -> Node0x24df150;
- Node0x24debd0 -> Node0x24de990;
- Node0x24df150 [shape=record,label="{%4}"];
- Node0x24df150 -> Node0x24df3a0;
- Node0x24df150 -> Node0x24defb0;
- Node0x24df3a0 [shape=record,label="{%11}"];
- Node0x24df3a0 -> Node0x24defb0;
- Node0x24defb0 [shape=record,label="{%14}"];
- Node0x24defb0 -> Node0x24df530;
- Node0x24df530 [shape=record,label="{%15}"];
- Node0x24df530 -> Node0x24debd0[constraint=false];
- Node0x24de990 [shape=record,label="{%16}"];
- Node0x24de990 -> Node0x24df9a0;
- Node0x24df9a0 [shape=record,label="{%19}"];
- Node0x24df9a0 -> Node0x24df2a0[constraint=false];
- Node0x24dee20 [shape=record,label="{%20}"];
+ Node0x17d2200 [shape=record,label="{entry}"];
+ Node0x17d2200 -> Node0x17d4f20;
+ Node0x17d4f20 [shape=record,label="{for.cond}"];
+ Node0x17d4f20 -> Node0x17d9fd0;
+ Node0x17d4f20 -> Node0x17da030;
+ Node0x17d9fd0 [shape=record,label="{for.body}"];
+ Node0x17d9fd0 -> Node0x17da120;
+ Node0x17da120 [shape=record,label="{for.cond1}"];
+ Node0x17da120 -> Node0x17da180;
+ Node0x17da120 -> Node0x17da1e0;
+ Node0x17da180 [shape=record,label="{for.body3}"];
+ Node0x17da180 -> Node0x17da090;
+ Node0x17da180 -> Node0x17da0f0;
+ Node0x17da090 [shape=record,label="{if.then}"];
+ Node0x17da090 -> Node0x17da0f0;
+ Node0x17da0f0 [shape=record,label="{if.end}"];
+ Node0x17da0f0 -> Node0x17da390;
+ Node0x17da390 [shape=record,label="{for.inc}"];
+ Node0x17da390 -> Node0x17da120[constraint=false];
+ Node0x17da1e0 [shape=record,label="{for.end}"];
+ Node0x17da1e0 -> Node0x17d9e40;
+ Node0x17d9e40 [shape=record,label="{for.inc10}"];
+ Node0x17d9e40 -> Node0x17d4f20[constraint=false];
+ Node0x17da030 [shape=record,label="{for.end12}"];
colorscheme = "paired12"
- subgraph cluster_0x24dbe40 {
+ subgraph cluster_0x17d38f0 {
label = "";
style = solid;
color = 1
- subgraph cluster_0x24db6e0 {
- label = "";
+ subgraph cluster_0x17d4030 {
+ label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
style = solid;
color = 6
- subgraph cluster_0x24db660 {
- label = "";
+ subgraph cluster_0x17d3fb0 {
+ label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
style = solid;
color = 5
- subgraph cluster_0x24db5e0 {
- label = "";
+ subgraph cluster_0x17d3f30 {
+ label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
style = solid;
color = 7
- Node0x24df150;
- Node0x24df3a0;
+ Node0x17da180;
+ Node0x17da090;
}
- Node0x24debd0;
- Node0x24defb0;
- Node0x24df530;
+ Node0x17da120;
+ Node0x17da0f0;
+ Node0x17da390;
}
- Node0x24df2a0;
- Node0x24dee90;
- Node0x24de990;
- Node0x24df9a0;
+ Node0x17d4f20;
+ Node0x17d9fd0;
+ Node0x17da1e0;
+ Node0x17d9e40;
}
- Node0x24df2c0;
- Node0x24dee20;
+ Node0x17d2200;
+ Node0x17da030;
}
}
diff --git a/polly/www/experiments/matmul/scopsonly.print_array.dot.png b/polly/www/experiments/matmul/scopsonly.print_array.dot.png
index 3426e7b06fb..b0d4b45aace 100644
--- a/polly/www/experiments/matmul/scopsonly.print_array.dot.png
+++ b/polly/www/experiments/matmul/scopsonly.print_array.dot.png
Binary files differ
OpenPOWER on IntegriCloud