diff options
Diffstat (limited to 'polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s')
| -rw-r--r-- | polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s | 628 |
1 files changed, 628 insertions, 0 deletions
# ======================================================================
# Provenance (kept from the diff header this listing was extracted from):
#   diff --git a/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
#   new file mode 100644  index 00000000000..04dc0656c06
#   --- /dev/null
#   +++ b/polly/www/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.s
#   @@ -0,0 +1,628 @@
# The listing below is the added file with its line structure restored;
# instructions and directives are unchanged.
#
# Syntax : AT&T (GAS), x86-64.
# ABI    : call sites pass arguments in %rdi, %rsi, %rdx, %rcx, %r8, %r9 and
#          keep %rsp 16-byte aligned at each callq (System V AMD64).
# Data   : A, B, C are 9437184-byte commons, i.e. 1536 rows of 6144 bytes
#          (1536 single-precision values per row). They are addressed with
#          absolute symbol references, not RIP-relative ones.
# ======================================================================
        .file   "matmul.polly.interchanged+tiled+vector+openmp.ll"
        .text

# ----------------------------------------------------------------------
# init_array
#   Stores the pair {A, B} on the stack and passes it, together with
#   init_array.omp_subfn, to GOMP_parallel_loop_runtime_start using
#   %edx = 0, %ecx = 0, %r8d = 1536, %r9d = 1 (libgomp's prototype names
#   these num_threads, start, end, incr). The calling thread then runs the
#   sub-function itself and finishes with GOMP_parallel_end.
#   Stack: one saved register (%rbx) + 16 bytes of locals.
# ----------------------------------------------------------------------
        .globl  init_array
        .align  16, 0x90
        .type   init_array,@function
init_array:                             # @init_array
# BB#0:                                 # %pollyBB
        pushq   %rbx
        subq    $16, %rsp
        movq    $A, (%rsp)              # descriptor[0] = &A
        movq    $B, 8(%rsp)             # descriptor[1] = &B
        movl    $init_array.omp_subfn, %edi
        leaq    (%rsp), %rbx            # %rbx = &descriptor, live across the calls
        xorl    %edx, %edx
        xorl    %ecx, %ecx
        movl    $1536, %r8d             # imm = 0x600
        movl    $1, %r9d
        movq    %rbx, %rsi
        callq   GOMP_parallel_loop_runtime_start
        movq    %rbx, %rdi
        callq   init_array.omp_subfn    # calling thread takes part in the loop
        callq   GOMP_parallel_end
        addq    $16, %rsp
        popq    %rbx
        ret
.Ltmp0:
        .size   init_array, .Ltmp0-init_array

# ----------------------------------------------------------------------
# print_array
#   Writes every element of C to stdout: each single-precision value is
#   widened to double and printed with fprintf using the format at .L.str
#   ("%lf "). Character 10 (newline) is written with fputc after each
#   column j where j % 80 == 79, and once more after every row.
#   Registers: %rbx = byte offset of the current row measured from the end
#              of C (-9437184 .. 0, step 6144); %r14 = column index j.
# ----------------------------------------------------------------------
        .globl  print_array
        .align  16, 0x90
        .type   print_array,@function
print_array:                            # @print_array
# BB#0:
        pushq   %r14
        pushq   %rbx
        pushq   %rax                    # 8 bytes of padding; released by addq $8 below
        movq    $-9437184, %rbx         # imm = 0xFFFFFFFFFF700000
        .align  16, 0x90
.LBB1_1:                                # %.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB1_2 Depth 2
        xorl    %r14d, %r14d            # j = 0
        movq    stdout(%rip), %rdi
        .align  16, 0x90
.LBB1_2:                                #   Parent Loop BB1_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        movss   C+9437184(%rbx,%r14,4), %xmm0 # C + (row offset from end) + 4*j
        cvtss2sd        %xmm0, %xmm0
        movl    $.L.str, %esi
        movb    $1, %al                 # one vector register used by the variadic call
        callq   fprintf
        movslq  %r14d, %rax
        imulq   $1717986919, %rax, %rcx # imm = 0x66666667
        movq    %rcx, %rdx
        shrq    $63, %rdx
        sarq    $37, %rcx               # with the multiply above: %ecx = j / 80
        addl    %edx, %ecx
        imull   $80, %ecx, %ecx
        subl    %ecx, %eax              # %eax = j % 80
        cmpl    $79, %eax
        jne     .LBB1_4
# BB#3:                                 #   in Loop: Header=BB1_2 Depth=2
        movq    stdout(%rip), %rsi
        movl    $10, %edi
        callq   fputc
.LBB1_4:                                #   in Loop: Header=BB1_2 Depth=2
        incq    %r14
        movq    stdout(%rip), %rsi      # reloaded: serves fputc on exit, fprintf on repeat
        cmpq    $1536, %r14             # imm = 0x600
        movq    %rsi, %rdi
        jne     .LBB1_2
# BB#5:                                 #   in Loop: Header=BB1_1 Depth=1
        movl    $10, %edi
        callq   fputc
        addq    $6144, %rbx             # imm = 0x1800
        jne     .LBB1_1                 # offset reaches 0 after the last row
# BB#6:
        addq    $8, %rsp
        popq    %rbx
        popq    %r14
        ret
.Ltmp1:
        .size   print_array, .Ltmp1-print_array

# ----------------------------------------------------------------------
# main
#   1. Launches init_array.omp_subfn over 0..1536 step 1 with descriptor
#      {A, B} (same call sequence as init_array).
#   2. Launches main.omp_subfn over 0..1536 step 1 with descriptor
#      {C, A, B}; the calling thread's share is an inlined copy of that
#      sub-function's loop (.LBB2_3 / .LBB2_5), which memsets rows of C.
#   3. Launches main.omp_subfn1 over 0..1536 step 64 with a {C, A, B}
#      descriptor placed in 32 bytes carved below %rsp.
#   Returns 0 in %eax. print_array is not called from here.
#   Frame: %rbp-based; -72/-64(%rbp) and -96..-80(%rbp) hold descriptors,
#          -48(%rbp) / -56(%rbp) receive the chunk start / end.
# ----------------------------------------------------------------------
        .globl  main
        .align  16, 0x90
        .type   main,@function
main:                                   # @main
# BB#0:                                 # %pollyBB
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        subq    $56, %rsp
        movq    $A, -72(%rbp)
        movq    $B, -64(%rbp)
        movl    $init_array.omp_subfn, %edi
        leaq    -72(%rbp), %rbx
        movq    %rbx, %rsi
        xorl    %edx, %edx
        xorl    %ecx, %ecx
        movl    $1536, %r8d             # imm = 0x600
        movl    $1, %r9d
        callq   GOMP_parallel_loop_runtime_start
        movq    %rbx, %rdi
        callq   init_array.omp_subfn
        callq   GOMP_parallel_end
        movl    $main.omp_subfn, %edi
        leaq    -96(%rbp), %rsi
        movq    $C, -96(%rbp)
        movq    $A, -88(%rbp)
        movq    $B, -80(%rbp)
        xorl    %edx, %edx
        xorl    %ecx, %ecx
        movl    $1536, %r8d             # imm = 0x600
        movl    $1, %r9d
        callq   GOMP_parallel_loop_runtime_start
        leaq    -48(%rbp), %rdi
        leaq    -56(%rbp), %rsi
        callq   GOMP_loop_runtime_next
        testb   $1, %al
        je      .LBB2_6                 # no chunk for this thread
# BB#1:
        leaq    -48(%rbp), %rbx         # kept in callee-saved regs across memset
        leaq    -56(%rbp), %r14
        .align  16, 0x90
.LBB2_3:                                # %omp.loadIVBounds.i
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_5 Depth 2
        movq    -56(%rbp), %r15
        decq    %r15                    # chunk end made inclusive
        movq    -48(%rbp), %r12         # i = chunk start
        cmpq    %r15, %r12
        jg      .LBB2_2
# BB#4:                                 # %polly.loop_header2.preheader.lr.ph.i
                                        #   in Loop: Header=BB2_3 Depth=1
        leaq    (%r12,%r12,2), %rax
        shlq    $11, %rax               # i * 3 * 2048 = i * 6144 (row stride)
        leaq    C(%rax), %r13           # %r13 = address of row i of C
        .align  16, 0x90
.LBB2_5:                                # %polly.loop_header2.preheader.i
                                        #   Parent Loop BB2_3 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        movq    %r13, %rdi
        xorl    %esi, %esi
        movl    $6144, %edx             # imm = 0x1800
        callq   memset                  # zero one 6144-byte row
        addq    $6144, %r13             # imm = 0x1800
        incq    %r12
        cmpq    %r15, %r12
        jle     .LBB2_5
.LBB2_2:                                # %omp.checkNext.loopexit.i
                                        #   in Loop: Header=BB2_3 Depth=1
        movq    %rbx, %rdi
        movq    %r14, %rsi
        callq   GOMP_loop_runtime_next
        testb   $1, %al
        jne     .LBB2_3
.LBB2_6:                                # %main.omp_subfn.exit
        callq   GOMP_loop_end_nowait
        callq   GOMP_parallel_end
        movq    %rsp, %rax
        leaq    -32(%rax), %rbx         # 32-byte dynamic stack area for the descriptor
        movl    $main.omp_subfn1, %edi
        xorl    %ecx, %ecx
        movl    $1536, %r8d             # imm = 0x600
        movl    $64, %r9d               # loop increment 64
        movq    %rbx, %rsp
        movq    $C, -32(%rax)
        movq    $A, -24(%rax)
        movq    $B, -16(%rax)
        movq    %rbx, %rsi
        xorl    %edx, %edx
        callq   GOMP_parallel_loop_runtime_start
        movq    %rbx, %rdi
        callq   main.omp_subfn1
        callq   GOMP_parallel_end
        xorl    %eax, %eax              # return 0
        leaq    -40(%rbp), %rsp         # discard locals and the dynamic area; %rsp -> saved %rbx
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        ret
.Ltmp2:
        .size   main, .Ltmp2-main

        .section        .rodata.cst8,"aM",@progbits,8
        .align  8
.LCPI3_0:
        .quad   4602678819172646912     # double 5.000000e-01
        .text

# ----------------------------------------------------------------------
# init_array.omp_subfn
#   Loop body handed to libgomp by init_array and main. Repeatedly asks
#   GOMP_loop_runtime_next for a chunk [start, end) of rows and, for every
#   row i in the chunk and every column j in 0..1535, stores the same
#   single-precision value to A[i][j] and B[i][j]:
#       (float)((double)((i*j) % 1024 + 1) * 0.5)
#   where i*j is accumulated in 32 bits by repeated addition.
#   The incoming %rdi is overwritten before it is read; A and B are
#   addressed through their symbols.
#   Locals: 16(%rsp) = chunk start, 8(%rsp) = chunk end.
#   Registers in the loops: %rcx = i, %rax = last i (inclusive),
#       %rdx = i * 6144, %rsi = j - 1536 (counts up to 0), %edi = i*j.
# ----------------------------------------------------------------------
        .align  16, 0x90
        .type   init_array.omp_subfn,@function
init_array.omp_subfn:                   # @init_array.omp_subfn
.Leh_func_begin3:
.Ltmp6:
        .cfi_startproc
# BB#0:                                 # %omp.setup
        pushq   %r14
.Ltmp7:
        .cfi_def_cfa_offset 16
        pushq   %rbx
.Ltmp8:
        .cfi_def_cfa_offset 24
        subq    $24, %rsp
.Ltmp9:
        .cfi_def_cfa_offset 48
.Ltmp10:
        .cfi_offset 3, -24
.Ltmp11:
        .cfi_offset 14, -16
        leaq    16(%rsp), %rdi
        leaq    8(%rsp), %rsi
        callq   GOMP_loop_runtime_next
        testb   $1, %al
        je      .LBB3_2
# BB#1:
        leaq    16(%rsp), %rbx
        leaq    8(%rsp), %r14
        jmp     .LBB3_4
.LBB3_2:                                # %omp.exit
        callq   GOMP_loop_end_nowait
        addq    $24, %rsp
        popq    %rbx
        popq    %r14
        ret
        .align  16, 0x90
.LBB3_3:                                # %omp.checkNext.loopexit
                                        #   in Loop: Header=BB3_4 Depth=1
        movq    %rbx, %rdi
        movq    %r14, %rsi
        callq   GOMP_loop_runtime_next
        testb   $1, %al
        je      .LBB3_2
.LBB3_4:                                # %omp.loadIVBounds
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB3_7 Depth 2
                                        #       Child Loop BB3_8 Depth 3
        movq    8(%rsp), %rax
        decq    %rax                    # chunk end made inclusive
        movq    16(%rsp), %rcx
        cmpq    %rax, %rcx
        jg      .LBB3_3                 # empty chunk: ask for the next one
# BB#5:                                 # %polly.loop_header2.preheader.lr.ph
                                        #   in Loop: Header=BB3_4 Depth=1
        movq    %rcx, %rdx
        shlq    $11, %rdx
        leaq    (%rdx,%rdx,2), %rdx     # %rdx = i * 2048 * 3 = i * 6144
        jmp     .LBB3_7
        .align  16, 0x90
.LBB3_6:                                # %polly.loop_header.loopexit
                                        #   in Loop: Header=BB3_7 Depth=2
        addq    $6144, %rdx             # imm = 0x1800
        incq    %rcx
        cmpq    %rax, %rcx
        jg      .LBB3_3
.LBB3_7:                                # %polly.loop_header2.preheader
                                        #   Parent Loop BB3_4 Depth=1
                                        # =>  This Loop Header: Depth=2
                                        #       Child Loop BB3_8 Depth 3
        movq    $-1536, %rsi            # imm = 0xFFFFFFFFFFFFFA00
        xorl    %edi, %edi              # i*j = 0 for j = 0
        .align  16, 0x90
.LBB3_8:                                # %polly.loop_body3
                                        #   Parent Loop BB3_4 Depth=1
                                        #   Parent Loop BB3_7 Depth=2
                                        # =>  This Inner Loop Header: Depth=3
        movl    %edi, %r8d
        sarl    $31, %r8d
        shrl    $22, %r8d               # bias of 1023 when %edi is negative
        addl    %edi, %r8d
        andl    $-1024, %r8d            # imm = 0xFFFFFFFFFFFFFC00
        negl    %r8d
        leal    1(%rdi,%r8), %r8d       # %r8d = (%edi % 1024) + 1 (signed remainder)
        cvtsi2sd        %r8d, %xmm0
        mulsd   .LCPI3_0(%rip), %xmm0   # * 0.5
        cvtsd2ss        %xmm0, %xmm0
        movss   %xmm0, A+6144(%rdx,%rsi,4) # A + i*6144 + 4*(%rsi + 1536)
        movss   %xmm0, B+6144(%rdx,%rsi,4)
        addl    %ecx, %edi              # i*j += i
        incq    %rsi
        jne     .LBB3_8
        jmp     .LBB3_6
.Ltmp12:
        .size   init_array.omp_subfn, .Ltmp12-init_array.omp_subfn
.Ltmp13:
        .cfi_endproc
.Leh_func_end3:

# ----------------------------------------------------------------------
# main.omp_subfn
#   Loop body handed to libgomp by main. For each chunk [start, end)
#   obtained from GOMP_loop_runtime_next, calls memset(row, 0, 6144) on
#   every row i of C in the chunk. The incoming %rdi is overwritten before
#   it is read.
#   Locals: 8(%rsp) = chunk start, (%rsp) = chunk end.
#   Registers: %r12 = i, %r15 = last i (inclusive), %r13 = address of row i,
#              %rbx / %r14 = addresses of the two locals (survive memset).
# ----------------------------------------------------------------------
        .align  16, 0x90
        .type   main.omp_subfn,@function
main.omp_subfn:                         # @main.omp_subfn
.Leh_func_begin4:
.Ltmp20:
        .cfi_startproc
# BB#0:                                 # %omp.setup
        pushq   %r15
.Ltmp21:
        .cfi_def_cfa_offset 16
        pushq   %r14
.Ltmp22:
        .cfi_def_cfa_offset 24
        pushq   %r13
.Ltmp23:
        .cfi_def_cfa_offset 32
        pushq   %r12
.Ltmp24:
        .cfi_def_cfa_offset 40
        pushq   %rbx
.Ltmp25:
        .cfi_def_cfa_offset 48
        subq    $16, %rsp
.Ltmp26:
        .cfi_def_cfa_offset 64
.Ltmp27:
        .cfi_offset 3, -48
.Ltmp28:
        .cfi_offset 12, -40
.Ltmp29:
        .cfi_offset 13, -32
.Ltmp30:
        .cfi_offset 14, -24
.Ltmp31:
        .cfi_offset 15, -16
        leaq    8(%rsp), %rdi
        leaq    (%rsp), %rsi
        callq   GOMP_loop_runtime_next
        testb   $1, %al
        je      .LBB4_2
# BB#1:
        leaq    8(%rsp), %rbx
        leaq    (%rsp), %r14
        jmp     .LBB4_4
.LBB4_2:                                # %omp.exit
        callq   GOMP_loop_end_nowait
        addq    $16, %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        ret
        .align  16, 0x90
.LBB4_3:                                # %omp.checkNext.loopexit
                                        #   in Loop: Header=BB4_4 Depth=1
        movq    %rbx, %rdi
        movq    %r14, %rsi
        callq   GOMP_loop_runtime_next
        testb   $1, %al
        je      .LBB4_2
.LBB4_4:                                # %omp.loadIVBounds
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB4_6 Depth 2
        movq    (%rsp), %r15
        decq    %r15                    # chunk end made inclusive
        movq    8(%rsp), %r12
        cmpq    %r15, %r12
        jg      .LBB4_3
# BB#5:                                 # %polly.loop_header2.preheader.lr.ph
                                        #   in Loop: Header=BB4_4 Depth=1
        leaq    (%r12,%r12,2), %rax
        shlq    $11, %rax               # i * 6144
        leaq    C(%rax), %r13
        .align  16, 0x90
.LBB4_6:                                # %polly.loop_header2.preheader
                                        #   Parent Loop BB4_4 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        movq    %r13, %rdi
        xorl    %esi, %esi
        movl    $6144, %edx             # imm = 0x1800
        callq   memset
        addq    $6144, %r13             # imm = 0x1800
        incq    %r12
        cmpq    %r15, %r12
        jle     .LBB4_6
        jmp     .LBB4_3
.Ltmp32:
        .size   main.omp_subfn, .Ltmp32-main.omp_subfn
.Ltmp33:
        .cfi_endproc
.Leh_func_end4:

# ----------------------------------------------------------------------
# main.omp_subfn1
#   Loop body handed to libgomp by main with increment 64. For each chunk
#   from GOMP_loop_runtime_next it performs, in 64x64x64 tiles,
#       C[i][j .. j+3] += A[i][k] * B[k][j .. j+3]
#   using 16-byte aligned SSE loads/stores (movaps), four columns per
#   innermost iteration. The incoming %rdi is overwritten before it is read.
#
#   Stack slots:
#       32(%rsp) chunk start         24(%rsp) chunk end
#       16(%rsp) i0 * 1536           (element index of row i0)
#        8(%rsp) A + i0 * 6144       (address of row i0 of A)
#         (%rsp) last row of the chunk (inclusive)
#   Registers inside the nest:
#       %rcx i0 (row-tile start)     %rax i0 + 63
#       %rdx j0 (column-tile start)  %rdi j0 + 63
#       %r8  k0 (k-tile start)       %r11 k0 + 63
#       %rsi i0*1536 + j0            %r9  address of A[i0][k0]
#       %r10 k0*1536 + j0            %rbx i
#       %r14 i*1536 + j0             %r15 address of A[i][k0]
#       %r12 k - k0 (0..63)          %r13 k*1536 + j0
#       %rbp byte offset inside the 64-column strip (0..240, step 16)
#       %xmm0 A[i][k] replicated into all four lanes
#   NOTE(review): the row loop always runs i0 .. i0+63 and is not clamped
#   to the chunk end in (%rsp); presumably chunk bounds issued for
#   increment 64 are multiples of 64 -- confirm against libgomp.
# ----------------------------------------------------------------------
        .align  16, 0x90
        .type   main.omp_subfn1,@function
main.omp_subfn1:                        # @main.omp_subfn1
.Leh_func_begin5:
.Ltmp41:
        .cfi_startproc
# BB#0:                                 # %omp.setup
        pushq   %rbp
.Ltmp42:
        .cfi_def_cfa_offset 16
        pushq   %r15
.Ltmp43:
        .cfi_def_cfa_offset 24
        pushq   %r14
.Ltmp44:
        .cfi_def_cfa_offset 32
        pushq   %r13
.Ltmp45:
        .cfi_def_cfa_offset 40
        pushq   %r12
.Ltmp46:
        .cfi_def_cfa_offset 48
        pushq   %rbx
.Ltmp47:
        .cfi_def_cfa_offset 56
        subq    $40, %rsp
.Ltmp48:
        .cfi_def_cfa_offset 96
.Ltmp49:
        .cfi_offset 3, -56
.Ltmp50:
        .cfi_offset 12, -48
.Ltmp51:
        .cfi_offset 13, -40
.Ltmp52:
        .cfi_offset 14, -32
.Ltmp53:
        .cfi_offset 15, -24
.Ltmp54:
        .cfi_offset 6, -16
        leaq    32(%rsp), %rdi
        leaq    24(%rsp), %rsi
        jmp     .LBB5_1                 # first chunk request is at the bottom of the loop
        .align  16, 0x90
.LBB5_4:                                # %omp.loadIVBounds
                                        #   in Loop: Header=BB5_1 Depth=1
        movq    24(%rsp), %rax
        decq    %rax                    # chunk end made inclusive
        movq    %rax, (%rsp)            # 8-byte Spill
        movq    32(%rsp), %rcx
        cmpq    %rax, %rcx
        jg      .LBB5_3
# BB#5:                                 # %polly.loop_header2.preheader.lr.ph
                                        #   in Loop: Header=BB5_1 Depth=1
        leaq    (%rcx,%rcx,2), %rax
        movq    %rcx, %rdx
        shlq    $9, %rdx
        leaq    (%rdx,%rdx,2), %rdx     # i0 * 512 * 3 = i0 * 1536
        movq    %rdx, 16(%rsp)          # 8-byte Spill
        shlq    $11, %rax               # i0 * 3 * 2048 = i0 * 6144
        leaq    A(%rax), %rax
        movq    %rax, 8(%rsp)           # 8-byte Spill
        jmp     .LBB5_7
        .align  16, 0x90
.LBB5_6:                                # %polly.loop_header.loopexit
                                        #   in Loop: Header=BB5_7 Depth=2
        addq    $98304, 16(%rsp)        # 8-byte Folded Spill
                                        # imm = 0x18000
        addq    $393216, 8(%rsp)        # 8-byte Folded Spill
                                        # imm = 0x60000
        addq    $64, %rcx               # next row tile
        cmpq    (%rsp), %rcx            # 8-byte Folded Reload
        jg      .LBB5_3
.LBB5_7:                                # %polly.loop_header2.preheader
                                        #   Parent Loop BB5_1 Depth=1
                                        # =>  This Loop Header: Depth=2
                                        #       Child Loop BB5_9 Depth 3
                                        #         Child Loop BB5_11 Depth 4
                                        #           Child Loop BB5_14 Depth 5
                                        #             Child Loop BB5_18 Depth 6
                                        #               Child Loop BB5_19 Depth 7
        leaq    63(%rcx), %rax          # last row of this tile
        xorl    %edx, %edx              # j0 = 0
        jmp     .LBB5_9
        .align  16, 0x90
.LBB5_8:                                # %polly.loop_header2.loopexit
                                        #   in Loop: Header=BB5_9 Depth=3
        addq    $64, %rdx
        cmpq    $1536, %rdx             # imm = 0x600
        je      .LBB5_6
.LBB5_9:                                # %polly.loop_header7.preheader
                                        #   Parent Loop BB5_1 Depth=1
                                        #   Parent Loop BB5_7 Depth=2
                                        # =>  This Loop Header: Depth=3
                                        #       Child Loop BB5_11 Depth 4
                                        #         Child Loop BB5_14 Depth 5
                                        #           Child Loop BB5_18 Depth 6
                                        #             Child Loop BB5_19 Depth 7
        movq    16(%rsp), %rsi          # 8-byte Reload
        leaq    (%rsi,%rdx), %rsi       # i0*1536 + j0
        leaq    63(%rdx), %rdi
        xorl    %r8d, %r8d              # k0 = 0
        movq    8(%rsp), %r9            # 8-byte Reload
        movq    %rdx, %r10
        jmp     .LBB5_11
        .align  16, 0x90
.LBB5_10:                               # %polly.loop_header7.loopexit
                                        #   in Loop: Header=BB5_11 Depth=4
        addq    $256, %r9               # imm = 0x100
        addq    $98304, %r10            # imm = 0x18000
        addq    $64, %r8
        cmpq    $1536, %r8              # imm = 0x600
        je      .LBB5_8
.LBB5_11:                               # %polly.loop_body8
                                        #   Parent Loop BB5_1 Depth=1
                                        #   Parent Loop BB5_7 Depth=2
                                        #   Parent Loop BB5_9 Depth=3
                                        # =>  This Loop Header: Depth=4
                                        #       Child Loop BB5_14 Depth 5
                                        #         Child Loop BB5_18 Depth 6
                                        #           Child Loop BB5_19 Depth 7
        movabsq $9223372036854775744, %r11 # imm = 0x7FFFFFFFFFFFFFC0
        cmpq    %r11, %rcx
        jg      .LBB5_10                # skip when i0 + 63 would overflow a signed 64-bit value
# BB#12:                                # %polly.loop_body13.lr.ph
                                        #   in Loop: Header=BB5_11 Depth=4
        leaq    63(%r8), %r11
        movq    %rcx, %rbx              # i = i0
        movq    %rsi, %r14
        movq    %r9, %r15
        jmp     .LBB5_14
        .align  16, 0x90
.LBB5_13:                               # %polly.loop_header12.loopexit
                                        #   in Loop: Header=BB5_14 Depth=5
        addq    $1536, %r14             # imm = 0x600
        addq    $6144, %r15             # imm = 0x1800
        incq    %rbx
        cmpq    %rax, %rbx
        jg      .LBB5_10
.LBB5_14:                               # %polly.loop_body13
                                        #   Parent Loop BB5_1 Depth=1
                                        #   Parent Loop BB5_7 Depth=2
                                        #   Parent Loop BB5_9 Depth=3
                                        #   Parent Loop BB5_11 Depth=4
                                        # =>  This Loop Header: Depth=5
                                        #       Child Loop BB5_18 Depth 6
                                        #         Child Loop BB5_19 Depth 7
        cmpq    %r11, %r8               # k0 > k0 + 63 ?
        jg      .LBB5_13
# BB#15:                                # %polly.loop_body13
                                        #   in Loop: Header=BB5_14 Depth=5
        cmpq    %rdi, %rdx              # j0 > j0 + 63 ?
        jg      .LBB5_13
# BB#16:                                # %polly.loop_body23.lr.ph.preheader
                                        #   in Loop: Header=BB5_14 Depth=5
        xorl    %r12d, %r12d
        movq    %r10, %r13
        jmp     .LBB5_18
        .align  16, 0x90
.LBB5_17:                               # %polly.loop_header17.loopexit
                                        #   in Loop: Header=BB5_18 Depth=6
        addq    $1536, %r13             # imm = 0x600
        incq    %r12
        cmpq    $64, %r12
        je      .LBB5_13
.LBB5_18:                               # %polly.loop_body23.lr.ph
                                        #   Parent Loop BB5_1 Depth=1
                                        #   Parent Loop BB5_7 Depth=2
                                        #   Parent Loop BB5_9 Depth=3
                                        #   Parent Loop BB5_11 Depth=4
                                        #   Parent Loop BB5_14 Depth=5
                                        # =>  This Loop Header: Depth=6
                                        #       Child Loop BB5_19 Depth 7
        movss   (%r15,%r12,4), %xmm0    # A[i][k]
        pshufd  $0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
        xorl    %ebp, %ebp
        .align  16, 0x90
.LBB5_19:                               # %polly.loop_body23
                                        #   Parent Loop BB5_1 Depth=1
                                        #   Parent Loop BB5_7 Depth=2
                                        #   Parent Loop BB5_9 Depth=3
                                        #   Parent Loop BB5_11 Depth=4
                                        #   Parent Loop BB5_14 Depth=5
                                        #   Parent Loop BB5_18 Depth=6
                                        # =>  This Inner Loop Header: Depth=7
        movaps  B(%rbp,%r13,4), %xmm1   # four values of row k of B
        mulps   %xmm0, %xmm1
        addps   C(%rbp,%r14,4), %xmm1
        movaps  %xmm1, C(%rbp,%r14,4)   # four values of row i of C updated
        addq    $16, %rbp
        cmpq    $256, %rbp              # imm = 0x100
        jne     .LBB5_19
        jmp     .LBB5_17
.LBB5_3:                                # %omp.checkNext.loopexit
                                        #   in Loop: Header=BB5_1 Depth=1
        leaq    32(%rsp), %rax          # %rdi / %rsi were reused as scratch above
        movq    %rax, %rdi
        leaq    24(%rsp), %rax
        movq    %rax, %rsi
.LBB5_1:                                # %omp.setup
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB5_7 Depth 2
                                        #       Child Loop BB5_9 Depth 3
                                        #         Child Loop BB5_11 Depth 4
                                        #           Child Loop BB5_14 Depth 5
                                        #             Child Loop BB5_18 Depth 6
                                        #               Child Loop BB5_19 Depth 7
        callq   GOMP_loop_runtime_next
        testb   $1, %al
        jne     .LBB5_4
# BB#2:                                 # %omp.exit
        callq   GOMP_loop_end_nowait
        addq    $40, %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        ret
.Ltmp55:
        .size   main.omp_subfn1, .Ltmp55-main.omp_subfn1
.Ltmp56:
        .cfi_endproc
.Leh_func_end5:

# ----------------------------------------------------------------------
# Data: three 9437184-byte, 16-byte aligned common blocks and the fprintf
# format string used by print_array.
# ----------------------------------------------------------------------
        .type   A,@object               # @A
        .comm   A,9437184,16
        .type   B,@object               # @B
        .comm   B,9437184,16
        .type   .L.str,@object          # @.str
        .section        .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .asciz  "%lf "
        .size   .L.str, 5

        .type   C,@object               # @C
        .comm   C,9437184,16

        .section        ".note.GNU-stack","",@progbits

