 llvm/test/CodeGen/X86/avx512-intel-ocl.ll | 430
 llvm/test/CodeGen/X86/x86-interrupt_cc.ll | 733
 2 files changed, 1082 insertions(+), 81 deletions(-)
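Both tests below were converted to autogenerated FileCheck assertions (see the "; NOTE:" lines in the hunks), and each RUN line was split into a -mcpu=knl and a -mcpu=skx variant so KNL/SKX differences (for example kmovw vs. kmovd mask moves) can be checked separately. A minimal sketch of the multi-prefix pattern, using a hypothetical test function @sample rather than anything from this patch: checks common to both CPUs go under the shared prefix (X64), CPU-specific ones under X64-KNL or X64-SKX.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X64 -check-prefix=X64-KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X64 -check-prefix=X64-SKX
; X64-LABEL: sample:
define <16 x float> @sample(<16 x float> %a, <16 x float> %b) nounwind {
  %x = fadd <16 x float> %a, %b   ; identical codegen on both CPUs, so checked under X64
  ret <16 x float> %x
}

After a codegen change, assertions in this style are regenerated by running utils/update_llc_test_checks.py on the test file rather than edited by hand.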
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 77743fbc02f..96879b0f3fc 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -1,33 +1,82 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck -check-prefix=WIN64 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck -check-prefix=X64 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X32 -check-prefix=X32-KNL
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X32 -check-prefix=X32-SKX
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN32 -check-prefix=WIN32-KNL
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN32 -check-prefix=WIN32-SKX
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN64 -check-prefix=WIN64-KNL
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN64 -check-prefix=WIN64-SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X64 -check-prefix=X64-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X64 -check-prefix=X64-SKX
 declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
 declare i32 @func_int(i32, i32)
-; WIN64-LABEL: testf16_inp
-; WIN64: vaddps  {{.*}}, {{%zmm[0-1]}}
-; WIN64: leaq    {{.*}}(%rsp), %rcx
-; WIN64: call
-; WIN64: ret
-
-; X32-LABEL: testf16_inp
-; X32: vaddps  {{.*}}, {{%zmm[0-1]}}
-; Push is not deemed profitable if we're realigning the stack.
-; X32: {{pushl|movl}}   %eax
-; X32: call
-; X32: ret
-
-; X64-LABEL: testf16_inp
-; X64: vaddps  {{.*}}, {{%zmm[0-1]}}
-; X64: movq    %rsp, %rdi
-; X64: call
-; X64: ret
-
 ;test calling conventions - input parameters
 define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+; X32-LABEL: testf16_inp:
+; X32:       ## %bb.0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-64, %esp
+; X32-NEXT:    subl $192, %esp
+; X32-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl %eax, (%esp)
+; X32-NEXT:    calll _func_float16_ptr
+; X32-NEXT:    vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; WIN32-LABEL: testf16_inp:
+; WIN32:       # %bb.0:
+; WIN32-NEXT:    pushl %ebp
+; WIN32-NEXT:    movl %esp, %ebp
+; WIN32-NEXT:    andl $-64, %esp
+; WIN32-NEXT:    subl $128, %esp
+; WIN32-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; WIN32-NEXT:    movl %esp, %eax
+; WIN32-NEXT:    pushl %eax
+; WIN32-NEXT:    calll _func_float16_ptr
+; WIN32-NEXT:    addl $4, %esp
+; WIN32-NEXT:    vaddps (%esp), %zmm0, %zmm0
+; WIN32-NEXT:    movl %ebp, %esp
+; WIN32-NEXT:    popl %ebp
+; WIN32-NEXT:    retl
+;
+; WIN64-LABEL: testf16_inp:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    subq $176, %rsp
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    andq $-64, %rsp
+; WIN64-NEXT:    vmovaps (%rcx), %zmm0
+; WIN64-NEXT:    vaddps (%rdx), %zmm0, %zmm0
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq func_float16_ptr
+; WIN64-NEXT:    vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
+; WIN64-NEXT:    leaq 48(%rbp), %rsp
+; WIN64-NEXT:    popq %rbp
+; WIN64-NEXT:    retq
+;
+; X64-LABEL: testf16_inp:
+; X64:       ## %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    andq $-64, %rsp
+; X64-NEXT:    subq $128, %rsp
+; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT:    movq %rsp, %rdi
+; X64-NEXT:    callq _func_float16_ptr
+; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
+; X64-NEXT:    leaq -16(%rbp), %rsp
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
@@ -38,19 +87,77 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
 ;test calling conventions - preserved registers
-; preserved zmm16-
-; WIN64-LABEL: testf16_regs
-; WIN64: call
-; WIN64: vaddps  %zmm16, %zmm0, %zmm0
-; WIN64: ret
-
-; preserved zmm16-
-; X64-LABEL: testf16_regs
-; X64: call
-; X64: vaddps  %zmm16, %zmm0, %zmm0
-; X64: ret
-
 define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+; X32-LABEL: testf16_regs:
+; X32:       ## %bb.0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-64, %esp
+; X32-NEXT:    subl $256, %esp ## imm = 0x100
+; X32-NEXT:    vmovaps %zmm1, {{[0-9]+}}(%esp) ## 64-byte Spill
+; X32-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl %eax, (%esp)
+; X32-NEXT:    calll _func_float16_ptr
+; X32-NEXT:    vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0 ## 64-byte Folded Reload
+; X32-NEXT:    vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; WIN32-LABEL: testf16_regs:
+; WIN32:       # %bb.0:
+; WIN32-NEXT:    pushl %ebp
+; WIN32-NEXT:    movl %esp, %ebp
+; WIN32-NEXT:    andl $-64, %esp
+; WIN32-NEXT:    subl $192, %esp
+; WIN32-NEXT:    vmovaps %zmm1, (%esp) # 64-byte Spill
+; WIN32-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; WIN32-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT:    pushl %eax
+; WIN32-NEXT:    calll _func_float16_ptr
+; WIN32-NEXT:    addl $4, %esp
+; WIN32-NEXT:    vaddps (%esp), %zmm0, %zmm0 # 64-byte Folded Reload
+; WIN32-NEXT:    vaddps {{[0-9]+}}(%esp), %zmm0, %zmm0
+; WIN32-NEXT:    movl %ebp, %esp
+; WIN32-NEXT:    popl %ebp
+; WIN32-NEXT:    retl
+;
+; WIN64-LABEL: testf16_regs:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    subq $176, %rsp
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    andq $-64, %rsp
+; WIN64-NEXT:    vmovaps (%rdx), %zmm16
+; WIN64-NEXT:    vaddps (%rcx), %zmm16, %zmm0
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq func_float16_ptr
+; WIN64-NEXT:    vaddps %zmm16, %zmm0, %zmm0
+; WIN64-NEXT:    vaddps {{[0-9]+}}(%rsp), %zmm0, %zmm0
+; WIN64-NEXT:    leaq 48(%rbp), %rsp
+; WIN64-NEXT:    popq %rbp
+; WIN64-NEXT:    retq
+;
+; X64-LABEL: testf16_regs:
+; X64:       ## %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    andq $-64, %rsp
+; X64-NEXT:    subq $128, %rsp
+; X64-NEXT:    vmovaps %zmm1, %zmm16
+; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT:    movq %rsp, %rdi
+; X64-NEXT:    callq _func_float16_ptr
+; X64-NEXT:    vaddps %zmm16, %zmm0, %zmm0
+; X64-NEXT:    vaddps (%rsp), %zmm0, %zmm0
+; X64-NEXT:    leaq -16(%rbp), %rsp
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
@@ -61,24 +168,124 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 }
 ; test calling conventions - prolog and epilog
-; WIN64-LABEL: test_prolog_epilog
-; WIN64: vmovaps %zmm21, {{.*(%rbp).*}}     # 64-byte Spill
-; WIN64: vmovaps %zmm6, {{.*(%rbp).*}}     # 64-byte Spill
-; WIN64: call
-; WIN64: vmovaps {{.*(%rbp).*}}, %zmm6      # 64-byte Reload
-; WIN64: vmovaps {{.*(%rbp).*}}, %zmm21     # 64-byte Reload
-
-; X64-LABEL: test_prolog_epilog
-; X64:  kmovq   %k7, {{.*}}(%rsp)         ## 8-byte Spill
-; X64:  kmovq   %k6, {{.*}}(%rsp)         ## 8-byte Spill
-; X64:  kmovq   %k5, {{.*}}(%rsp)         ## 8-byte Spill
-; X64:  kmovq   %k4, {{.*}}(%rsp)         ## 8-byte Spill
-; X64: vmovups %zmm31, {{.*}}(%rsp)  ## 64-byte Spill
-; X64: vmovups %zmm16, {{.*}}(%rsp)  ## 64-byte Spill
-; X64: call
-; X64: vmovups {{.*}}(%rsp), %zmm16 ## 64-byte Reload
-; X64: vmovups {{.*}}(%rsp), %zmm31 ## 64-byte Reload
 define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+; X32-LABEL: test_prolog_epilog:
+; X32:       ## %bb.0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    calll _func_float16
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; WIN32-LABEL: test_prolog_epilog:
+; WIN32:       # %bb.0:
+; WIN32-NEXT:    calll _func_float16
+; WIN32-NEXT:    retl
+;
+; WIN64-LABEL: test_prolog_epilog:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    subq $1328, %rsp # imm = 0x530
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    kmovq %k7, 1192(%rbp) # 8-byte Spill
+; WIN64-NEXT:    kmovq %k6, 1184(%rbp) # 8-byte Spill
+; WIN64-NEXT:    kmovq %k5, 1176(%rbp) # 8-byte Spill
+; WIN64-NEXT:    kmovq %k4, 1168(%rbp) # 8-byte Spill
+; WIN64-NEXT:    vmovaps %zmm21, 1056(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm20, 960(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm19, 896(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm18, 832(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm17, 768(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm16, 704(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm15, 640(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm14, 576(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm13, 512(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm12, 448(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm11, 384(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm10, 320(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm9, 256(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm8, 192(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm7, 128(%rbp) # 64-byte Spill
+; WIN64-NEXT:    vmovaps %zmm6, 64(%rbp) # 64-byte Spill
+; WIN64-NEXT:    andq $-64, %rsp
+; WIN64-NEXT:    vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    vmovaps %zmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT:    callq func_float16
+; WIN64-NEXT:    vmovaps 64(%rbp), %zmm6 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 128(%rbp), %zmm7 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 192(%rbp), %zmm8 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 256(%rbp), %zmm9 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 320(%rbp), %zmm10 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 384(%rbp), %zmm11 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 448(%rbp), %zmm12 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 512(%rbp), %zmm13 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 576(%rbp), %zmm14 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 640(%rbp), %zmm15 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 704(%rbp), %zmm16 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 768(%rbp), %zmm17 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 832(%rbp), %zmm18 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 896(%rbp), %zmm19 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 960(%rbp), %zmm20 # 64-byte Reload
+; WIN64-NEXT:    vmovaps 1056(%rbp), %zmm21 # 64-byte Reload
+; WIN64-NEXT:    kmovq 1168(%rbp), %k4 # 8-byte Reload
+; WIN64-NEXT:    kmovq 1176(%rbp), %k5 # 8-byte Reload
+; WIN64-NEXT:    kmovq 1184(%rbp), %k6 # 8-byte Reload
+; WIN64-NEXT:    kmovq 1192(%rbp), %k7 # 8-byte Reload
+; WIN64-NEXT:    leaq 1200(%rbp), %rsp
+; WIN64-NEXT:    popq %rbp
+; WIN64-NEXT:    retq
+;
+; X64-LABEL: test_prolog_epilog:
+; X64:       ## %bb.0:
+; X64-NEXT:    pushq %rsi
+; X64-NEXT:    pushq %rdi
+; X64-NEXT:    subq $1192, %rsp ## imm = 0x4A8
+; X64-NEXT:    kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT:    kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT:    kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT:    kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; X64-NEXT:    vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; X64-NEXT:    vmovups %zmm16, (%rsp) ## 64-byte Spill
+; X64-NEXT:    callq _func_float16
+; X64-NEXT:    vmovups (%rsp), %zmm16 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; X64-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
+; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
+; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
+; X64-NEXT:    kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
+; X64-NEXT:    addq $1192, %rsp ## imm = 0x4A8
+; X64-NEXT:    popq %rdi
+; X64-NEXT:    popq %rsi
+; X64-NEXT:    retq
    %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
    ret <16 x float> %c
 }
@@ -86,19 +293,126 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
 declare <16 x float> @func_float16_mask(<16 x float>, <16 x i1>)
-; X64-LABEL: testf16_inp_mask
-; X64: kmovw   %edi, %k1
-; X64: call
 define <16 x float> @testf16_inp_mask(<16 x float> %a, i16 %mask)  {
+; X32-LABEL: testf16_inp_mask:
+; X32:       ## %bb.0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT:    calll _func_float16_mask
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; WIN32-LABEL: testf16_inp_mask:
+; WIN32:       # %bb.0:
+; WIN32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; WIN32-NEXT:    calll _func_float16_mask
+; WIN32-NEXT:    retl
+;
+; WIN64-KNL-LABEL: testf16_inp_mask:
+; WIN64-KNL:       # %bb.0:
+; WIN64-KNL-NEXT:    subq $40, %rsp
+; WIN64-KNL-NEXT:    .seh_stackalloc 40
+; WIN64-KNL-NEXT:    .seh_endprologue
+; WIN64-KNL-NEXT:    vmovaps (%rcx), %zmm0
+; WIN64-KNL-NEXT:    kmovw %edx, %k1
+; WIN64-KNL-NEXT:    callq func_float16_mask
+; WIN64-KNL-NEXT:    nop
+; WIN64-KNL-NEXT:    addq $40, %rsp
+; WIN64-KNL-NEXT:    retq
+; WIN64-KNL-NEXT:    .seh_handlerdata
+; WIN64-KNL-NEXT:    .text
+; WIN64-KNL-NEXT:    .seh_endproc
+;
+; WIN64-SKX-LABEL: testf16_inp_mask:
+; WIN64-SKX:       # %bb.0:
+; WIN64-SKX-NEXT:    subq $40, %rsp
+; WIN64-SKX-NEXT:    .seh_stackalloc 40
+; WIN64-SKX-NEXT:    .seh_endprologue
+; WIN64-SKX-NEXT:    vmovaps (%rcx), %zmm0
+; WIN64-SKX-NEXT:    kmovd %edx, %k1
+; WIN64-SKX-NEXT:    callq func_float16_mask
+; WIN64-SKX-NEXT:    nop
+; WIN64-SKX-NEXT:    addq $40, %rsp
+; WIN64-SKX-NEXT:    retq
+; WIN64-SKX-NEXT:    .seh_handlerdata
+; WIN64-SKX-NEXT:    .text
+; WIN64-SKX-NEXT:    .seh_endproc
+;
+; X64-KNL-LABEL: testf16_inp_mask:
+; X64-KNL:       ## %bb.0:
+; X64-KNL-NEXT:    pushq %rbp
+; X64-KNL-NEXT:    .cfi_def_cfa_offset 16
+; X64-KNL-NEXT:    pushq %r13
+; X64-KNL-NEXT:    .cfi_def_cfa_offset 24
+; X64-KNL-NEXT:    pushq %r12
+; X64-KNL-NEXT:    .cfi_def_cfa_offset 32
+; X64-KNL-NEXT:    .cfi_offset %r12, -32
+; X64-KNL-NEXT:    .cfi_offset %r13, -24
+; X64-KNL-NEXT:    .cfi_offset %rbp, -16
+; X64-KNL-NEXT:    kmovw %edi, %k1
+; X64-KNL-NEXT:    callq _func_float16_mask
+; X64-KNL-NEXT:    popq %r12
+; X64-KNL-NEXT:    popq %r13
+; X64-KNL-NEXT:    popq %rbp
+; X64-KNL-NEXT:    retq
+;
+; X64-SKX-LABEL: testf16_inp_mask:
+; X64-SKX:       ## %bb.0:
+; X64-SKX-NEXT:    pushq %rbp
+; X64-SKX-NEXT:    .cfi_def_cfa_offset 16
+; X64-SKX-NEXT:    pushq %r13
+; X64-SKX-NEXT:    .cfi_def_cfa_offset 24
+; X64-SKX-NEXT:    pushq %r12
+; X64-SKX-NEXT:    .cfi_def_cfa_offset 32
+; X64-SKX-NEXT:    .cfi_offset %r12, -32
+; X64-SKX-NEXT:    .cfi_offset %r13, -24
+; X64-SKX-NEXT:    .cfi_offset %rbp, -16
+; X64-SKX-NEXT:    kmovd %edi, %k1
+; X64-SKX-NEXT:    callq _func_float16_mask
+; X64-SKX-NEXT:    popq %r12
+; X64-SKX-NEXT:    popq %r13
+; X64-SKX-NEXT:    popq %rbp
+; X64-SKX-NEXT:    retq
   %imask = bitcast i16 %mask to <16 x i1>
   %1 = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1> %imask)
   ret <16 x float> %1
 }
-; X64-LABEL: test_prolog_epilog_with_mask
-; X64: kxorw   %k{{.*}}, %k{{.*}}, %k1
-; X64: call
 define intel_ocl_bicc <16 x float> @test_prolog_epilog_with_mask(<16 x float> %a, <16 x i32> %x1, <16 x i32>%x2, <16 x i1> %mask) nounwind {
+; X32-LABEL: test_prolog_epilog_with_mask:
+; X32:       ## %bb.0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    vpcmpeqd %zmm2, %zmm1, %k0
+; X32-NEXT:    kxorw %k1, %k0, %k1
+; X32-NEXT:    calll _func_float16_mask
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; WIN32-LABEL: test_prolog_epilog_with_mask:
+; WIN32:       # %bb.0:
+; WIN32-NEXT:    vpcmpeqd %zmm2, %zmm1, %k0
+; WIN32-NEXT:    kxorw %k1, %k0, %k1
+; WIN32-NEXT:    calll _func_float16_mask
+; WIN32-NEXT:    retl
+;
+; WIN64-LABEL: test_prolog_epilog_with_mask:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    vpcmpeqd %zmm2, %zmm1, %k0
+; WIN64-NEXT:    kxorw %k1, %k0, %k1
+; WIN64-NEXT:    callq func_float16_mask
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+;
+; X64-LABEL: test_prolog_epilog_with_mask:
+; X64:       ## %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    vpcmpeqd %zmm2, %zmm1, %k0
+; X64-NEXT:    kxorw %k1, %k0, %k1
+; X64-NEXT:    callq _func_float16_mask
+; X64-NEXT:    popq %rax
+; X64-NEXT:    retq
    %cmp_res = icmp eq <16 x i32>%x1, %x2
    %mask1 = xor <16 x i1> %cmp_res, %mask
    %c = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1>%mask1)
diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
index 3251d731468..61493536e4b 100644
--- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
+++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
@@ -1,30 +1,717 @@
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mattr=+avx512f < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64
-; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mattr=+avx512f < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK32
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK64 -check-prefix=CHECK64-KNL
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK64 -check-prefix=CHECK64-SKX
+; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK32 -check-prefix=CHECK32-KNL
+; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK32 -check-prefix=CHECK32-SKX
 ; Make sure we spill the high numbered zmm registers and K registers with the right encoding.
-; CHECK-LABEL: foo
-; CHECK: kmovq %k7, {{.+}}
-; CHECK64:      encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
-; CHECK32:      encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
-; k6 is used as an anchor for the previous regexp.
-; CHECK-NEXT: kmovq %k6
-
-; CHECK64: movups %zmm31, {{.+}}
-; CHECK64:      encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
-; zmm30 is used as an anchor for the previous regexp.
-; CHECK64-NEXT: movups %zmm30
-
-; CHECK32-NOT: zmm31
-; CHECK32-NOT: zmm8
-; CHECK32: movups %zmm7, {{.+}}
-; CHECK32:      encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
-; zmm6 is used as an anchor for the previous regexp.
-; CHECK32-NEXT: movups %zmm6
-
-; CHECK: call
-; CHECK: iret
 define x86_intrcc void @foo(i8* %frame) {
+; CHECK64-KNL-LABEL: foo:
+; CHECK64-KNL:       ## %bb.0:
+; CHECK64-KNL-NEXT:    pushq %rax ## encoding: [0x50]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 16
+; CHECK64-KNL-NEXT:    pushq %r11 ## encoding: [0x41,0x53]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 24
+; CHECK64-KNL-NEXT:    pushq %r10 ## encoding: [0x41,0x52]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 32
+; CHECK64-KNL-NEXT:    pushq %r9 ## encoding: [0x41,0x51]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 40
+; CHECK64-KNL-NEXT:    pushq %r8 ## encoding: [0x41,0x50]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 48
+; CHECK64-KNL-NEXT:    pushq %rdi ## encoding: [0x57]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 56
+; CHECK64-KNL-NEXT:    pushq %rsi ## encoding: [0x56]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 64
+; CHECK64-KNL-NEXT:    pushq %rdx ## encoding: [0x52]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 72
+; CHECK64-KNL-NEXT:    pushq %rcx ## encoding: [0x51]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 80
+; CHECK64-KNL-NEXT:    subq $2160, %rsp ## encoding: [0x48,0x81,0xec,0x70,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    ## imm = 0x870
+; CHECK64-KNL-NEXT:    kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq %k3, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq %k2, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq %k1, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-KNL-NEXT:    vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
+; CHECK64-KNL-NEXT:    vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x6c,0x24,0x1d]
+; CHECK64-KNL-NEXT:    vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x64,0x24,0x1c]
+; CHECK64-KNL-NEXT:    vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x5c,0x24,0x1b]
+; CHECK64-KNL-NEXT:    vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x54,0x24,0x1a]
+; CHECK64-KNL-NEXT:    vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding:
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x4c,0x24,0x19]
+; CHECK64-KNL-NEXT:    vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x44,0x24,0x18]
+; CHECK64-KNL-NEXT:    vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x7c,0x24,0x17]
+; CHECK64-KNL-NEXT:    vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x74,0x24,0x16]
+; CHECK64-KNL-NEXT:    vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x6c,0x24,0x15]
+; CHECK64-KNL-NEXT:    vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x64,0x24,0x14]
+; CHECK64-KNL-NEXT:    vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x5c,0x24,0x13]
+; CHECK64-KNL-NEXT:    vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x54,0x24,0x12]
+; CHECK64-KNL-NEXT:    vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x4c,0x24,0x11]
+; CHECK64-KNL-NEXT:    vmovups %zmm16, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x44,0x24,0x10]
+; CHECK64-KNL-NEXT:    vmovups %zmm15, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x7c,0x24,0x0f]
+; CHECK64-KNL-NEXT:    vmovups %zmm14, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x74,0x24,0x0e]
+; CHECK64-KNL-NEXT:    vmovups %zmm13, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x6c,0x24,0x0d]
+; CHECK64-KNL-NEXT:    vmovups %zmm12, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x64,0x24,0x0c]
+; CHECK64-KNL-NEXT:    vmovups %zmm11, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x5c,0x24,0x0b]
+; CHECK64-KNL-NEXT:    vmovups %zmm10, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x54,0x24,0x0a]
+; CHECK64-KNL-NEXT:    vmovups %zmm9, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x4c,0x24,0x09]
+; CHECK64-KNL-NEXT:    vmovups %zmm8, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x44,0x24,0x08]
+; CHECK64-KNL-NEXT:    vmovups %zmm7, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK64-KNL-NEXT:    vmovups %zmm6, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK64-KNL-NEXT:    vmovups %zmm5, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK64-KNL-NEXT:    vmovups %zmm4, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK64-KNL-NEXT:    vmovups %zmm3, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK64-KNL-NEXT:    vmovups %zmm2, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK64-KNL-NEXT:    vmovups %zmm1, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
+; CHECK64-KNL-NEXT:    vmovups %zmm0, (%rsp) ## 64-byte Spill
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; CHECK64-KNL-NEXT:    .cfi_def_cfa_offset 2240
+; CHECK64-KNL-NEXT:    .cfi_offset %rcx, -80
+; CHECK64-KNL-NEXT:    .cfi_offset %rdx, -72
+; CHECK64-KNL-NEXT:    .cfi_offset %rsi, -64
+; CHECK64-KNL-NEXT:    .cfi_offset %rdi, -56
+; CHECK64-KNL-NEXT:    .cfi_offset %r8, -48
+; CHECK64-KNL-NEXT:    .cfi_offset %r9, -40
+; CHECK64-KNL-NEXT:    .cfi_offset %r10, -32
+; CHECK64-KNL-NEXT:    .cfi_offset %r11, -24
+; CHECK64-KNL-NEXT:    .cfi_offset %rax, -16
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm0, -2240
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm1, -2176
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm2, -2112
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm3, -2048
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm4, -1984
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm5, -1920
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm6, -1856
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm7, -1792
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm8, -1728
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm9, -1664
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm10, -1600
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm11, -1536
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm12, -1472
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm13, -1408
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm14, -1344
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm15, -1280
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm16, -1216
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm17, -1152
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm18, -1088
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm19, -1024
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm20, -960
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm21, -896
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm22, -832
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm23, -768
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm24, -704
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm25, -640
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm26, -576
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm27, -512
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm28, -448
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm29, -384
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm30, -320
+; CHECK64-KNL-NEXT:    .cfi_offset %xmm31, -224
+; CHECK64-KNL-NEXT:    .cfi_offset %k0, -144
+; CHECK64-KNL-NEXT:    .cfi_offset %k1, -136
+; CHECK64-KNL-NEXT:    .cfi_offset %k2, -128
+; CHECK64-KNL-NEXT:    .cfi_offset %k3, -120
+; CHECK64-KNL-NEXT:    .cfi_offset %k4, -112
+; CHECK64-KNL-NEXT:    .cfi_offset %k5, -104
+; CHECK64-KNL-NEXT:    .cfi_offset %k6, -96
+; CHECK64-KNL-NEXT:    .cfi_offset %k7, -88
+; CHECK64-KNL-NEXT:    cld ## encoding: [0xfc]
+; CHECK64-KNL-NEXT:    callq _bar ## encoding: [0xe8,A,A,A,A]
+; CHECK64-KNL-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
+; CHECK64-KNL-NEXT:    vmovups (%rsp), %zmm0 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm1 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm2 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm3 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm4 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm5 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm6 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm7 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm8 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x44,0x24,0x08]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm9 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x4c,0x24,0x09]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm10 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x54,0x24,0x0a]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm11 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x5c,0x24,0x0b]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm12 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x64,0x24,0x0c]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm13 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x6c,0x24,0x0d]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm14 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x74,0x24,0x0e]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm15 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x7c,0x24,0x0f]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm16 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x44,0x24,0x10]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x4c,0x24,0x11]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x54,0x24,0x12]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x5c,0x24,0x13]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x64,0x24,0x14]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x6c,0x24,0x15]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x74,0x24,0x16]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x7c,0x24,0x17]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x44,0x24,0x18]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x4c,0x24,0x19]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x54,0x24,0x1a]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x5c,0x24,0x1b]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x64,0x24,0x1c]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x6c,0x24,0x1d]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
+; CHECK64-KNL-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k2 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k3 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
+; CHECK64-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    addq $2160, %rsp ## encoding: [0x48,0x81,0xc4,0x70,0x08,0x00,0x00]
+; CHECK64-KNL-NEXT:    ## imm = 0x870
+; CHECK64-KNL-NEXT:    popq %rcx ## encoding: [0x59]
+; CHECK64-KNL-NEXT:    popq %rdx ## encoding: [0x5a]
+; CHECK64-KNL-NEXT:    popq %rsi ## encoding: [0x5e]
+; CHECK64-KNL-NEXT:    popq %rdi ## encoding: [0x5f]
+; CHECK64-KNL-NEXT:    popq %r8 ## encoding: [0x41,0x58]
+; CHECK64-KNL-NEXT:    popq %r9 ## encoding: [0x41,0x59]
+; CHECK64-KNL-NEXT:    popq %r10 ## encoding: [0x41,0x5a]
+; CHECK64-KNL-NEXT:    popq %r11 ## encoding: [0x41,0x5b]
+; CHECK64-KNL-NEXT:    popq %rax ## encoding: [0x58]
+; CHECK64-KNL-NEXT:    iretq ## encoding: [0x48,0xcf]
+;
+; CHECK64-SKX-LABEL: foo:
+; CHECK64-SKX:       ## %bb.0:
+; CHECK64-SKX-NEXT:    pushq %rax ## encoding: [0x50]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 16
+; CHECK64-SKX-NEXT:    pushq %r11 ## encoding: [0x41,0x53]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 24
+; CHECK64-SKX-NEXT:    pushq %r10 ## encoding: [0x41,0x52]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 32
+; CHECK64-SKX-NEXT:    pushq %r9 ## encoding: [0x41,0x51]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 40
+; CHECK64-SKX-NEXT:    pushq %r8 ## encoding: [0x41,0x50]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 48
+; CHECK64-SKX-NEXT:    pushq %rdi ## encoding: [0x57]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 56
+; CHECK64-SKX-NEXT:    pushq %rsi ## encoding: [0x56]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 64
+; CHECK64-SKX-NEXT:    pushq %rdx ## encoding: [0x52]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 72
+; CHECK64-SKX-NEXT:    pushq %rcx ## encoding: [0x51]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 80
+; CHECK64-SKX-NEXT:    subq $2160, %rsp ## encoding: [0x48,0x81,0xec,0x70,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    ## imm = 0x870
+; CHECK64-SKX-NEXT:    kmovq %k7, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq %k6, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq %k5, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq %k4, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq %k3, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq %k2, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq %k1, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-SKX-NEXT:    vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
+; CHECK64-SKX-NEXT:    vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x6c,0x24,0x1d]
+; CHECK64-SKX-NEXT:    vmovups %zmm28, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x64,0x24,0x1c]
+; CHECK64-SKX-NEXT:    vmovups %zmm27, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x5c,0x24,0x1b]
+; CHECK64-SKX-NEXT:    vmovups %zmm26, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x54,0x24,0x1a]
+; CHECK64-SKX-NEXT:    vmovups %zmm25, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x4c,0x24,0x19]
+; CHECK64-SKX-NEXT:    vmovups %zmm24, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x44,0x24,0x18]
+; CHECK64-SKX-NEXT:    vmovups %zmm23, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x7c,0x24,0x17]
+; CHECK64-SKX-NEXT:    vmovups %zmm22, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x74,0x24,0x16]
+; CHECK64-SKX-NEXT:    vmovups %zmm21, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x6c,0x24,0x15]
+; CHECK64-SKX-NEXT:    vmovups %zmm20, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x64,0x24,0x14]
+; CHECK64-SKX-NEXT:    vmovups %zmm19, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x5c,0x24,0x13]
+; CHECK64-SKX-NEXT:    vmovups %zmm18, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x54,0x24,0x12]
+; CHECK64-SKX-NEXT:    vmovups %zmm17, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x4c,0x24,0x11]
+; CHECK64-SKX-NEXT:    vmovups %zmm16, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x11,0x44,0x24,0x10]
+; CHECK64-SKX-NEXT:    vmovups %zmm15, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x7c,0x24,0x0f]
+; CHECK64-SKX-NEXT:    vmovups %zmm14, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x74,0x24,0x0e]
+; CHECK64-SKX-NEXT:    vmovups %zmm13, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x6c,0x24,0x0d]
+; CHECK64-SKX-NEXT:    vmovups %zmm12, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x64,0x24,0x0c]
+; CHECK64-SKX-NEXT:    vmovups %zmm11, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x5c,0x24,0x0b]
+; CHECK64-SKX-NEXT:    vmovups %zmm10, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x54,0x24,0x0a]
+; CHECK64-SKX-NEXT:    vmovups %zmm9, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x4c,0x24,0x09]
+; CHECK64-SKX-NEXT:    vmovups %zmm8, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x11,0x44,0x24,0x08]
+; CHECK64-SKX-NEXT:    vmovups %zmm7, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
+; CHECK64-SKX-NEXT:    vmovups %zmm6, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
+; CHECK64-SKX-NEXT:    vmovups %zmm5, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05]
+; CHECK64-SKX-NEXT:    vmovups %zmm4, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04]
+; CHECK64-SKX-NEXT:    vmovups %zmm3, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03]
+; CHECK64-SKX-NEXT:    vmovups %zmm2, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02]
+; CHECK64-SKX-NEXT:    vmovups %zmm1, {{[0-9]+}}(%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01]
+; CHECK64-SKX-NEXT:    vmovups %zmm0, (%rsp) ## 64-byte Spill
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; CHECK64-SKX-NEXT:    .cfi_def_cfa_offset 2240
+; CHECK64-SKX-NEXT:    .cfi_offset %rcx, -80
+; CHECK64-SKX-NEXT:    .cfi_offset %rdx, -72
+; CHECK64-SKX-NEXT:    .cfi_offset %rsi, -64
+; CHECK64-SKX-NEXT:    .cfi_offset %rdi, -56
+; CHECK64-SKX-NEXT:    .cfi_offset %r8, -48
+; CHECK64-SKX-NEXT:    .cfi_offset %r9, -40
+; CHECK64-SKX-NEXT:    .cfi_offset %r10, -32
+; CHECK64-SKX-NEXT:    .cfi_offset %r11, -24
+; CHECK64-SKX-NEXT:    .cfi_offset %rax, -16
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm0, -2240
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm1, -2176
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm2, -2112
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm3, -2048
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm4, -1984
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm5, -1920
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm6, -1856
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm7, -1792
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm8, -1728
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm9, -1664
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm10, -1600
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm11, -1536
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm12, -1472
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm13, -1408
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm14, -1344
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm15, -1280
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm16, -1216
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm17, -1152
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm18, -1088
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm19, -1024
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm20, -960
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm21, -896
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm22, -832
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm23, -768
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm24, -704
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm25, -640
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm26, -576
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm27, -512
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm28, -448
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm29, -384
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm30, -320
+; CHECK64-SKX-NEXT:    .cfi_offset %xmm31, -224
+; CHECK64-SKX-NEXT:    .cfi_offset %k0, -144
+; CHECK64-SKX-NEXT:    .cfi_offset %k1, -136
+; CHECK64-SKX-NEXT:    .cfi_offset %k2, -128
+; CHECK64-SKX-NEXT:    .cfi_offset %k3, -120
+; CHECK64-SKX-NEXT:    .cfi_offset %k4, -112
+; CHECK64-SKX-NEXT:    .cfi_offset %k5, -104
+; CHECK64-SKX-NEXT:    .cfi_offset %k6, -96
+; CHECK64-SKX-NEXT:    .cfi_offset %k7, -88
+; CHECK64-SKX-NEXT:    cld ## encoding: [0xfc]
+; CHECK64-SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK64-SKX-NEXT:    callq _bar ## encoding: [0xe8,A,A,A,A]
+; CHECK64-SKX-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4
+; CHECK64-SKX-NEXT:    vmovups (%rsp), %zmm0 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm1 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm2 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm3 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm4 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm5 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm6 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm7 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm8 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x44,0x24,0x08]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm9 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x4c,0x24,0x09]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm10 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x54,0x24,0x0a]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm11 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x5c,0x24,0x0b]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm12 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x64,0x24,0x0c]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm13 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x6c,0x24,0x0d]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm14 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x74,0x24,0x0e]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm15 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x71,0x7c,0x48,0x10,0x7c,0x24,0x0f]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm16 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x44,0x24,0x10]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm17 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x4c,0x24,0x11]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm18 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x54,0x24,0x12]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm19 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x5c,0x24,0x13]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm20 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x64,0x24,0x14]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm21 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x6c,0x24,0x15]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm22 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x74,0x24,0x16]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm23 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0xe1,0x7c,0x48,0x10,0x7c,0x24,0x17]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm24 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x44,0x24,0x18]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm25 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x4c,0x24,0x19]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm26 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x54,0x24,0x1a]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm27 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x5c,0x24,0x1b]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm28 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x64,0x24,0x1c]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm29 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x6c,0x24,0x1d]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
+; CHECK64-SKX-NEXT:    vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k2 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k3 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k4 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k5 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k6 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    kmovq {{[0-9]+}}(%rsp), %k7 ## 8-byte Reload
+; CHECK64-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    addq $2160, %rsp ## encoding: [0x48,0x81,0xc4,0x70,0x08,0x00,0x00]
+; CHECK64-SKX-NEXT:    ## imm = 0x870
+; CHECK64-SKX-NEXT:    popq %rcx ## encoding: [0x59]
+; CHECK64-SKX-NEXT:    popq %rdx ## encoding: [0x5a]
+; CHECK64-SKX-NEXT:    popq %rsi ## encoding: [0x5e]
+; CHECK64-SKX-NEXT:    popq %rdi ## encoding: [0x5f]
+; CHECK64-SKX-NEXT:    popq %r8 ## encoding: [0x41,0x58]
+; CHECK64-SKX-NEXT:    popq %r9 ## encoding: [0x41,0x59]
+; CHECK64-SKX-NEXT:    popq %r10 ## encoding: [0x41,0x5a]
+; CHECK64-SKX-NEXT:    popq %r11 ## encoding: [0x41,0x5b]
+; CHECK64-SKX-NEXT:    popq %rax ## encoding: [0x58]
+; CHECK64-SKX-NEXT:    iretq ## encoding: [0x48,0xcf]
+;
+; CHECK32-KNL-LABEL: foo:
+; CHECK32-KNL:       ## %bb.0:
+; CHECK32-KNL-NEXT:    pushl %edx ## encoding: [0x52]
+; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 8
+; CHECK32-KNL-NEXT:    pushl %ecx ## encoding: [0x51]
+; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 12
+; CHECK32-KNL-NEXT:    pushl %eax ## encoding: [0x50]
+; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 16
+; CHECK32-KNL-NEXT:    subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    ## imm = 0x270
+; CHECK32-KNL-NEXT:    kmovq %k7, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovq %k6, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovq %k5, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovq %k4, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovq %k3, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovq %k2, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovq %k1, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
+; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
+; CHECK32-KNL-NEXT:    vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00] +; CHECK32-KNL-NEXT:    vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06] +; CHECK32-KNL-NEXT:    vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05] +; CHECK32-KNL-NEXT:    vmovups %zmm4, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04] +; CHECK32-KNL-NEXT:    vmovups %zmm3, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03] +; CHECK32-KNL-NEXT:    vmovups %zmm2, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02] +; CHECK32-KNL-NEXT:    vmovups %zmm1, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] +; CHECK32-KNL-NEXT:    vmovups %zmm0, (%esp) ## 64-byte Spill +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] +; CHECK32-KNL-NEXT:    .cfi_def_cfa_offset 640 +; CHECK32-KNL-NEXT:    .cfi_offset %eax, -16 +; CHECK32-KNL-NEXT:    .cfi_offset %ecx, -12 +; CHECK32-KNL-NEXT:    .cfi_offset %edx, -8 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm0, -640 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm1, -576 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm2, -512 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm3, -448 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm4, -384 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm5, -320 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm6, -256 +; CHECK32-KNL-NEXT:    .cfi_offset %xmm7, -160 +; CHECK32-KNL-NEXT:    .cfi_offset %k0, -80 +; CHECK32-KNL-NEXT:    .cfi_offset %k1, -72 +; CHECK32-KNL-NEXT:    .cfi_offset %k2, -64 +; CHECK32-KNL-NEXT:    .cfi_offset %k3, -56 +; CHECK32-KNL-NEXT:    .cfi_offset %k4, -48 +; CHECK32-KNL-NEXT:    .cfi_offset %k5, -40 +; CHECK32-KNL-NEXT:    .cfi_offset %k6, -32 +; CHECK32-KNL-NEXT:    .cfi_offset %k7, -24 +; CHECK32-KNL-NEXT:    cld ## encoding: [0xfc] +; CHECK32-KNL-NEXT:    calll _bar ## encoding: [0xe8,A,A,A,A] +; CHECK32-KNL-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 +; CHECK32-KNL-NEXT:    vmovups (%esp), %zmm0 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24] +; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm1 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01] +; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm2 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02] +; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm3 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03] +; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm4 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04] +; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm5 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05] +; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06] +; CHECK32-KNL-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload +; CHECK32-KNL-NEXT:    
## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k2 ## 8-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k3 ## 8-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k4 ## 8-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k5 ## 8-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k6 ## 8-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    kmovq {{[0-9]+}}(%esp), %k7 ## 8-byte Reload +; CHECK32-KNL-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00] +; CHECK32-KNL-NEXT:    ## imm = 0x270 +; CHECK32-KNL-NEXT:    popl %eax ## encoding: [0x58] +; CHECK32-KNL-NEXT:    popl %ecx ## encoding: [0x59] +; CHECK32-KNL-NEXT:    popl %edx ## encoding: [0x5a] +; CHECK32-KNL-NEXT:    iretl ## encoding: [0xcf] +; +; CHECK32-SKX-LABEL: foo: +; CHECK32-SKX:       ## %bb.0: +; CHECK32-SKX-NEXT:    pushl %edx ## encoding: [0x52] +; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 8 +; CHECK32-SKX-NEXT:    pushl %ecx ## encoding: [0x51] +; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 12 +; CHECK32-SKX-NEXT:    pushl %eax ## encoding: [0x50] +; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 16 +; CHECK32-SKX-NEXT:    subl $624, %esp ## encoding: [0x81,0xec,0x70,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    ## imm = 0x270 +; CHECK32-SKX-NEXT:    kmovq %k7, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xbc,0x24,0x68,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq %k6, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xb4,0x24,0x60,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq %k5, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xac,0x24,0x58,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq %k4, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0xa4,0x24,0x50,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq %k3, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x9c,0x24,0x48,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq %k2, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x94,0x24,0x40,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq %k1, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x8c,0x24,0x38,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00] +; CHECK32-SKX-NEXT:    vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: 
[0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06] +; CHECK32-SKX-NEXT:    vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x6c,0x24,0x05] +; CHECK32-SKX-NEXT:    vmovups %zmm4, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x64,0x24,0x04] +; CHECK32-SKX-NEXT:    vmovups %zmm3, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x5c,0x24,0x03] +; CHECK32-SKX-NEXT:    vmovups %zmm2, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x54,0x24,0x02] +; CHECK32-SKX-NEXT:    vmovups %zmm1, {{[0-9]+}}(%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x4c,0x24,0x01] +; CHECK32-SKX-NEXT:    vmovups %zmm0, (%esp) ## 64-byte Spill +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24] +; CHECK32-SKX-NEXT:    .cfi_def_cfa_offset 640 +; CHECK32-SKX-NEXT:    .cfi_offset %eax, -16 +; CHECK32-SKX-NEXT:    .cfi_offset %ecx, -12 +; CHECK32-SKX-NEXT:    .cfi_offset %edx, -8 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm0, -640 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm1, -576 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm2, -512 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm3, -448 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm4, -384 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm5, -320 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm6, -256 +; CHECK32-SKX-NEXT:    .cfi_offset %xmm7, -160 +; CHECK32-SKX-NEXT:    .cfi_offset %k0, -80 +; CHECK32-SKX-NEXT:    .cfi_offset %k1, -72 +; CHECK32-SKX-NEXT:    .cfi_offset %k2, -64 +; CHECK32-SKX-NEXT:    .cfi_offset %k3, -56 +; CHECK32-SKX-NEXT:    .cfi_offset %k4, -48 +; CHECK32-SKX-NEXT:    .cfi_offset %k5, -40 +; CHECK32-SKX-NEXT:    .cfi_offset %k6, -32 +; CHECK32-SKX-NEXT:    .cfi_offset %k7, -24 +; CHECK32-SKX-NEXT:    cld ## encoding: [0xfc] +; CHECK32-SKX-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77] +; CHECK32-SKX-NEXT:    calll _bar ## encoding: [0xe8,A,A,A,A] +; CHECK32-SKX-NEXT:    ## fixup A - offset: 1, value: _bar-4, kind: FK_PCRel_4 +; CHECK32-SKX-NEXT:    vmovups (%esp), %zmm0 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x04,0x24] +; CHECK32-SKX-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm1 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x4c,0x24,0x01] +; CHECK32-SKX-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm2 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x54,0x24,0x02] +; CHECK32-SKX-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm3 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x5c,0x24,0x03] +; CHECK32-SKX-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm4 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x64,0x24,0x04] +; CHECK32-SKX-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm5 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x6c,0x24,0x05] +; CHECK32-SKX-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06] +; CHECK32-SKX-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload +; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00] +; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), 
%k1 ## 8-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x8c,0x24,0x38,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), %k2 ## 8-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x94,0x24,0x40,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), %k3 ## 8-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0x9c,0x24,0x48,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), %k4 ## 8-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xa4,0x24,0x50,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), %k5 ## 8-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xac,0x24,0x58,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), %k6 ## 8-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xb4,0x24,0x60,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    kmovq {{[0-9]+}}(%esp), %k7 ## 8-byte Reload
+; CHECK32-SKX-NEXT:    ## encoding: [0xc4,0xe1,0xf8,0x90,0xbc,0x24,0x68,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    addl $624, %esp ## encoding: [0x81,0xc4,0x70,0x02,0x00,0x00]
+; CHECK32-SKX-NEXT:    ## imm = 0x270
+; CHECK32-SKX-NEXT:    popl %eax ## encoding: [0x58]
+; CHECK32-SKX-NEXT:    popl %ecx ## encoding: [0x59]
+; CHECK32-SKX-NEXT:    popl %edx ## encoding: [0x5a]
+; CHECK32-SKX-NEXT:    iretl ## encoding: [0xcf]
  call void @bar()
  ret void
 }