| author | Lama Saba <lama.saba@intel.com> | 2018-02-14 14:58:53 +0000 |
|---|---|---|
| committer | Lama Saba <lama.saba@intel.com> | 2018-02-14 14:58:53 +0000 |
| commit | fe1016c485286c09fdd523d56b85673a910c631b (patch) | |
| tree | a0175fd82cae71222ad7bcae0961761f90b4dd78 /llvm/test/CodeGen | |
| parent | 86d15bff68fbec668fc4b2449af7dfd449b491fb (diff) | |
[X86] Reduce Store Forward Block issues in HW - Recommit after fixing Bug 36346
If a load follows a store and reloads data that the store has written to memory, Intel microarchitectures can in many cases forward the data directly from the store to the load. This "store forwarding" saves cycles by letting the load obtain the data directly instead of fetching it from cache or memory.
A "store forward block" occurs when a store cannot be forwarded to the load. The most typical case on the Intel Core microarchitecture is a small store that cannot be forwarded to a larger load.
The estimated penalty for a store forward block is ~13 cycles.
This pass tries to recognize and handle cases where a "store forward block" is created by the compiler when lowering memcpy calls to a sequence
of a load and a store.
The pass currently handles only cases where the memcpy is lowered to XMM/YMM registers; there it tries to break the memcpy into smaller copies.
Breaking the memcpy up should be possible, since there is no atomicity guarantee for loads and stores to XMM/YMM.
Change-Id: Ic41aa9ade6512e0478db66e07e2fde41b4fb35f9
llvm-svn: 325128
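The pattern the pass targets can be sketched with a minimal IR example in the style of the tests below (the names %struct.T and @sfb_example are illustrative, not taken from the patch): a 4-byte store into a struct field is immediately reloaded by a 16-byte memcpy of the whole struct. Lowered as a single XMM load/store, the wide load overlaps the narrow store and store forwarding is blocked; split into i32-sized copies, as in the CHECK lines of the tests, each piece can forward.

```llvm
%struct.T = type { i32, i32, i32, i32 }

define void @sfb_example(%struct.T* nocapture %dst, %struct.T* %src, i32 %x) {
entry:
  ; Small (4-byte) store into the second field of %src ...
  %b = getelementptr inbounds %struct.T, %struct.T* %src, i64 0, i32 1
  store i32 %x, i32* %b, align 4
  ; ... immediately reloaded by a 16-byte copy of the whole struct.
  ; As one XMM load this blocks store forwarding; as four i32
  ; load/store pairs, each reload forwards from the store above.
  %0 = bitcast %struct.T* %dst to i8*
  %1 = bitcast %struct.T* %src to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
```

The RUN lines in the tests below exercise both sides of the transform: the default output is checked under the CHECK prefixes, and --disable-fixup-SFB produces the DISABLED output.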
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/fixup-sfb-32.ll | 1926 |
| -rw-r--r-- | llvm/test/CodeGen/X86/fixup-sfb.ll | 1378 |
2 files changed, 3304 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/fixup-sfb-32.ll b/llvm/test/CodeGen/X86/fixup-sfb-32.ll new file mode 100644 index 00000000000..afb34d18595 --- /dev/null +++ b/llvm/test/CodeGen/X86/fixup-sfb-32.ll @@ -0,0 +1,1926 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=i686-linux --disable-fixup-SFB | FileCheck %s --check-prefix=DISABLED +; RUN: llc < %s -mtriple=i686-linux -mattr +sse4.1 | FileCheck %s -check-prefix=CHECK-AVX2 +; RUN: llc < %s -mtriple=i686-linux -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s -check-prefix=CHECK-AVX512 + +%struct.S = type { i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @test_conditional_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_conditional_block: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $18, %edi +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edi, 4(%ecx) +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: movups (%esi), %xmm0 +; CHECK-NEXT: movups %xmm0, (%edx) +; CHECK-NEXT: movl (%ecx), %edx +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl 4(%ecx), %edx +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movl 8(%ecx), %edx +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl 12(%ecx), %ecx +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_conditional_block: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %edi +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 12 +; DISABLED-NEXT: .cfi_offset %esi, -12 +; DISABLED-NEXT: .cfi_offset %edi, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: cmpl $18, %edi +; DISABLED-NEXT: jl .LBB0_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edi, 4(%esi) +; DISABLED-NEXT: .LBB0_2: # %if.end +; DISABLED-NEXT: movups (%edx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%ecx) +; DISABLED-NEXT: movups (%esi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: popl %edi +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_conditional_block: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: cmpl $18, %edi +; CHECK-AVX2-NEXT: jl .LBB0_2 
+; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edi, 4(%ecx) +; CHECK-AVX2-NEXT: .LBB0_2: # %if.end +; CHECK-AVX2-NEXT: movups (%esi), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%edx) +; CHECK-AVX2-NEXT: movl (%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, (%eax) +; CHECK-AVX2-NEXT: movl 4(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 4(%eax) +; CHECK-AVX2-NEXT: movl 8(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 8(%eax) +; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_conditional_block: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %edi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX512-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: cmpl $18, %edi +; CHECK-AVX512-NEXT: jl .LBB0_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edi, 4(%ecx) +; CHECK-AVX512-NEXT: .LBB0_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx) +; CHECK-AVX512-NEXT: movl (%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: movl 8(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 8(%eax) +; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: popl %edi +; CHECK-AVX512-NEXT: retl +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_imm_store(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 { +; CHECK-LABEL: test_imm_store: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl $0, (%edx) +; CHECK-NEXT: movl $1, (%ecx) +; CHECK-NEXT: movl (%edx), %ecx +; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl 4(%edx), %ecx +; CHECK-NEXT: movl %ecx, 4(%eax) +; CHECK-NEXT: movl 8(%edx), %ecx +; CHECK-NEXT: movl %ecx, 8(%eax) +; CHECK-NEXT: movl 12(%edx), %ecx +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_imm_store: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl $0, (%edx) +; DISABLED-NEXT: movl $1, (%ecx) +; DISABLED-NEXT: movups (%edx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%eax) +; 
DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_imm_store: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl $0, (%edx) +; CHECK-AVX2-NEXT: movl $1, (%ecx) +; CHECK-AVX2-NEXT: movl (%edx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, (%eax) +; CHECK-AVX2-NEXT: movl 4(%edx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 4(%eax) +; CHECK-AVX2-NEXT: movl 8(%edx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 8(%eax) +; CHECK-AVX2-NEXT: movl 12(%edx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_imm_store: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl $0, (%edx) +; CHECK-AVX512-NEXT: movl $1, (%ecx) +; CHECK-AVX512-NEXT: movl (%edx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%edx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 4(%eax) +; CHECK-AVX512-NEXT: movl 8(%edx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 8(%eax) +; CHECK-AVX512-NEXT: movl 12(%edx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX512-NEXT: retl +entry: + %a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0 + store i32 0, i32* %a, align 4 + %a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0 + store i32 1, i32* %a1, align 4 + %0 = bitcast %struct.S* %s2 to i8* + %1 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_nondirect_br(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_nondirect_br: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl $18, %ecx +; CHECK-NEXT: jl .LBB2_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %ecx, 4(%eax) +; CHECK-NEXT: .LBB2_2: # %if.end +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $14, %edx +; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: # %bb.3: # %if.then2 +; CHECK-NEXT: movl %edx, 12(%eax) +; CHECK-NEXT: .LBB2_4: # %if.end3 +; CHECK-NEXT: movups (%edi), %xmm0 +; CHECK-NEXT: movups %xmm0, (%esi) +; CHECK-NEXT: movl (%eax), %edx +; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl 4(%eax), %edx +; CHECK-NEXT: movl %edx, 4(%ecx) +; CHECK-NEXT: movl 8(%eax), %edx +; CHECK-NEXT: movl %edx, 8(%ecx) +; CHECK-NEXT: movl 12(%eax), %eax +; CHECK-NEXT: movl %eax, 12(%ecx) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_nondirect_br: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %edi +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 12 +; DISABLED-NEXT: .cfi_offset %esi, -12 +; DISABLED-NEXT: .cfi_offset %edi, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx 
+; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB2_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edx, 4(%eax) +; DISABLED-NEXT: .LBB2_2: # %if.end +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: cmpl $14, %ecx +; DISABLED-NEXT: jl .LBB2_4 +; DISABLED-NEXT: # %bb.3: # %if.then2 +; DISABLED-NEXT: movl %ecx, 12(%eax) +; DISABLED-NEXT: .LBB2_4: # %if.end3 +; DISABLED-NEXT: movups (%edi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%esi) +; DISABLED-NEXT: movups (%eax), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%edx) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: popl %edi +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_nondirect_br: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: cmpl $18, %ecx +; CHECK-AVX2-NEXT: jl .LBB2_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %ecx, 4(%eax) +; CHECK-AVX2-NEXT: .LBB2_2: # %if.end +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: cmpl $14, %edx +; CHECK-AVX2-NEXT: jl .LBB2_4 +; CHECK-AVX2-NEXT: # %bb.3: # %if.then2 +; CHECK-AVX2-NEXT: movl %edx, 12(%eax) +; CHECK-AVX2-NEXT: .LBB2_4: # %if.end3 +; CHECK-AVX2-NEXT: movups (%edi), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%esi) +; CHECK-AVX2-NEXT: movl (%eax), %edx +; CHECK-AVX2-NEXT: movl %edx, (%ecx) +; CHECK-AVX2-NEXT: movl 4(%eax), %edx +; CHECK-AVX2-NEXT: movl %edx, 4(%ecx) +; CHECK-AVX2-NEXT: movl 8(%eax), %edx +; CHECK-AVX2-NEXT: movl %edx, 8(%ecx) +; CHECK-AVX2-NEXT: movl 12(%eax), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%ecx) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_nondirect_br: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %edi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX512-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: cmpl $18, %ecx +; CHECK-AVX512-NEXT: jl .LBB2_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %ecx, 4(%eax) +; CHECK-AVX512-NEXT: .LBB2_2: # %if.end +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: cmpl $14, %edx +; CHECK-AVX512-NEXT: jl .LBB2_4 +; CHECK-AVX512-NEXT: # %bb.3: # %if.then2 +; CHECK-AVX512-NEXT: movl %edx, 12(%eax) +; CHECK-AVX512-NEXT: .LBB2_4: # %if.end3 +; CHECK-AVX512-NEXT: vmovups (%edi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%esi) +; CHECK-AVX512-NEXT: movl (%eax), %edx +; CHECK-AVX512-NEXT: movl %edx, (%ecx) +; CHECK-AVX512-NEXT: movl 4(%eax), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%ecx) +; CHECK-AVX512-NEXT: movl 8(%eax), %edx +; CHECK-AVX512-NEXT: movl %edx, 
8(%ecx) +; CHECK-AVX512-NEXT: movl 12(%eax), %eax +; CHECK-AVX512-NEXT: movl %eax, 12(%ecx) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: popl %edi +; CHECK-AVX512-NEXT: retl +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %cmp1 = icmp sgt i32 %x2, 13 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_2preds_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_2preds_block: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl %ebx, 12(%ecx) +; CHECK-NEXT: cmpl $18, %edi +; CHECK-NEXT: jl .LBB3_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edi, 4(%ecx) +; CHECK-NEXT: .LBB3_2: # %if.end +; CHECK-NEXT: movups (%esi), %xmm0 +; CHECK-NEXT: movups %xmm0, (%edx) +; CHECK-NEXT: movl (%ecx), %edx +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl 4(%ecx), %edx +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movl 8(%ecx), %edx +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl 12(%ecx), %ecx +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_2preds_block: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %ebx +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: pushl %edi +; DISABLED-NEXT: .cfi_def_cfa_offset 12 +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 16 +; DISABLED-NEXT: .cfi_offset %esi, -16 +; DISABLED-NEXT: .cfi_offset %edi, -12 +; DISABLED-NEXT: .cfi_offset %ebx, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebx +; DISABLED-NEXT: movl %ebx, 12(%esi) +; DISABLED-NEXT: cmpl $18, %edi +; DISABLED-NEXT: jl .LBB3_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edi, 4(%esi) +; DISABLED-NEXT: .LBB3_2: # %if.end +; DISABLED-NEXT: movups (%edx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%ecx) +; DISABLED-NEXT: movups (%esi), %xmm0 +; 
DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: popl %edi +; DISABLED-NEXT: popl %ebx +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_2preds_block: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %ebx +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -16 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -12 +; CHECK-AVX2-NEXT: .cfi_offset %ebx, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-AVX2-NEXT: movl %ebx, 12(%ecx) +; CHECK-AVX2-NEXT: cmpl $18, %edi +; CHECK-AVX2-NEXT: jl .LBB3_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edi, 4(%ecx) +; CHECK-AVX2-NEXT: .LBB3_2: # %if.end +; CHECK-AVX2-NEXT: movups (%esi), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%edx) +; CHECK-AVX2-NEXT: movl (%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, (%eax) +; CHECK-AVX2-NEXT: movl 4(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 4(%eax) +; CHECK-AVX2-NEXT: movl 8(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 8(%eax) +; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: popl %ebx +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_2preds_block: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %ebx +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: pushl %edi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -16 +; CHECK-AVX512-NEXT: .cfi_offset %edi, -12 +; CHECK-AVX512-NEXT: .cfi_offset %ebx, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-AVX512-NEXT: movl %ebx, 12(%ecx) +; CHECK-AVX512-NEXT: cmpl $18, %edi +; CHECK-AVX512-NEXT: jl .LBB3_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edi, 4(%ecx) +; CHECK-AVX512-NEXT: .LBB3_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx) +; CHECK-AVX512-NEXT: movl (%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: movl 8(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 8(%eax) +; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: popl %edi +; CHECK-AVX512-NEXT: popl %ebx +; CHECK-AVX512-NEXT: retl +entry: + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + 
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} +%struct.S2 = type { i64, i64 } + +; Function Attrs: nounwind uwtable +define void @test_type64(%struct.S2* nocapture %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_type64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $18, %edi +; CHECK-NEXT: jl .LBB4_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edi, 8(%ecx) +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: movl %edi, 12(%ecx) +; CHECK-NEXT: .LBB4_2: # %if.end +; CHECK-NEXT: movups (%esi), %xmm0 +; CHECK-NEXT: movups %xmm0, (%edx) +; CHECK-NEXT: movl (%ecx), %edx +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl 4(%ecx), %edx +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movl 8(%ecx), %edx +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl 12(%ecx), %ecx +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_type64: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %edi +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 12 +; DISABLED-NEXT: .cfi_offset %esi, -12 +; DISABLED-NEXT: .cfi_offset %edi, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: cmpl $18, %edi +; DISABLED-NEXT: jl .LBB4_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edi, 8(%esi) +; DISABLED-NEXT: sarl $31, %edi +; DISABLED-NEXT: movl %edi, 12(%esi) +; DISABLED-NEXT: .LBB4_2: # %if.end +; DISABLED-NEXT: movups (%edx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%ecx) +; DISABLED-NEXT: movups (%esi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: popl %edi +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_type64: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: cmpl $18, %edi +; CHECK-AVX2-NEXT: jl .LBB4_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edi, 8(%ecx) +; CHECK-AVX2-NEXT: sarl $31, %edi +; CHECK-AVX2-NEXT: movl %edi, 12(%ecx) +; CHECK-AVX2-NEXT: .LBB4_2: # %if.end +; CHECK-AVX2-NEXT: movups (%esi), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%edx) +; CHECK-AVX2-NEXT: movl (%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 
(%eax) +; CHECK-AVX2-NEXT: movl 4(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 4(%eax) +; CHECK-AVX2-NEXT: movl 8(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 8(%eax) +; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_type64: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %edi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX512-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: cmpl $18, %edi +; CHECK-AVX512-NEXT: jl .LBB4_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edi, 8(%ecx) +; CHECK-AVX512-NEXT: sarl $31, %edi +; CHECK-AVX512-NEXT: movl %edi, 12(%ecx) +; CHECK-AVX512-NEXT: .LBB4_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx) +; CHECK-AVX512-NEXT: movl (%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: movl 8(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 8(%eax) +; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: popl %edi +; CHECK-AVX512-NEXT: retl +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv = sext i32 %x to i64 + %b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1 + store i64 %conv, i64* %b, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S2* %s3 to i8* + %1 = bitcast %struct.S2* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false) + %2 = bitcast %struct.S2* %s2 to i8* + %3 = bitcast %struct.S2* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false) + ret void +} +%struct.S3 = type { i64, i8, i8, i16, i32 } + +; Function Attrs: noinline nounwind uwtable +define void @test_mixed_type(%struct.S3* nocapture %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_mixed_type: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB5_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: sarl $31, %esi +; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl %esi, 4(%ecx) +; CHECK-NEXT: movb %dl, 8(%ecx) +; CHECK-NEXT: .LBB5_2: # %if.end +; CHECK-NEXT: movl (%ecx), %edx +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl 4(%ecx), %edx +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movb 8(%ecx), %dl +; CHECK-NEXT: movb %dl, 8(%eax) +; CHECK-NEXT: movl 9(%ecx), %edx +; CHECK-NEXT: movl %edx, 9(%eax) +; CHECK-NEXT: movzwl 13(%ecx), %edx +; CHECK-NEXT: movw %dx, 13(%eax) +; CHECK-NEXT: movb 15(%ecx), %cl +; 
CHECK-NEXT: movb %cl, 15(%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_mixed_type: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: .cfi_offset %esi, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB5_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edx, %esi +; DISABLED-NEXT: sarl $31, %esi +; DISABLED-NEXT: movl %edx, (%ecx) +; DISABLED-NEXT: movl %esi, 4(%ecx) +; DISABLED-NEXT: movb %dl, 8(%ecx) +; DISABLED-NEXT: .LBB5_2: # %if.end +; DISABLED-NEXT: movups (%ecx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_mixed_type: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB5_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edx, %esi +; CHECK-AVX2-NEXT: sarl $31, %esi +; CHECK-AVX2-NEXT: movl %edx, (%ecx) +; CHECK-AVX2-NEXT: movl %esi, 4(%ecx) +; CHECK-AVX2-NEXT: movb %dl, 8(%ecx) +; CHECK-AVX2-NEXT: .LBB5_2: # %if.end +; CHECK-AVX2-NEXT: movl (%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, (%eax) +; CHECK-AVX2-NEXT: movl 4(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 4(%eax) +; CHECK-AVX2-NEXT: movb 8(%ecx), %dl +; CHECK-AVX2-NEXT: movb %dl, 8(%eax) +; CHECK-AVX2-NEXT: movl 9(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 9(%eax) +; CHECK-AVX2-NEXT: movzwl 13(%ecx), %edx +; CHECK-AVX2-NEXT: movw %dx, 13(%eax) +; CHECK-AVX2-NEXT: movb 15(%ecx), %cl +; CHECK-AVX2-NEXT: movb %cl, 15(%eax) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_mixed_type: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB5_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edx, %esi +; CHECK-AVX512-NEXT: sarl $31, %esi +; CHECK-AVX512-NEXT: movl %edx, (%ecx) +; CHECK-AVX512-NEXT: movl %esi, 4(%ecx) +; CHECK-AVX512-NEXT: movb %dl, 8(%ecx) +; CHECK-AVX512-NEXT: .LBB5_2: # %if.end +; CHECK-AVX512-NEXT: movl (%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: movb 8(%ecx), %dl +; CHECK-AVX512-NEXT: movb %dl, 8(%eax) +; CHECK-AVX512-NEXT: movl 9(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 9(%eax) +; CHECK-AVX512-NEXT: movzwl 13(%ecx), %edx +; CHECK-AVX512-NEXT: movw %dx, 13(%eax) +; CHECK-AVX512-NEXT: movb 15(%ecx), %cl +; CHECK-AVX512-NEXT: movb %cl, 15(%eax) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: retl +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv = sext i32 %x to i64 + %a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0 + store i64 %conv, i64* %a, align 8 + %conv1 = trunc i32 %x to i8 + %b = 
getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1 + store i8 %conv1, i8* %b, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S3* %s2 to i8* + %1 = bitcast %struct.S3* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false) + ret void +} +%struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @test_multiple_blocks(%struct.S4* nocapture %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 { +; CHECK-LABEL: test_multiple_blocks: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl $0, 4(%ecx) +; CHECK-NEXT: movl $0, 36(%ecx) +; CHECK-NEXT: movups 16(%ecx), %xmm0 +; CHECK-NEXT: movups %xmm0, 16(%eax) +; CHECK-NEXT: movl 32(%ecx), %edx +; CHECK-NEXT: movl %edx, 32(%eax) +; CHECK-NEXT: movl 36(%ecx), %edx +; CHECK-NEXT: movl %edx, 36(%eax) +; CHECK-NEXT: movl 40(%ecx), %edx +; CHECK-NEXT: movl %edx, 40(%eax) +; CHECK-NEXT: movl 44(%ecx), %edx +; CHECK-NEXT: movl %edx, 44(%eax) +; CHECK-NEXT: movl (%ecx), %edx +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl 4(%ecx), %edx +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movl 8(%ecx), %edx +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl 12(%ecx), %ecx +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_multiple_blocks: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl $0, 4(%ecx) +; DISABLED-NEXT: movl $0, 36(%ecx) +; DISABLED-NEXT: movups 16(%ecx), %xmm0 +; DISABLED-NEXT: movups %xmm0, 16(%eax) +; DISABLED-NEXT: movups 32(%ecx), %xmm0 +; DISABLED-NEXT: movups %xmm0, 32(%eax) +; DISABLED-NEXT: movups (%ecx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_multiple_blocks: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl $0, 4(%ecx) +; CHECK-AVX2-NEXT: movl $0, 36(%ecx) +; CHECK-AVX2-NEXT: movups 16(%ecx), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, 16(%eax) +; CHECK-AVX2-NEXT: movl 32(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 32(%eax) +; CHECK-AVX2-NEXT: movl 36(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 36(%eax) +; CHECK-AVX2-NEXT: movl 40(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 40(%eax) +; CHECK-AVX2-NEXT: movl 44(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 44(%eax) +; CHECK-AVX2-NEXT: movl (%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, (%eax) +; CHECK-AVX2-NEXT: movl 4(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 4(%eax) +; CHECK-AVX2-NEXT: movl 8(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 8(%eax) +; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_multiple_blocks: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl $0, 4(%ecx) +; CHECK-AVX512-NEXT: movl $0, 36(%ecx) +; CHECK-AVX512-NEXT: vmovups 16(%ecx), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%eax) +; CHECK-AVX512-NEXT: movl 32(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 32(%eax) +; CHECK-AVX512-NEXT: movl 36(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 36(%eax) +; CHECK-AVX512-NEXT: movl 40(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 40(%eax) +; CHECK-AVX512-NEXT: 
movl 44(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 44(%eax) +; CHECK-AVX512-NEXT: movl (%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: vmovups 8(%ecx), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%eax) +; CHECK-AVX512-NEXT: movl 24(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 24(%eax) +; CHECK-AVX512-NEXT: movl 28(%ecx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 28(%eax) +; CHECK-AVX512-NEXT: retl +entry: + %b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1 + store i32 0, i32* %b, align 4 + %b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9 + store i32 0, i32* %b3, align 4 + %0 = bitcast %struct.S4* %s2 to i8* + %1 = bitcast %struct.S4* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false) + ret void +} +%struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 } + +; Function Attrs: nounwind uwtable +define void @test_type16(%struct.S5* nocapture %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_type16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $18, %edi +; CHECK-NEXT: jl .LBB7_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movw %di, 2(%ecx) +; CHECK-NEXT: .LBB7_2: # %if.end +; CHECK-NEXT: movups (%esi), %xmm0 +; CHECK-NEXT: movups %xmm0, (%edx) +; CHECK-NEXT: movzwl (%ecx), %edx +; CHECK-NEXT: movw %dx, (%eax) +; CHECK-NEXT: movzwl 2(%ecx), %edx +; CHECK-NEXT: movw %dx, 2(%eax) +; CHECK-NEXT: movl 4(%ecx), %edx +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movl 8(%ecx), %edx +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl 12(%ecx), %ecx +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_type16: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %edi +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 12 +; DISABLED-NEXT: .cfi_offset %esi, -12 +; DISABLED-NEXT: .cfi_offset %edi, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: cmpl $18, %edi +; DISABLED-NEXT: jl .LBB7_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movw %di, 2(%esi) +; DISABLED-NEXT: .LBB7_2: # %if.end +; DISABLED-NEXT: movups (%edx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%ecx) +; DISABLED-NEXT: movups (%esi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: popl %edi +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_type16: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX2-NEXT: movl 
{{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: cmpl $18, %edi +; CHECK-AVX2-NEXT: jl .LBB7_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movw %di, 2(%ecx) +; CHECK-AVX2-NEXT: .LBB7_2: # %if.end +; CHECK-AVX2-NEXT: movups (%esi), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%edx) +; CHECK-AVX2-NEXT: movzwl (%ecx), %edx +; CHECK-AVX2-NEXT: movw %dx, (%eax) +; CHECK-AVX2-NEXT: movzwl 2(%ecx), %edx +; CHECK-AVX2-NEXT: movw %dx, 2(%eax) +; CHECK-AVX2-NEXT: movl 4(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 4(%eax) +; CHECK-AVX2-NEXT: movl 8(%ecx), %edx +; CHECK-AVX2-NEXT: movl %edx, 8(%eax) +; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_type16: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %edi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -12 +; CHECK-AVX512-NEXT: .cfi_offset %edi, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: cmpl $18, %edi +; CHECK-AVX512-NEXT: jl .LBB7_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movw %di, 2(%ecx) +; CHECK-AVX512-NEXT: .LBB7_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx) +; CHECK-AVX512-NEXT: movzwl (%ecx), %edx +; CHECK-AVX512-NEXT: movw %dx, (%eax) +; CHECK-AVX512-NEXT: movzwl 2(%ecx), %edx +; CHECK-AVX512-NEXT: movw %dx, 2(%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: movl 8(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 8(%eax) +; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 12(%eax) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: popl %edi +; CHECK-AVX512-NEXT: retl +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv = trunc i32 %x to i16 + %b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1 + store i16 %conv, i16* %b, align 2 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S5* %s3 to i8* + %1 = bitcast %struct.S5* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false) + %2 = bitcast %struct.S5* %s2 to i8* + %3 = bitcast %struct.S5* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false) + ret void +} + +%struct.S6 = type { [4 x i32], i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: test_stack: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movups {{[0-9]+}}(%esp), %xmm0 +; CHECK-NEXT: movups 
%xmm0, (%eax) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, 16(%eax) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, 20(%eax) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, 24(%eax) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, 28(%eax) +; CHECK-NEXT: popl %ecx +; CHECK-NEXT: retl $4 +; +; DISABLED-LABEL: test_stack: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %eax +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl %eax, {{[0-9]+}}(%esp) +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movups {{[0-9]+}}(%esp), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: movups {{[0-9]+}}(%esp), %xmm0 +; DISABLED-NEXT: movups %xmm0, 16(%eax) +; DISABLED-NEXT: popl %ecx +; DISABLED-NEXT: retl $4 +; +; CHECK-AVX2-LABEL: test_stack: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %eax +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movups {{[0-9]+}}(%esp), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%eax) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 16(%eax) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 20(%eax) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 24(%eax) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 28(%eax) +; CHECK-AVX2-NEXT: popl %ecx +; CHECK-AVX2-NEXT: retl $4 +; +; CHECK-AVX512-LABEL: test_stack: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %eax +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 16(%eax) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 20(%eax) +; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 24(%eax) +; CHECK-AVX512-NEXT: vmovups %xmm0, (%eax) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 28(%eax) +; CHECK-AVX512-NEXT: popl %ecx +; CHECK-AVX512-NEXT: retl $4 +entry: + %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8* + %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3 + store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8 + %0 = bitcast %struct.S6* %agg.result to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_limit_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %esi, -20 +; CHECK-NEXT: .cfi_offset %edi, -16 +; 
CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, 12(%ebp) +; CHECK-NEXT: movl %ebp, (%esp) +; CHECK-NEXT: calll bar +; CHECK-NEXT: cmpl $18, %esi +; CHECK-NEXT: jl .LBB9_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %esi, 4(%ebp) +; CHECK-NEXT: movl %ebp, (%esp) +; CHECK-NEXT: calll bar +; CHECK-NEXT: .LBB9_2: # %if.end +; CHECK-NEXT: movups (%ebx), %xmm0 +; CHECK-NEXT: movups %xmm0, (%edi) +; CHECK-NEXT: movups (%ebp), %xmm0 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movups %xmm0, (%eax) +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_limit_all: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %ebp +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: pushl %ebx +; DISABLED-NEXT: .cfi_def_cfa_offset 12 +; DISABLED-NEXT: pushl %edi +; DISABLED-NEXT: .cfi_def_cfa_offset 16 +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 20 +; DISABLED-NEXT: subl $12, %esp +; DISABLED-NEXT: .cfi_def_cfa_offset 32 +; DISABLED-NEXT: .cfi_offset %esi, -20 +; DISABLED-NEXT: .cfi_offset %edi, -16 +; DISABLED-NEXT: .cfi_offset %ebx, -12 +; DISABLED-NEXT: .cfi_offset %ebp, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebp +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl %eax, 12(%ebp) +; DISABLED-NEXT: movl %ebp, (%esp) +; DISABLED-NEXT: calll bar +; DISABLED-NEXT: cmpl $18, %esi +; DISABLED-NEXT: jl .LBB9_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %esi, 4(%ebp) +; DISABLED-NEXT: movl %ebp, (%esp) +; DISABLED-NEXT: calll bar +; DISABLED-NEXT: .LBB9_2: # %if.end +; DISABLED-NEXT: movups (%ebx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%edi) +; DISABLED-NEXT: movups (%ebp), %xmm0 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: addl $12, %esp +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: popl %edi +; DISABLED-NEXT: popl %ebx +; DISABLED-NEXT: popl %ebp +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_limit_all: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %ebp +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %ebx +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 20 +; CHECK-AVX2-NEXT: subl $12, %esp +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -20 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -16 +; CHECK-AVX2-NEXT: .cfi_offset %ebx, -12 +; CHECK-AVX2-NEXT: .cfi_offset %ebp, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%ebp) +; CHECK-AVX2-NEXT: movl %ebp, (%esp) +; CHECK-AVX2-NEXT: calll bar +; CHECK-AVX2-NEXT: cmpl $18, %esi +; CHECK-AVX2-NEXT: jl .LBB9_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %esi, 
4(%ebp) +; CHECK-AVX2-NEXT: movl %ebp, (%esp) +; CHECK-AVX2-NEXT: calll bar +; CHECK-AVX2-NEXT: .LBB9_2: # %if.end +; CHECK-AVX2-NEXT: movups (%ebx), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%edi) +; CHECK-AVX2-NEXT: movups (%ebp), %xmm0 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movups %xmm0, (%eax) +; CHECK-AVX2-NEXT: addl $12, %esp +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: popl %ebx +; CHECK-AVX2-NEXT: popl %ebp +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_limit_all: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %ebp +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: pushl %ebx +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX512-NEXT: pushl %edi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 20 +; CHECK-AVX512-NEXT: subl $12, %esp +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -20 +; CHECK-AVX512-NEXT: .cfi_offset %edi, -16 +; CHECK-AVX512-NEXT: .cfi_offset %ebx, -12 +; CHECK-AVX512-NEXT: .cfi_offset %ebp, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl %eax, 12(%ebp) +; CHECK-AVX512-NEXT: movl %ebp, (%esp) +; CHECK-AVX512-NEXT: calll bar +; CHECK-AVX512-NEXT: cmpl $18, %esi +; CHECK-AVX512-NEXT: jl .LBB9_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %esi, 4(%ebp) +; CHECK-AVX512-NEXT: movl %ebp, (%esp) +; CHECK-AVX512-NEXT: calll bar +; CHECK-AVX512-NEXT: .LBB9_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%ebx), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%edi) +; CHECK-AVX512-NEXT: vmovups (%ebp), %xmm0 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: vmovups %xmm0, (%eax) +; CHECK-AVX512-NEXT: addl $12, %esp +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: popl %edi +; CHECK-AVX512-NEXT: popl %ebx +; CHECK-AVX512-NEXT: popl %ebp +; CHECK-AVX512-NEXT: retl +entry: + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + tail call void @bar(%struct.S* %s1) #3 + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + tail call void @bar(%struct.S* nonnull %s1) #3 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_limit_one_pred(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_limit_one_pred: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: subl 
$12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %esi, -20 +; CHECK-NEXT: .cfi_offset %edi, -16 +; CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, 12(%edi) +; CHECK-NEXT: cmpl $18, %eax +; CHECK-NEXT: jl .LBB10_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %eax, 4(%edi) +; CHECK-NEXT: movl %edi, (%esp) +; CHECK-NEXT: calll bar +; CHECK-NEXT: .LBB10_2: # %if.end +; CHECK-NEXT: movups (%ebp), %xmm0 +; CHECK-NEXT: movups %xmm0, (%ebx) +; CHECK-NEXT: movl (%edi), %eax +; CHECK-NEXT: movl %eax, (%esi) +; CHECK-NEXT: movl 4(%edi), %eax +; CHECK-NEXT: movl %eax, 4(%esi) +; CHECK-NEXT: movl 8(%edi), %eax +; CHECK-NEXT: movl %eax, 8(%esi) +; CHECK-NEXT: movl 12(%edi), %eax +; CHECK-NEXT: movl %eax, 12(%esi) +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_limit_one_pred: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %ebp +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: pushl %ebx +; DISABLED-NEXT: .cfi_def_cfa_offset 12 +; DISABLED-NEXT: pushl %edi +; DISABLED-NEXT: .cfi_def_cfa_offset 16 +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 20 +; DISABLED-NEXT: subl $12, %esp +; DISABLED-NEXT: .cfi_def_cfa_offset 32 +; DISABLED-NEXT: .cfi_offset %esi, -20 +; DISABLED-NEXT: .cfi_offset %edi, -16 +; DISABLED-NEXT: .cfi_offset %ebx, -12 +; DISABLED-NEXT: .cfi_offset %ebp, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebp +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: movl %ecx, 12(%ebp) +; DISABLED-NEXT: cmpl $18, %eax +; DISABLED-NEXT: jl .LBB10_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %eax, 4(%ebp) +; DISABLED-NEXT: movl %ebp, (%esp) +; DISABLED-NEXT: calll bar +; DISABLED-NEXT: .LBB10_2: # %if.end +; DISABLED-NEXT: movups (%ebx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%edi) +; DISABLED-NEXT: movups (%ebp), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%esi) +; DISABLED-NEXT: addl $12, %esp +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: popl %edi +; DISABLED-NEXT: popl %ebx +; DISABLED-NEXT: popl %ebp +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_limit_one_pred: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %ebp +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %ebx +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 20 +; CHECK-AVX2-NEXT: subl $12, %esp +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -20 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -16 +; CHECK-AVX2-NEXT: .cfi_offset %ebx, -12 +; CHECK-AVX2-NEXT: .cfi_offset %ebp, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX2-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: movl %ecx, 12(%edi) +; CHECK-AVX2-NEXT: cmpl $18, %eax +; CHECK-AVX2-NEXT: jl .LBB10_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %eax, 4(%edi) +; CHECK-AVX2-NEXT: movl %edi, (%esp) +; CHECK-AVX2-NEXT: calll bar +; CHECK-AVX2-NEXT: .LBB10_2: # %if.end +; CHECK-AVX2-NEXT: movups (%ebp), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, (%ebx) +; CHECK-AVX2-NEXT: movl (%edi), %eax +; CHECK-AVX2-NEXT: movl %eax, (%esi) +; CHECK-AVX2-NEXT: movl 4(%edi), %eax +; CHECK-AVX2-NEXT: movl %eax, 4(%esi) +; CHECK-AVX2-NEXT: movl 8(%edi), %eax +; CHECK-AVX2-NEXT: movl %eax, 8(%esi) +; CHECK-AVX2-NEXT: movl 12(%edi), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%esi) +; CHECK-AVX2-NEXT: addl $12, %esp +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: popl %ebx +; CHECK-AVX2-NEXT: popl %ebp +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_limit_one_pred: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %ebp +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: pushl %ebx +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX512-NEXT: pushl %edi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 20 +; CHECK-AVX512-NEXT: subl $12, %esp +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -20 +; CHECK-AVX512-NEXT: .cfi_offset %edi, -16 +; CHECK-AVX512-NEXT: .cfi_offset %ebx, -12 +; CHECK-AVX512-NEXT: .cfi_offset %ebp, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 12(%edi) +; CHECK-AVX512-NEXT: cmpl $18, %eax +; CHECK-AVX512-NEXT: jl .LBB10_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %eax, 4(%edi) +; CHECK-AVX512-NEXT: movl %edi, (%esp) +; CHECK-AVX512-NEXT: calll bar +; CHECK-AVX512-NEXT: .LBB10_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%ebp), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%ebx) +; CHECK-AVX512-NEXT: movl (%edi), %eax +; CHECK-AVX512-NEXT: movl %eax, (%esi) +; CHECK-AVX512-NEXT: movl 4(%edi), %eax +; CHECK-AVX512-NEXT: movl %eax, 4(%esi) +; CHECK-AVX512-NEXT: movl 8(%edi), %eax +; CHECK-AVX512-NEXT: movl %eax, 8(%esi) +; CHECK-AVX512-NEXT: movl 12(%edi), %eax +; CHECK-AVX512-NEXT: movl %eax, 12(%esi) +; CHECK-AVX512-NEXT: addl $12, %esp +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: popl %edi +; CHECK-AVX512-NEXT: popl %ebx +; CHECK-AVX512-NEXT: popl %ebp +; CHECK-AVX512-NEXT: retl +entry: + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + tail call void @bar(%struct.S* nonnull %s1) #3 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + + +declare void @bar(%struct.S*) 
local_unnamed_addr #1 + + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 + +attributes #0 = { nounwind uwtable "target-cpu"="x86-64" } + +%struct.S7 = type { float, float, float, float, float, float, float, float } + +; Function Attrs: nounwind uwtable +define void @test_conditional_block_float(%struct.S7* nocapture %s1, %struct.S7* nocapture %s2, i32 %x, %struct.S7* nocapture %s3, %struct.S7* nocapture readonly %s4, float %y) local_unnamed_addr #0 { +; CHECK-LABEL: test_conditional_block_float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $18, {{[0-9]+}}(%esp) +; CHECK-NEXT: jl .LBB11_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000 +; CHECK-NEXT: .LBB11_2: # %if.end +; CHECK-NEXT: movups (%esi), %xmm0 +; CHECK-NEXT: movups 16(%esi), %xmm1 +; CHECK-NEXT: movups %xmm1, 16(%edx) +; CHECK-NEXT: movups %xmm0, (%edx) +; CHECK-NEXT: movl (%ecx), %edx +; CHECK-NEXT: movl 4(%ecx), %esi +; CHECK-NEXT: movl 8(%ecx), %edi +; CHECK-NEXT: movl 12(%ecx), %ebx +; CHECK-NEXT: movups 16(%ecx), %xmm0 +; CHECK-NEXT: movups %xmm0, 16(%eax) +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl %esi, 4(%eax) +; CHECK-NEXT: movl %edi, 8(%eax) +; CHECK-NEXT: movl %ebx, 12(%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_conditional_block_float: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: .cfi_offset %esi, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: cmpl $18, {{[0-9]+}}(%esp) +; DISABLED-NEXT: jl .LBB11_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000 +; DISABLED-NEXT: .LBB11_2: # %if.end +; DISABLED-NEXT: movups (%esi), %xmm0 +; DISABLED-NEXT: movups 16(%esi), %xmm1 +; DISABLED-NEXT: movups %xmm1, 16(%edx) +; DISABLED-NEXT: movups %xmm0, (%edx) +; DISABLED-NEXT: movups (%ecx), %xmm0 +; DISABLED-NEXT: movups 16(%ecx), %xmm1 +; DISABLED-NEXT: movups %xmm1, 16(%eax) +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_conditional_block_float: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %ebx +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -16 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -12 +; CHECK-AVX2-NEXT: .cfi_offset %ebx, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: cmpl $18, {{[0-9]+}}(%esp) +; CHECK-AVX2-NEXT: jl
.LBB11_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000 +; CHECK-AVX2-NEXT: .LBB11_2: # %if.end +; CHECK-AVX2-NEXT: movups (%esi), %xmm0 +; CHECK-AVX2-NEXT: movups 16(%esi), %xmm1 +; CHECK-AVX2-NEXT: movups %xmm1, 16(%edx) +; CHECK-AVX2-NEXT: movups %xmm0, (%edx) +; CHECK-AVX2-NEXT: movl (%ecx), %edx +; CHECK-AVX2-NEXT: movl 4(%ecx), %esi +; CHECK-AVX2-NEXT: movl 8(%ecx), %edi +; CHECK-AVX2-NEXT: movl 12(%ecx), %ebx +; CHECK-AVX2-NEXT: movups 16(%ecx), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, 16(%eax) +; CHECK-AVX2-NEXT: movl %edx, (%eax) +; CHECK-AVX2-NEXT: movl %esi, 4(%eax) +; CHECK-AVX2-NEXT: movl %edi, 8(%eax) +; CHECK-AVX2-NEXT: movl %ebx, 12(%eax) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: popl %ebx +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_conditional_block_float: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: cmpl $18, {{[0-9]+}}(%esp) +; CHECK-AVX512-NEXT: jl .LBB11_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000 +; CHECK-AVX512-NEXT: .LBB11_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%esi), %ymm0 +; CHECK-AVX512-NEXT: vmovups %ymm0, (%edx) +; CHECK-AVX512-NEXT: movl (%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: vmovups 8(%ecx), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%eax) +; CHECK-AVX512-NEXT: movl 24(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 24(%eax) +; CHECK-AVX512-NEXT: movl 28(%ecx), %ecx +; CHECK-AVX512-NEXT: movl %ecx, 28(%eax) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: vzeroupper +; CHECK-AVX512-NEXT: retl +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S7, %struct.S7* %s1, i64 0, i32 1 + store float 1.0, float* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S7* %s3 to i8* + %1 = bitcast %struct.S7* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false) + %2 = bitcast %struct.S7* %s2 to i8* + %3 = bitcast %struct.S7* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false) + ret void +} + +%struct.S8 = type { i64, i64, i64, i64, i64, i64 } + +; Function Attrs: nounwind uwtable +define void @test_conditional_block_ymm(%struct.S8* nocapture %s1, %struct.S8* nocapture %s2, i32 %x, %struct.S8* nocapture %s3, %struct.S8* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_conditional_block_ymm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl $18, 
{{[0-9]+}}(%esp) +; CHECK-NEXT: jl .LBB12_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl $0, 12(%ecx) +; CHECK-NEXT: movl $1, 8(%ecx) +; CHECK-NEXT: .LBB12_2: # %if.end +; CHECK-NEXT: movups (%esi), %xmm0 +; CHECK-NEXT: movups 16(%esi), %xmm1 +; CHECK-NEXT: movups %xmm1, 16(%edx) +; CHECK-NEXT: movups %xmm0, (%edx) +; CHECK-NEXT: movl (%ecx), %edx +; CHECK-NEXT: movl 4(%ecx), %esi +; CHECK-NEXT: movl 8(%ecx), %edi +; CHECK-NEXT: movl 12(%ecx), %ebx +; CHECK-NEXT: movups 16(%ecx), %xmm0 +; CHECK-NEXT: movups %xmm0, 16(%eax) +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl %esi, 4(%eax) +; CHECK-NEXT: movl %edi, 8(%eax) +; CHECK-NEXT: movl %ebx, 12(%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +; +; DISABLED-LABEL: test_conditional_block_ymm: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushl %esi +; DISABLED-NEXT: .cfi_def_cfa_offset 8 +; DISABLED-NEXT: .cfi_offset %esi, -8 +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax +; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx +; DISABLED-NEXT: cmpl $18, {{[0-9]+}}(%esp) +; DISABLED-NEXT: jl .LBB12_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl $0, 12(%ecx) +; DISABLED-NEXT: movl $1, 8(%ecx) +; DISABLED-NEXT: .LBB12_2: # %if.end +; DISABLED-NEXT: movups (%esi), %xmm0 +; DISABLED-NEXT: movups 16(%esi), %xmm1 +; DISABLED-NEXT: movups %xmm1, 16(%edx) +; DISABLED-NEXT: movups %xmm0, (%edx) +; DISABLED-NEXT: movups (%ecx), %xmm0 +; DISABLED-NEXT: movups 16(%ecx), %xmm1 +; DISABLED-NEXT: movups %xmm1, 16(%eax) +; DISABLED-NEXT: movups %xmm0, (%eax) +; DISABLED-NEXT: popl %esi +; DISABLED-NEXT: retl +; +; CHECK-AVX2-LABEL: test_conditional_block_ymm: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushl %ebx +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX2-NEXT: pushl %edi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12 +; CHECK-AVX2-NEXT: pushl %esi +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX2-NEXT: .cfi_offset %esi, -16 +; CHECK-AVX2-NEXT: .cfi_offset %edi, -12 +; CHECK-AVX2-NEXT: .cfi_offset %ebx, -8 +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX2-NEXT: cmpl $18, {{[0-9]+}}(%esp) +; CHECK-AVX2-NEXT: jl .LBB12_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl $0, 12(%ecx) +; CHECK-AVX2-NEXT: movl $1, 8(%ecx) +; CHECK-AVX2-NEXT: .LBB12_2: # %if.end +; CHECK-AVX2-NEXT: movups (%esi), %xmm0 +; CHECK-AVX2-NEXT: movups 16(%esi), %xmm1 +; CHECK-AVX2-NEXT: movups %xmm1, 16(%edx) +; CHECK-AVX2-NEXT: movups %xmm0, (%edx) +; CHECK-AVX2-NEXT: movl (%ecx), %edx +; CHECK-AVX2-NEXT: movl 4(%ecx), %esi +; CHECK-AVX2-NEXT: movl 8(%ecx), %edi +; CHECK-AVX2-NEXT: movl 12(%ecx), %ebx +; CHECK-AVX2-NEXT: movups 16(%ecx), %xmm0 +; CHECK-AVX2-NEXT: movups %xmm0, 16(%eax) +; CHECK-AVX2-NEXT: movl %edx, (%eax) +; CHECK-AVX2-NEXT: movl %esi, 4(%eax) +; CHECK-AVX2-NEXT: movl %edi, 8(%eax) +; CHECK-AVX2-NEXT: movl %ebx, 12(%eax) +; CHECK-AVX2-NEXT: popl %esi +; CHECK-AVX2-NEXT: popl %edi +; CHECK-AVX2-NEXT: popl %ebx +; CHECK-AVX2-NEXT: retl +; +; CHECK-AVX512-LABEL: test_conditional_block_ymm: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushl %esi +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 +; CHECK-AVX512-NEXT: .cfi_offset %esi, -8 +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-AVX512-NEXT: 
movl {{[0-9]+}}(%esp), %edx +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-AVX512-NEXT: cmpl $18, {{[0-9]+}}(%esp) +; CHECK-AVX512-NEXT: jl .LBB12_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl $0, 12(%ecx) +; CHECK-AVX512-NEXT: movl $1, 8(%ecx) +; CHECK-AVX512-NEXT: .LBB12_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%esi), %ymm0 +; CHECK-AVX512-NEXT: vmovups %ymm0, (%edx) +; CHECK-AVX512-NEXT: movl (%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, (%eax) +; CHECK-AVX512-NEXT: movl 4(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 4(%eax) +; CHECK-AVX512-NEXT: movl 8(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 8(%eax) +; CHECK-AVX512-NEXT: movl 12(%ecx), %edx +; CHECK-AVX512-NEXT: movl %edx, 12(%eax) +; CHECK-AVX512-NEXT: vmovups 16(%ecx), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%eax) +; CHECK-AVX512-NEXT: popl %esi +; CHECK-AVX512-NEXT: vzeroupper +; CHECK-AVX512-NEXT: retl +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S8, %struct.S8* %s1, i64 0, i32 1 + store i64 1, i64* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S8* %s3 to i8* + %1 = bitcast %struct.S8* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false) + %2 = bitcast %struct.S8* %s2 to i8* + %3 = bitcast %struct.S8* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false) + ret void +} + diff --git a/llvm/test/CodeGen/X86/fixup-sfb.ll b/llvm/test/CodeGen/X86/fixup-sfb.ll new file mode 100644 index 00000000000..e73b0bb447f --- /dev/null +++ b/llvm/test/CodeGen/X86/fixup-sfb.ll @@ -0,0 +1,1378 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-linux --disable-fixup-SFB | FileCheck %s --check-prefix=DISABLED +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx | FileCheck %s -check-prefix=CHECK-AVX512 + +; RUN: llc < %s -mtriple=i686-linux +; RUN: llc < %s -mtriple=i686-linux --disable-fixup-SFB +; RUN: llc < %s -mtriple=i686-linux -mattr sse4 +; RUN: llc < %s -mtriple=i686-linux -mattr avx512 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.S = type { i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @test_conditional_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_conditional_block: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edx, 4(%rdi) +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: movups (%r8), %xmm0 +; CHECK-NEXT: movups %xmm0, (%rcx) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: movl %eax, (%rsi) +; CHECK-NEXT: movl 4(%rdi), %eax +; CHECK-NEXT: movl %eax, 4(%rsi) +; CHECK-NEXT: movq 8(%rdi), %rax +; CHECK-NEXT: movq %rax, 8(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_conditional_block: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB0_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edx, 4(%rdi) +; DISABLED-NEXT: 
.LBB0_2: # %if.end +; DISABLED-NEXT: movups (%r8), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_conditional_block: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB0_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edx, 4(%rdi) +; CHECK-AVX2-NEXT: .LBB0_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX2-NEXT: movl (%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, (%rsi) +; CHECK-AVX2-NEXT: movl 4(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX2-NEXT: movq 8(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 8(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_conditional_block: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB0_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edx, 4(%rdi) +; CHECK-AVX512-NEXT: .LBB0_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX512-NEXT: movl (%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, (%rsi) +; CHECK-AVX512-NEXT: movl 4(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX512-NEXT: movq 8(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 8(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_imm_store(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 { +; CHECK-LABEL: test_imm_store: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $0, (%rdi) +; CHECK-NEXT: movl $1, (%rcx) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: movl %eax, (%rsi) +; CHECK-NEXT: movq 4(%rdi), %rax +; CHECK-NEXT: movq %rax, 4(%rsi) +; CHECK-NEXT: movl 12(%rdi), %eax +; CHECK-NEXT: movl %eax, 12(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_imm_store: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: movl $0, (%rdi) +; DISABLED-NEXT: movl $1, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_imm_store: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: movl $0, (%rdi) +; CHECK-AVX2-NEXT: movl $1, (%rcx) +; CHECK-AVX2-NEXT: movl (%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, (%rsi) +; CHECK-AVX2-NEXT: movq 4(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 4(%rsi) +; CHECK-AVX2-NEXT: movl 12(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_imm_store: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: movl $0, (%rdi) +; CHECK-AVX512-NEXT: movl $1, (%rcx) +; CHECK-AVX512-NEXT: movl (%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, (%rsi) +; CHECK-AVX512-NEXT: movq 4(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 4(%rsi) +; CHECK-AVX512-NEXT: movl 
12(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 12(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0 + store i32 0, i32* %a, align 4 + %a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0 + store i32 1, i32* %a1, align 4 + %0 = bitcast %struct.S* %s2 to i8* + %1 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_nondirect_br(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_nondirect_br: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB2_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edx, 4(%rdi) +; CHECK-NEXT: .LBB2_2: # %if.end +; CHECK-NEXT: cmpl $14, %r9d +; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: # %bb.3: # %if.then2 +; CHECK-NEXT: movl %r9d, 12(%rdi) +; CHECK-NEXT: .LBB2_4: # %if.end3 +; CHECK-NEXT: movups (%r8), %xmm0 +; CHECK-NEXT: movups %xmm0, (%rcx) +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: movl 8(%rdi), %eax +; CHECK-NEXT: movl %eax, 8(%rsi) +; CHECK-NEXT: movl 12(%rdi), %eax +; CHECK-NEXT: movl %eax, 12(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_nondirect_br: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB2_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edx, 4(%rdi) +; DISABLED-NEXT: .LBB2_2: # %if.end +; DISABLED-NEXT: cmpl $14, %r9d +; DISABLED-NEXT: jl .LBB2_4 +; DISABLED-NEXT: # %bb.3: # %if.then2 +; DISABLED-NEXT: movl %r9d, 12(%rdi) +; DISABLED-NEXT: .LBB2_4: # %if.end3 +; DISABLED-NEXT: movups (%r8), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_nondirect_br: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB2_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edx, 4(%rdi) +; CHECK-AVX2-NEXT: .LBB2_2: # %if.end +; CHECK-AVX2-NEXT: cmpl $14, %r9d +; CHECK-AVX2-NEXT: jl .LBB2_4 +; CHECK-AVX2-NEXT: # %bb.3: # %if.then2 +; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX2-NEXT: .LBB2_4: # %if.end3 +; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX2-NEXT: movq (%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, (%rsi) +; CHECK-AVX2-NEXT: movl 8(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 8(%rsi) +; CHECK-AVX2-NEXT: movl 12(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_nondirect_br: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB2_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edx, 4(%rdi) +; CHECK-AVX512-NEXT: .LBB2_2: # %if.end +; CHECK-AVX512-NEXT: cmpl $14, %r9d +; CHECK-AVX512-NEXT: jl .LBB2_4 +; CHECK-AVX512-NEXT: # %bb.3: # %if.then2 +; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX512-NEXT: .LBB2_4: # %if.end3 +; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX512-NEXT: movq (%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, (%rsi) +; CHECK-AVX512-NEXT: movl 8(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 8(%rsi) +; CHECK-AVX512-NEXT: movl 12(%rdi), %eax +; CHECK-AVX512-NEXT: movl 
%eax, 12(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %cmp1 = icmp sgt i32 %x2, 13 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_2preds_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_2preds_block: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %r9d, 12(%rdi) +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB3_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edx, 4(%rdi) +; CHECK-NEXT: .LBB3_2: # %if.end +; CHECK-NEXT: movups (%r8), %xmm0 +; CHECK-NEXT: movups %xmm0, (%rcx) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: movl %eax, (%rsi) +; CHECK-NEXT: movl 4(%rdi), %eax +; CHECK-NEXT: movl %eax, 4(%rsi) +; CHECK-NEXT: movl 8(%rdi), %eax +; CHECK-NEXT: movl %eax, 8(%rsi) +; CHECK-NEXT: movl 12(%rdi), %eax +; CHECK-NEXT: movl %eax, 12(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_2preds_block: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: movl %r9d, 12(%rdi) +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB3_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edx, 4(%rdi) +; DISABLED-NEXT: .LBB3_2: # %if.end +; DISABLED-NEXT: movups (%r8), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_2preds_block: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB3_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edx, 4(%rdi) +; CHECK-AVX2-NEXT: .LBB3_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX2-NEXT: movl (%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, (%rsi) +; CHECK-AVX2-NEXT: movl 4(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX2-NEXT: movl 8(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 8(%rsi) +; CHECK-AVX2-NEXT: movl 12(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_2preds_block: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB3_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edx, 4(%rdi) +; CHECK-AVX512-NEXT: .LBB3_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX512-NEXT: movl (%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, (%rsi) +; CHECK-AVX512-NEXT: movl 4(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX512-NEXT: movl 8(%rdi), %eax +; 
CHECK-AVX512-NEXT: movl %eax, 8(%rsi) +; CHECK-AVX512-NEXT: movl 12(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 12(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} +%struct.S2 = type { i64, i64 } + +; Function Attrs: nounwind uwtable +define void @test_type64(%struct.S2* nocapture %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_type64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB4_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movslq %edx, %rax +; CHECK-NEXT: movq %rax, 8(%rdi) +; CHECK-NEXT: .LBB4_2: # %if.end +; CHECK-NEXT: movups (%r8), %xmm0 +; CHECK-NEXT: movups %xmm0, (%rcx) +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: movq 8(%rdi), %rax +; CHECK-NEXT: movq %rax, 8(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_type64: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB4_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movslq %edx, %rax +; DISABLED-NEXT: movq %rax, 8(%rdi) +; DISABLED-NEXT: .LBB4_2: # %if.end +; DISABLED-NEXT: movups (%r8), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_type64: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB4_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movslq %edx, %rax +; CHECK-AVX2-NEXT: movq %rax, 8(%rdi) +; CHECK-AVX2-NEXT: .LBB4_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX2-NEXT: movq (%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, (%rsi) +; CHECK-AVX2-NEXT: movq 8(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 8(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_type64: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB4_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movslq %edx, %rax +; CHECK-AVX512-NEXT: movq %rax, 8(%rdi) +; CHECK-AVX512-NEXT: .LBB4_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX512-NEXT: movq (%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, (%rsi) +; CHECK-AVX512-NEXT: movq 8(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 8(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv = sext i32 %x to i64 + %b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1 + store i64 %conv, i64* %b, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S2* %s3 to i8* + %1 = bitcast %struct.S2* %s4 to i8* + tail 
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false) + %2 = bitcast %struct.S2* %s2 to i8* + %3 = bitcast %struct.S2* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false) + ret void +} +%struct.S3 = type { i64, i8, i8, i16, i32 } + +; Function Attrs: noinline nounwind uwtable +define void @test_mixed_type(%struct.S3* nocapture %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_mixed_type: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB5_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movslq %edx, %rax +; CHECK-NEXT: movq %rax, (%rdi) +; CHECK-NEXT: movb %dl, 8(%rdi) +; CHECK-NEXT: .LBB5_2: # %if.end +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: movb 8(%rdi), %al +; CHECK-NEXT: movb %al, 8(%rsi) +; CHECK-NEXT: movl 9(%rdi), %eax +; CHECK-NEXT: movl %eax, 9(%rsi) +; CHECK-NEXT: movzwl 13(%rdi), %eax +; CHECK-NEXT: movw %ax, 13(%rsi) +; CHECK-NEXT: movb 15(%rdi), %al +; CHECK-NEXT: movb %al, 15(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_mixed_type: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB5_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movslq %edx, %rax +; DISABLED-NEXT: movq %rax, (%rdi) +; DISABLED-NEXT: movb %dl, 8(%rdi) +; DISABLED-NEXT: .LBB5_2: # %if.end +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_mixed_type: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB5_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movslq %edx, %rax +; CHECK-AVX2-NEXT: movq %rax, (%rdi) +; CHECK-AVX2-NEXT: movb %dl, 8(%rdi) +; CHECK-AVX2-NEXT: .LBB5_2: # %if.end +; CHECK-AVX2-NEXT: movq (%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, (%rsi) +; CHECK-AVX2-NEXT: movb 8(%rdi), %al +; CHECK-AVX2-NEXT: movb %al, 8(%rsi) +; CHECK-AVX2-NEXT: movl 9(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 9(%rsi) +; CHECK-AVX2-NEXT: movzwl 13(%rdi), %eax +; CHECK-AVX2-NEXT: movw %ax, 13(%rsi) +; CHECK-AVX2-NEXT: movb 15(%rdi), %al +; CHECK-AVX2-NEXT: movb %al, 15(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_mixed_type: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB5_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movslq %edx, %rax +; CHECK-AVX512-NEXT: movq %rax, (%rdi) +; CHECK-AVX512-NEXT: movb %dl, 8(%rdi) +; CHECK-AVX512-NEXT: .LBB5_2: # %if.end +; CHECK-AVX512-NEXT: movq (%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, (%rsi) +; CHECK-AVX512-NEXT: movb 8(%rdi), %al +; CHECK-AVX512-NEXT: movb %al, 8(%rsi) +; CHECK-AVX512-NEXT: movl 9(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 9(%rsi) +; CHECK-AVX512-NEXT: movzwl 13(%rdi), %eax +; CHECK-AVX512-NEXT: movw %ax, 13(%rsi) +; CHECK-AVX512-NEXT: movb 15(%rdi), %al +; CHECK-AVX512-NEXT: movb %al, 15(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv = sext i32 %x to i64 + %a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0 + store i64 %conv, i64* %a, align 8 + %conv1 = trunc i32 %x to i8 + %b = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1 + store i8 %conv1, i8* %b, align 8 + br label %if.end + +if.end: ; preds = 
%if.then, %entry + %0 = bitcast %struct.S3* %s2 to i8* + %1 = bitcast %struct.S3* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false) + ret void +} +%struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @test_multiple_blocks(%struct.S4* nocapture %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 { +; CHECK-LABEL: test_multiple_blocks: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $0, 4(%rdi) +; CHECK-NEXT: movl $0, 36(%rdi) +; CHECK-NEXT: movups 16(%rdi), %xmm0 +; CHECK-NEXT: movups %xmm0, 16(%rsi) +; CHECK-NEXT: movl 32(%rdi), %eax +; CHECK-NEXT: movl %eax, 32(%rsi) +; CHECK-NEXT: movl 36(%rdi), %eax +; CHECK-NEXT: movl %eax, 36(%rsi) +; CHECK-NEXT: movq 40(%rdi), %rax +; CHECK-NEXT: movq %rax, 40(%rsi) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: movl %eax, (%rsi) +; CHECK-NEXT: movl 4(%rdi), %eax +; CHECK-NEXT: movl %eax, 4(%rsi) +; CHECK-NEXT: movq 8(%rdi), %rax +; CHECK-NEXT: movq %rax, 8(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_multiple_blocks: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: movl $0, 4(%rdi) +; DISABLED-NEXT: movl $0, 36(%rdi) +; DISABLED-NEXT: movups 16(%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, 16(%rsi) +; DISABLED-NEXT: movups 32(%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, 32(%rsi) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_multiple_blocks: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: movl $0, 4(%rdi) +; CHECK-AVX2-NEXT: movl $0, 36(%rdi) +; CHECK-AVX2-NEXT: vmovups 16(%rdi), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, 16(%rsi) +; CHECK-AVX2-NEXT: movl 32(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 32(%rsi) +; CHECK-AVX2-NEXT: movl 36(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 36(%rsi) +; CHECK-AVX2-NEXT: movq 40(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 40(%rsi) +; CHECK-AVX2-NEXT: movl (%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, (%rsi) +; CHECK-AVX2-NEXT: movl 4(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX2-NEXT: vmovups 8(%rdi), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, 8(%rsi) +; CHECK-AVX2-NEXT: movq 24(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 24(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_multiple_blocks: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: movl $0, 4(%rdi) +; CHECK-AVX512-NEXT: movl $0, 36(%rdi) +; CHECK-AVX512-NEXT: vmovups 16(%rdi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%rsi) +; CHECK-AVX512-NEXT: movl 32(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 32(%rsi) +; CHECK-AVX512-NEXT: movl 36(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 36(%rsi) +; CHECK-AVX512-NEXT: movq 40(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 40(%rsi) +; CHECK-AVX512-NEXT: movl (%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, (%rsi) +; CHECK-AVX512-NEXT: movl 4(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX512-NEXT: vmovups 8(%rdi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%rsi) +; CHECK-AVX512-NEXT: movq 24(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 24(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1 + store i32 0, i32* %b, align 4 + %b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9 + store i32 0, i32* %b3, align 4 + %0 = bitcast %struct.S4* %s2 to i8* + %1 = bitcast %struct.S4* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false) + 
ret void +} +%struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 } + +; Function Attrs: nounwind uwtable +define void @test_type16(%struct.S5* nocapture %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_type16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB7_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movw %dx, 2(%rdi) +; CHECK-NEXT: .LBB7_2: # %if.end +; CHECK-NEXT: movups (%r8), %xmm0 +; CHECK-NEXT: movups %xmm0, (%rcx) +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: movw %ax, (%rsi) +; CHECK-NEXT: movzwl 2(%rdi), %eax +; CHECK-NEXT: movw %ax, 2(%rsi) +; CHECK-NEXT: movq 4(%rdi), %rax +; CHECK-NEXT: movq %rax, 4(%rsi) +; CHECK-NEXT: movl 12(%rdi), %eax +; CHECK-NEXT: movl %eax, 12(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_type16: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB7_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movw %dx, 2(%rdi) +; DISABLED-NEXT: .LBB7_2: # %if.end +; DISABLED-NEXT: movups (%r8), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_type16: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB7_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movw %dx, 2(%rdi) +; CHECK-AVX2-NEXT: .LBB7_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX2-NEXT: movzwl (%rdi), %eax +; CHECK-AVX2-NEXT: movw %ax, (%rsi) +; CHECK-AVX2-NEXT: movzwl 2(%rdi), %eax +; CHECK-AVX2-NEXT: movw %ax, 2(%rsi) +; CHECK-AVX2-NEXT: movq 4(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 4(%rsi) +; CHECK-AVX2-NEXT: movl 12(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%rsi) +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_type16: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB7_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movw %dx, 2(%rdi) +; CHECK-AVX512-NEXT: .LBB7_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx) +; CHECK-AVX512-NEXT: movzwl (%rdi), %eax +; CHECK-AVX512-NEXT: movw %ax, (%rsi) +; CHECK-AVX512-NEXT: movzwl 2(%rdi), %eax +; CHECK-AVX512-NEXT: movw %ax, 2(%rsi) +; CHECK-AVX512-NEXT: movq 4(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 4(%rsi) +; CHECK-AVX512-NEXT: movl 12(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 12(%rsi) +; CHECK-AVX512-NEXT: retq +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %conv = trunc i32 %x to i16 + %b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1 + store i16 %conv, i16* %b, align 2 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S5* %s3 to i8* + %1 = bitcast %struct.S5* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false) + %2 = bitcast %struct.S5* %s2 to i8* + %3 = bitcast %struct.S5* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false) + ret void +} + +%struct.S6 = type { [4 x i32], i32, i32, i32, i32 } + +; Function Attrs: nounwind uwtable +define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture 
align 8 %s2, i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: test_stack: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: movups %xmm0, (%rdi) +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq %rax, 16(%rdi) +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, 24(%rdi) +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movl %eax, 28(%rdi) +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_stack: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: movl %esi, {{[0-9]+}}(%rsp) +; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%rdi) +; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; DISABLED-NEXT: movups %xmm0, 16(%rdi) +; DISABLED-NEXT: movq %rdi, %rax +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_stack: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: movl %esi, {{[0-9]+}}(%rsp) +; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi) +; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-AVX2-NEXT: movq %rax, 16(%rdi) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-AVX2-NEXT: movl %eax, 24(%rdi) +; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-AVX2-NEXT: movl %eax, 28(%rdi) +; CHECK-AVX2-NEXT: movq %rdi, %rax +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_stack: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: movl %esi, {{[0-9]+}}(%rsp) +; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi) +; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-AVX512-NEXT: movq %rax, 16(%rdi) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-AVX512-NEXT: movl %eax, 24(%rdi) +; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-AVX512-NEXT: movl %eax, 28(%rdi) +; CHECK-AVX512-NEXT: movq %rdi, %rax +; CHECK-AVX512-NEXT: retq +entry: + %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8* + %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3 + store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8 + %0 = bitcast %struct.S6* %agg.result to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_limit_all: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -48 +; CHECK-NEXT: .cfi_offset %r12, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %r8, %r15 +; CHECK-NEXT: movq %rcx, %r14 +; CHECK-NEXT: movl %edx, %ebp +; CHECK-NEXT: movq %rsi, %r12 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl %r9d, 12(%rdi) +; CHECK-NEXT: callq bar +; CHECK-NEXT: cmpl $18, %ebp +; CHECK-NEXT: jl .LBB9_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %ebp, 4(%rbx) +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq bar +; CHECK-NEXT: .LBB9_2: # %if.end +; 
CHECK-NEXT: movups (%r15), %xmm0 +; CHECK-NEXT: movups %xmm0, (%r14) +; CHECK-NEXT: movups (%rbx), %xmm0 +; CHECK-NEXT: movups %xmm0, (%r12) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_limit_all: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushq %rbp +; DISABLED-NEXT: .cfi_def_cfa_offset 16 +; DISABLED-NEXT: pushq %r15 +; DISABLED-NEXT: .cfi_def_cfa_offset 24 +; DISABLED-NEXT: pushq %r14 +; DISABLED-NEXT: .cfi_def_cfa_offset 32 +; DISABLED-NEXT: pushq %r12 +; DISABLED-NEXT: .cfi_def_cfa_offset 40 +; DISABLED-NEXT: pushq %rbx +; DISABLED-NEXT: .cfi_def_cfa_offset 48 +; DISABLED-NEXT: .cfi_offset %rbx, -48 +; DISABLED-NEXT: .cfi_offset %r12, -40 +; DISABLED-NEXT: .cfi_offset %r14, -32 +; DISABLED-NEXT: .cfi_offset %r15, -24 +; DISABLED-NEXT: .cfi_offset %rbp, -16 +; DISABLED-NEXT: movq %r8, %r15 +; DISABLED-NEXT: movq %rcx, %r14 +; DISABLED-NEXT: movl %edx, %ebp +; DISABLED-NEXT: movq %rsi, %r12 +; DISABLED-NEXT: movq %rdi, %rbx +; DISABLED-NEXT: movl %r9d, 12(%rdi) +; DISABLED-NEXT: callq bar +; DISABLED-NEXT: cmpl $18, %ebp +; DISABLED-NEXT: jl .LBB9_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %ebp, 4(%rbx) +; DISABLED-NEXT: movq %rbx, %rdi +; DISABLED-NEXT: callq bar +; DISABLED-NEXT: .LBB9_2: # %if.end +; DISABLED-NEXT: movups (%r15), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%r14) +; DISABLED-NEXT: movups (%rbx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%r12) +; DISABLED-NEXT: popq %rbx +; DISABLED-NEXT: popq %r12 +; DISABLED-NEXT: popq %r14 +; DISABLED-NEXT: popq %r15 +; DISABLED-NEXT: popq %rbp +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_limit_all: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushq %rbp +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX2-NEXT: pushq %r15 +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24 +; CHECK-AVX2-NEXT: pushq %r14 +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX2-NEXT: pushq %r12 +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40 +; CHECK-AVX2-NEXT: pushq %rbx +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48 +; CHECK-AVX2-NEXT: .cfi_offset %rbx, -48 +; CHECK-AVX2-NEXT: .cfi_offset %r12, -40 +; CHECK-AVX2-NEXT: .cfi_offset %r14, -32 +; CHECK-AVX2-NEXT: .cfi_offset %r15, -24 +; CHECK-AVX2-NEXT: .cfi_offset %rbp, -16 +; CHECK-AVX2-NEXT: movq %r8, %r15 +; CHECK-AVX2-NEXT: movq %rcx, %r14 +; CHECK-AVX2-NEXT: movl %edx, %ebp +; CHECK-AVX2-NEXT: movq %rsi, %r12 +; CHECK-AVX2-NEXT: movq %rdi, %rbx +; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX2-NEXT: callq bar +; CHECK-AVX2-NEXT: cmpl $18, %ebp +; CHECK-AVX2-NEXT: jl .LBB9_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %ebp, 4(%rbx) +; CHECK-AVX2-NEXT: movq %rbx, %rdi +; CHECK-AVX2-NEXT: callq bar +; CHECK-AVX2-NEXT: .LBB9_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r15), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%r14) +; CHECK-AVX2-NEXT: vmovups (%rbx), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%r12) +; CHECK-AVX2-NEXT: popq %rbx +; CHECK-AVX2-NEXT: popq %r12 +; CHECK-AVX2-NEXT: popq %r14 +; CHECK-AVX2-NEXT: popq %r15 +; CHECK-AVX2-NEXT: popq %rbp +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_limit_all: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushq %rbp +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX512-NEXT: pushq %r15 +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24 +; CHECK-AVX512-NEXT: pushq %r14 +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX512-NEXT: pushq %r12 +; 
CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40 +; CHECK-AVX512-NEXT: pushq %rbx +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 48 +; CHECK-AVX512-NEXT: .cfi_offset %rbx, -48 +; CHECK-AVX512-NEXT: .cfi_offset %r12, -40 +; CHECK-AVX512-NEXT: .cfi_offset %r14, -32 +; CHECK-AVX512-NEXT: .cfi_offset %r15, -24 +; CHECK-AVX512-NEXT: .cfi_offset %rbp, -16 +; CHECK-AVX512-NEXT: movq %r8, %r15 +; CHECK-AVX512-NEXT: movq %rcx, %r14 +; CHECK-AVX512-NEXT: movl %edx, %ebp +; CHECK-AVX512-NEXT: movq %rsi, %r12 +; CHECK-AVX512-NEXT: movq %rdi, %rbx +; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX512-NEXT: callq bar +; CHECK-AVX512-NEXT: cmpl $18, %ebp +; CHECK-AVX512-NEXT: jl .LBB9_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %ebp, 4(%rbx) +; CHECK-AVX512-NEXT: movq %rbx, %rdi +; CHECK-AVX512-NEXT: callq bar +; CHECK-AVX512-NEXT: .LBB9_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r15), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%r14) +; CHECK-AVX512-NEXT: vmovups (%rbx), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%r12) +; CHECK-AVX512-NEXT: popq %rbx +; CHECK-AVX512-NEXT: popq %r12 +; CHECK-AVX512-NEXT: popq %r14 +; CHECK-AVX512-NEXT: popq %r15 +; CHECK-AVX512-NEXT: popq %rbp +; CHECK-AVX512-NEXT: retq +entry: + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + tail call void @bar(%struct.S* %s1) #3 + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + tail call void @bar(%struct.S* nonnull %s1) #3 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind uwtable +define void @test_limit_one_pred(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_limit_one_pred: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -40 +; CHECK-NEXT: .cfi_offset %r12, -32 +; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: .cfi_offset %r15, -16 +; CHECK-NEXT: movq %r8, %r12 +; CHECK-NEXT: movq %rcx, %r15 +; CHECK-NEXT: movq %rsi, %r14 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl %r9d, 12(%rdi) +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB10_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl %edx, 4(%rbx) +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq bar +; CHECK-NEXT: .LBB10_2: # %if.end +; CHECK-NEXT: movups (%r12), %xmm0 +; CHECK-NEXT: movups %xmm0, (%r15) +; CHECK-NEXT: movq (%rbx), %rax +; CHECK-NEXT: movq %rax, (%r14) +; CHECK-NEXT: movl 8(%rbx), %eax +; CHECK-NEXT: movl %eax, 8(%r14) +; CHECK-NEXT: movl 12(%rbx), %eax +; CHECK-NEXT: movl %eax, 12(%r14) +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 
+; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_limit_one_pred: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: pushq %r15 +; DISABLED-NEXT: .cfi_def_cfa_offset 16 +; DISABLED-NEXT: pushq %r14 +; DISABLED-NEXT: .cfi_def_cfa_offset 24 +; DISABLED-NEXT: pushq %r12 +; DISABLED-NEXT: .cfi_def_cfa_offset 32 +; DISABLED-NEXT: pushq %rbx +; DISABLED-NEXT: .cfi_def_cfa_offset 40 +; DISABLED-NEXT: pushq %rax +; DISABLED-NEXT: .cfi_def_cfa_offset 48 +; DISABLED-NEXT: .cfi_offset %rbx, -40 +; DISABLED-NEXT: .cfi_offset %r12, -32 +; DISABLED-NEXT: .cfi_offset %r14, -24 +; DISABLED-NEXT: .cfi_offset %r15, -16 +; DISABLED-NEXT: movq %r8, %r15 +; DISABLED-NEXT: movq %rcx, %r14 +; DISABLED-NEXT: movq %rsi, %r12 +; DISABLED-NEXT: movq %rdi, %rbx +; DISABLED-NEXT: movl %r9d, 12(%rdi) +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB10_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl %edx, 4(%rbx) +; DISABLED-NEXT: movq %rbx, %rdi +; DISABLED-NEXT: callq bar +; DISABLED-NEXT: .LBB10_2: # %if.end +; DISABLED-NEXT: movups (%r15), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%r14) +; DISABLED-NEXT: movups (%rbx), %xmm0 +; DISABLED-NEXT: movups %xmm0, (%r12) +; DISABLED-NEXT: addq $8, %rsp +; DISABLED-NEXT: popq %rbx +; DISABLED-NEXT: popq %r12 +; DISABLED-NEXT: popq %r14 +; DISABLED-NEXT: popq %r15 +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_limit_one_pred: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: pushq %r15 +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX2-NEXT: pushq %r14 +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24 +; CHECK-AVX2-NEXT: pushq %r12 +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX2-NEXT: pushq %rbx +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40 +; CHECK-AVX2-NEXT: pushq %rax +; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48 +; CHECK-AVX2-NEXT: .cfi_offset %rbx, -40 +; CHECK-AVX2-NEXT: .cfi_offset %r12, -32 +; CHECK-AVX2-NEXT: .cfi_offset %r14, -24 +; CHECK-AVX2-NEXT: .cfi_offset %r15, -16 +; CHECK-AVX2-NEXT: movq %r8, %r12 +; CHECK-AVX2-NEXT: movq %rcx, %r15 +; CHECK-AVX2-NEXT: movq %rsi, %r14 +; CHECK-AVX2-NEXT: movq %rdi, %rbx +; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB10_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl %edx, 4(%rbx) +; CHECK-AVX2-NEXT: movq %rbx, %rdi +; CHECK-AVX2-NEXT: callq bar +; CHECK-AVX2-NEXT: .LBB10_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r12), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, (%r15) +; CHECK-AVX2-NEXT: movq (%rbx), %rax +; CHECK-AVX2-NEXT: movq %rax, (%r14) +; CHECK-AVX2-NEXT: movl 8(%rbx), %eax +; CHECK-AVX2-NEXT: movl %eax, 8(%r14) +; CHECK-AVX2-NEXT: movl 12(%rbx), %eax +; CHECK-AVX2-NEXT: movl %eax, 12(%r14) +; CHECK-AVX2-NEXT: addq $8, %rsp +; CHECK-AVX2-NEXT: popq %rbx +; CHECK-AVX2-NEXT: popq %r12 +; CHECK-AVX2-NEXT: popq %r14 +; CHECK-AVX2-NEXT: popq %r15 +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_limit_one_pred: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: pushq %r15 +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16 +; CHECK-AVX512-NEXT: pushq %r14 +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24 +; CHECK-AVX512-NEXT: pushq %r12 +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32 +; CHECK-AVX512-NEXT: pushq %rbx +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40 +; CHECK-AVX512-NEXT: pushq %rax +; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 48 +; CHECK-AVX512-NEXT: .cfi_offset %rbx, -40 +; CHECK-AVX512-NEXT: .cfi_offset %r12, -32 +; CHECK-AVX512-NEXT: .cfi_offset %r14, -24 +; CHECK-AVX512-NEXT: .cfi_offset %r15, -16 +; 
CHECK-AVX512-NEXT: movq %r8, %r12 +; CHECK-AVX512-NEXT: movq %rcx, %r15 +; CHECK-AVX512-NEXT: movq %rsi, %r14 +; CHECK-AVX512-NEXT: movq %rdi, %rbx +; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB10_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl %edx, 4(%rbx) +; CHECK-AVX512-NEXT: movq %rbx, %rdi +; CHECK-AVX512-NEXT: callq bar +; CHECK-AVX512-NEXT: .LBB10_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r12), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, (%r15) +; CHECK-AVX512-NEXT: movq (%rbx), %rax +; CHECK-AVX512-NEXT: movq %rax, (%r14) +; CHECK-AVX512-NEXT: movl 8(%rbx), %eax +; CHECK-AVX512-NEXT: movl %eax, 8(%r14) +; CHECK-AVX512-NEXT: movl 12(%rbx), %eax +; CHECK-AVX512-NEXT: movl %eax, 12(%r14) +; CHECK-AVX512-NEXT: addq $8, %rsp +; CHECK-AVX512-NEXT: popq %rbx +; CHECK-AVX512-NEXT: popq %r12 +; CHECK-AVX512-NEXT: popq %r14 +; CHECK-AVX512-NEXT: popq %r15 +; CHECK-AVX512-NEXT: retq +entry: + %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3 + store i32 %x2, i32* %d, align 4 + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1 + store i32 %x, i32* %b, align 4 + tail call void @bar(%struct.S* nonnull %s1) #3 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S* %s3 to i8* + %1 = bitcast %struct.S* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false) + %2 = bitcast %struct.S* %s2 to i8* + %3 = bitcast %struct.S* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false) + ret void +} + + +declare void @bar(%struct.S*) local_unnamed_addr #1 + + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 + +attributes #0 = { nounwind uwtable "target-cpu"="x86-64" } + +%struct.S7 = type { float, float, float, float, float, float, float, float } + +; Function Attrs: nounwind uwtable +define void @test_conditional_block_float(%struct.S7* nocapture %s1, %struct.S7* nocapture %s2, i32 %x, %struct.S7* nocapture %s3, %struct.S7* nocapture readonly %s4, float %y) local_unnamed_addr #0 { +; CHECK-LABEL: test_conditional_block_float: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB11_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000 +; CHECK-NEXT: .LBB11_2: # %if.end +; CHECK-NEXT: movups (%r8), %xmm0 +; CHECK-NEXT: movups 16(%r8), %xmm1 +; CHECK-NEXT: movups %xmm1, 16(%rcx) +; CHECK-NEXT: movups %xmm0, (%rcx) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: movl 4(%rdi), %ecx +; CHECK-NEXT: movq 8(%rdi), %rdx +; CHECK-NEXT: movups 16(%rdi), %xmm0 +; CHECK-NEXT: movups %xmm0, 16(%rsi) +; CHECK-NEXT: movl %eax, (%rsi) +; CHECK-NEXT: movl %ecx, 4(%rsi) +; CHECK-NEXT: movq %rdx, 8(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_conditional_block_float: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB11_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000 +; DISABLED-NEXT: .LBB11_2: # %if.end +; DISABLED-NEXT: movups (%r8), %xmm0 +; DISABLED-NEXT: movups 16(%r8), %xmm1 +; DISABLED-NEXT: movups %xmm1, 16(%rcx) +; DISABLED-NEXT: movups %xmm0, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups 16(%rdi), %xmm1
+; DISABLED-NEXT: movups %xmm1, 16(%rsi) +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_conditional_block_float: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB11_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000 +; CHECK-AVX2-NEXT: .LBB11_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r8), %ymm0 +; CHECK-AVX2-NEXT: vmovups %ymm0, (%rcx) +; CHECK-AVX2-NEXT: movl (%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, (%rsi) +; CHECK-AVX2-NEXT: movl 4(%rdi), %eax +; CHECK-AVX2-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX2-NEXT: vmovups 8(%rdi), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, 8(%rsi) +; CHECK-AVX2-NEXT: movq 24(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 24(%rsi) +; CHECK-AVX2-NEXT: vzeroupper +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_conditional_block_float: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB11_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000 +; CHECK-AVX512-NEXT: .LBB11_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r8), %ymm0 +; CHECK-AVX512-NEXT: vmovups %ymm0, (%rcx) +; CHECK-AVX512-NEXT: movl (%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, (%rsi) +; CHECK-AVX512-NEXT: movl 4(%rdi), %eax +; CHECK-AVX512-NEXT: movl %eax, 4(%rsi) +; CHECK-AVX512-NEXT: vmovups 8(%rdi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%rsi) +; CHECK-AVX512-NEXT: movq 24(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 24(%rsi) +; CHECK-AVX512-NEXT: vzeroupper +; CHECK-AVX512-NEXT: retq +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S7, %struct.S7* %s1, i64 0, i32 1 + store float 1.0, float* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S7* %s3 to i8* + %1 = bitcast %struct.S7* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false) + %2 = bitcast %struct.S7* %s2 to i8* + %3 = bitcast %struct.S7* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false) + ret void +} + +%struct.S8 = type { i64, i64, i64, i64, i64, i64 } + +; Function Attrs: nounwind uwtable +define void @test_conditional_block_ymm(%struct.S8* nocapture %s1, %struct.S8* nocapture %s2, i32 %x, %struct.S8* nocapture %s3, %struct.S8* nocapture readonly %s4) local_unnamed_addr #0 { +; CHECK-LABEL: test_conditional_block_ymm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: jl .LBB12_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: movq $1, 8(%rdi) +; CHECK-NEXT: .LBB12_2: # %if.end +; CHECK-NEXT: movups (%r8), %xmm0 +; CHECK-NEXT: movups 16(%r8), %xmm1 +; CHECK-NEXT: movups %xmm1, 16(%rcx) +; CHECK-NEXT: movups %xmm0, (%rcx) +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: movq 8(%rdi), %rcx +; CHECK-NEXT: movups 16(%rdi), %xmm0 +; CHECK-NEXT: movups %xmm0, 16(%rsi) +; CHECK-NEXT: movq %rax, (%rsi) +; CHECK-NEXT: movq %rcx, 8(%rsi) +; CHECK-NEXT: retq +; +; DISABLED-LABEL: test_conditional_block_ymm: +; DISABLED: # %bb.0: # %entry +; DISABLED-NEXT: cmpl $18, %edx +; DISABLED-NEXT: jl .LBB12_2 +; DISABLED-NEXT: # %bb.1: # %if.then +; DISABLED-NEXT: movq $1, 8(%rdi) +; DISABLED-NEXT: .LBB12_2: # %if.end +; DISABLED-NEXT: movups (%r8), %xmm0 +; DISABLED-NEXT: movups 16(%r8), %xmm1 +; DISABLED-NEXT: movups %xmm1, 16(%rcx) +; DISABLED-NEXT: 
movups %xmm0, (%rcx) +; DISABLED-NEXT: movups (%rdi), %xmm0 +; DISABLED-NEXT: movups 16(%rdi), %xmm1 +; DISABLED-NEXT: movups %xmm1, 16(%rsi) +; DISABLED-NEXT: movups %xmm0, (%rsi) +; DISABLED-NEXT: retq +; +; CHECK-AVX2-LABEL: test_conditional_block_ymm: +; CHECK-AVX2: # %bb.0: # %entry +; CHECK-AVX2-NEXT: cmpl $18, %edx +; CHECK-AVX2-NEXT: jl .LBB12_2 +; CHECK-AVX2-NEXT: # %bb.1: # %if.then +; CHECK-AVX2-NEXT: movq $1, 8(%rdi) +; CHECK-AVX2-NEXT: .LBB12_2: # %if.end +; CHECK-AVX2-NEXT: vmovups (%r8), %ymm0 +; CHECK-AVX2-NEXT: vmovups %ymm0, (%rcx) +; CHECK-AVX2-NEXT: movq (%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, (%rsi) +; CHECK-AVX2-NEXT: movq 8(%rdi), %rax +; CHECK-AVX2-NEXT: movq %rax, 8(%rsi) +; CHECK-AVX2-NEXT: vmovups 16(%rdi), %xmm0 +; CHECK-AVX2-NEXT: vmovups %xmm0, 16(%rsi) +; CHECK-AVX2-NEXT: vzeroupper +; CHECK-AVX2-NEXT: retq +; +; CHECK-AVX512-LABEL: test_conditional_block_ymm: +; CHECK-AVX512: # %bb.0: # %entry +; CHECK-AVX512-NEXT: cmpl $18, %edx +; CHECK-AVX512-NEXT: jl .LBB12_2 +; CHECK-AVX512-NEXT: # %bb.1: # %if.then +; CHECK-AVX512-NEXT: movq $1, 8(%rdi) +; CHECK-AVX512-NEXT: .LBB12_2: # %if.end +; CHECK-AVX512-NEXT: vmovups (%r8), %ymm0 +; CHECK-AVX512-NEXT: vmovups %ymm0, (%rcx) +; CHECK-AVX512-NEXT: movq (%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, (%rsi) +; CHECK-AVX512-NEXT: movq 8(%rdi), %rax +; CHECK-AVX512-NEXT: movq %rax, 8(%rsi) +; CHECK-AVX512-NEXT: vmovups 16(%rdi), %xmm0 +; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%rsi) +; CHECK-AVX512-NEXT: vzeroupper +; CHECK-AVX512-NEXT: retq +entry: + %cmp = icmp sgt i32 %x, 17 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %b = getelementptr inbounds %struct.S8, %struct.S8* %s1, i64 0, i32 1 + store i64 1, i64* %b, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = bitcast %struct.S8* %s3 to i8* + %1 = bitcast %struct.S8* %s4 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false) + %2 = bitcast %struct.S8* %s2 to i8* + %3 = bitcast %struct.S8* %s1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false) + ret void +} + |

