diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll | 120 |
1 files changed, 60 insertions, 60 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll index ac9ca931ecd..f593925c809 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -2507,12 +2507,12 @@ define void @bcast_unfold_smin_v4i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_smin_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB72_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0 -; CHECK-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %xmm0, %xmm1 +; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB72_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2541,12 +2541,12 @@ define void @bcast_unfold_smin_v8i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_smin_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB73_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpminsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB73_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2576,12 +2576,12 @@ define void @bcast_unfold_smin_v16i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_smin_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB74_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpminsd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpminsd 4096(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB74_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2645,12 +2645,12 @@ define void @bcast_unfold_smin_v4i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_smin_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB76_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpminsq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpminsq 8192(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB76_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2680,12 +2680,12 @@ define void @bcast_unfold_smin_v8i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_smin_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB77_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpminsq 8192(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB77_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2715,12 +2715,12 @@ define void @bcast_unfold_smax_v4i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_smax_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB78_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0 -; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %xmm0, %xmm1 +; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB78_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2749,12 +2749,12 @@ define void @bcast_unfold_smax_v8i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_smax_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB79_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB79_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2784,12 +2784,12 @@ define void @bcast_unfold_smax_v16i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_smax_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB80_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpmaxsd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpmaxsd 4096(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB80_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2853,12 +2853,12 @@ define void @bcast_unfold_smax_v4i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_smax_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB82_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpmaxsq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpmaxsq 8192(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB82_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2888,12 +2888,12 @@ define void @bcast_unfold_smax_v8i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_smax_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB83_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpmaxsq 8192(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB83_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2923,12 +2923,12 @@ define void @bcast_unfold_umin_v4i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_umin_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB84_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0 -; CHECK-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpminud 4096(%rdi,%rax), %xmm0, %xmm1 +; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB84_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2957,12 +2957,12 @@ define void @bcast_unfold_umin_v8i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_umin_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB85_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpminud {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpminud 4096(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB85_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -2992,12 +2992,12 @@ define void @bcast_unfold_umin_v16i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_umin_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB86_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpminud 4096(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB86_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -3061,12 +3061,12 @@ define void @bcast_unfold_umin_v4i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_umin_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB88_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpminuq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpminuq 8192(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB88_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -3096,12 +3096,12 @@ define void @bcast_unfold_umin_v8i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_umin_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB89_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpminuq 8192(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB89_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -3131,12 +3131,12 @@ define void @bcast_unfold_umax_v4i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_umax_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB90_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0 -; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %xmm0, %xmm1 +; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB90_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -3165,12 +3165,12 @@ define void @bcast_unfold_umax_v8i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_umax_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB91_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB91_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -3200,12 +3200,12 @@ define void @bcast_unfold_umax_v16i32(i32* %arg) { ; CHECK-LABEL: bcast_unfold_umax_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 +; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB92_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpmaxud {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax) +; CHECK-NEXT: vpmaxud 4096(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB92_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -3269,12 +3269,12 @@ define void @bcast_unfold_umax_v4i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_umax_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB94_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu 8192(%rdi,%rax), %ymm0 -; CHECK-NEXT: vpmaxuq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqu %ymm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpmaxuq 8192(%rdi,%rax), %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu %ymm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB94_1 ; CHECK-NEXT: # %bb.2: # %bb10 @@ -3304,12 +3304,12 @@ define void @bcast_unfold_umax_v8i64(i64* %arg) { ; CHECK-LABEL: bcast_unfold_umax_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 +; CHECK-NEXT: vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB95_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovdqu64 8192(%rdi,%rax), %zmm0 -; CHECK-NEXT: vpmaxuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, 8192(%rdi,%rax) +; CHECK-NEXT: vpmaxuq 8192(%rdi,%rax), %zmm0, %zmm1 +; CHECK-NEXT: vmovdqu64 %zmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB95_1 ; CHECK-NEXT: # %bb.2: # %bb10 |