diff options
author | Craig Topper <craig.topper@intel.com> | 2019-08-30 20:04:23 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-08-30 20:04:23 +0000 |
commit | 4b61b6476beb2641c289186c6b15e22b8ec2a222 (patch) | |
tree | 155f9d758a74106d5c3adf87e3f8692694c5cda2 | |
parent | 5b4f640499c12565b3f01c4687bb6fc30e01dc99 (diff) | |
download | bcm5719-llvm-4b61b6476beb2641c289186c6b15e22b8ec2a222.tar.gz bcm5719-llvm-4b61b6476beb2641c289186c6b15e22b8ec2a222.zip |
[X86] Fix mul test cases in avx512-broadcast-unfold.ll to not get canonicalized to fadd. Remove the fsub test cases which were also testing fadd.
Not sure how to prevent an fsub by a constant from getting turned into an fadd by a negative constant.
llvm-svn: 370515
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll | 276 |
1 file changed, 37 insertions, 239 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll index 08a89e2c791..ed243ae800c 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -1237,15 +1237,15 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fsub_v16f32(float* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fsub_v16f32: +define void @bcast_unfold_fmul_v16f32(float* nocapture %arg) { +; CHECK-LABEL: bcast_unfold_fmul_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB36_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm0 -; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB36_1 @@ -1260,7 +1260,7 @@ bb1: ; preds = %bb1, %bb %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp %tmp3 = bitcast float* %tmp2 to <16 x float>* %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 - %tmp5 = fsub <16 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> + %tmp5 = fmul <16 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> %tmp6 = bitcast float* %tmp2 to <16 x float>* store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 %tmp7 = 
add i64 %tmp, 16 @@ -1271,15 +1271,15 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fsub_v8f32(float* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fsub_v8f32: +define void @bcast_unfold_fmul_v8f32(float* nocapture %arg) { +; CHECK-LABEL: bcast_unfold_fmul_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB37_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm0 -; CHECK-NEXT: vaddps {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; CHECK-NEXT: vmulps {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB37_1 @@ -1294,7 +1294,7 @@ bb1: ; preds = %bb1, %bb %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp %tmp3 = bitcast float* %tmp2 to <8 x float>* %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 - %tmp5 = fsub <8 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> + %tmp5 = fmul <8 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> %tmp6 = bitcast float* %tmp2 to <8 x float>* store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 %tmp7 = add i64 %tmp, 8 @@ -1305,15 +1305,15 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fsub_v4f32(float* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fsub_v4f32: +define void @bcast_unfold_fmul_v4f32(float* nocapture %arg) { +; CHECK-LABEL: bcast_unfold_fmul_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB38_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm0 -; CHECK-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; CHECK-NEXT: 
vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB38_1 @@ -1327,7 +1327,7 @@ bb1: ; preds = %bb1, %bb %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp %tmp3 = bitcast float* %tmp2 to <4 x float>* %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 - %tmp5 = fsub <4 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> + %tmp5 = fmul <4 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> %tmp6 = bitcast float* %tmp2 to <4 x float>* store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 %tmp7 = add i64 %tmp, 4 @@ -1338,15 +1338,15 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fsub_v8f64(double* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fsub_v8f64: +define void @bcast_unfold_fmul_v8f64(double* nocapture %arg) { +; CHECK-LABEL: bcast_unfold_fmul_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB39_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm0 -; CHECK-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; CHECK-NEXT: vmulpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax ; CHECK-NEXT: jne .LBB39_1 @@ -1361,7 +1361,7 @@ bb1: ; preds = %bb1, %bb %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp %tmp3 = bitcast double* %tmp2 to <8 x double>* %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 - %tmp5 = fsub <8 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00> + %tmp5 = fmul <8 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 
3.000000e+00, double 3.000000e+00> %tmp6 = bitcast double* %tmp2 to <8 x double>* store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 %tmp7 = add i64 %tmp, 8 @@ -1372,15 +1372,15 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fsub_v4f64(double* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fsub_v4f64: +define void @bcast_unfold_fmul_v4f64(double* nocapture %arg) { +; CHECK-LABEL: bcast_unfold_fmul_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB40_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm0 -; CHECK-NEXT: vaddpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 +; CHECK-NEXT: vmulpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 ; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax ; CHECK-NEXT: jne .LBB40_1 @@ -1395,7 +1395,7 @@ bb1: ; preds = %bb1, %bb %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp %tmp3 = bitcast double* %tmp2 to <4 x double>* %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 - %tmp5 = fsub <4 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00> + %tmp5 = fmul <4 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00> %tmp6 = bitcast double* %tmp2 to <4 x double>* store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 %tmp7 = add i64 %tmp, 4 @@ -1406,15 +1406,15 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fsub_v2f64(double* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fsub_v2f64: +define void @bcast_unfold_fmul_v2f64(double* nocapture %arg) { +; CHECK-LABEL: bcast_unfold_fmul_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [-2.0E+0,-2.0E+0] +; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [3.0E+0,3.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB41_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: 
Depth=1 -; CHECK-NEXT: vaddpd 8192(%rdi,%rax), %xmm0, %xmm1 +; CHECK-NEXT: vmulpd 8192(%rdi,%rax), %xmm0, %xmm1 ; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax ; CHECK-NEXT: jne .LBB41_1 @@ -1428,209 +1428,7 @@ bb1: ; preds = %bb1, %bb %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp %tmp3 = bitcast double* %tmp2 to <2 x double>* %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 - %tmp5 = fsub <2 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00> - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 - %tmp7 = add i64 %tmp, 2 - %tmp8 = icmp eq i64 %tmp7, 1024 - br i1 %tmp8, label %bb9, label %bb1 - -bb9: ; preds = %bb1 - ret void -} - -define void @bcast_unfold_fmul_v16f32(float* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fmul_v16f32: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB42_1: # %bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm0 -; CHECK-NEXT: vaddps %zmm0, %zmm0, %zmm0 -; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax) -; CHECK-NEXT: addq $64, %rax -; CHECK-NEXT: jne .LBB42_1 -; CHECK-NEXT: # %bb.2: # %bb9 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 - %tmp5 = fmul <16 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> 
%tmp5, <16 x float>* %tmp6, align 4 - %tmp7 = add i64 %tmp, 16 - %tmp8 = icmp eq i64 %tmp7, 1024 - br i1 %tmp8, label %bb9, label %bb1 - -bb9: ; preds = %bb1 - ret void -} - -define void @bcast_unfold_fmul_v8f32(float* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fmul_v8f32: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB43_1: # %bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm0 -; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0 -; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax) -; CHECK-NEXT: addq $32, %rax -; CHECK-NEXT: jne .LBB43_1 -; CHECK-NEXT: # %bb.2: # %bb9 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 - %tmp5 = fmul <8 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 - %tmp7 = add i64 %tmp, 8 - %tmp8 = icmp eq i64 %tmp7, 1024 - br i1 %tmp8, label %bb9, label %bb1 - -bb9: ; preds = %bb1 - ret void -} - -define void @bcast_unfold_fmul_v4f32(float* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fmul_v4f32: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB44_1: # %bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax) -; CHECK-NEXT: addq $16, %rax -; CHECK-NEXT: jne .LBB44_1 -; CHECK-NEXT: # %bb.2: # %bb9 -; CHECK-NEXT: retq -bb: - br label %bb1 - -bb1: ; preds 
= %bb1, %bb - %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 - %tmp5 = fmul <4 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 - %tmp7 = add i64 %tmp, 4 - %tmp8 = icmp eq i64 %tmp7, 1024 - br i1 %tmp8, label %bb9, label %bb1 - -bb9: ; preds = %bb1 - ret void -} - -define void @bcast_unfold_fmul_v8f64(double* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fmul_v8f64: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB45_1: # %bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm0 -; CHECK-NEXT: vaddpd %zmm0, %zmm0, %zmm0 -; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax) -; CHECK-NEXT: addq $64, %rax -; CHECK-NEXT: jne .LBB45_1 -; CHECK-NEXT: # %bb.2: # %bb9 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 - %tmp5 = fmul <8 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00> - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 - %tmp7 = add i64 %tmp, 8 - %tmp8 = icmp eq i64 %tmp7, 1024 - br i1 %tmp8, label %bb9, label %bb1 - -bb9: ; preds = %bb1 - ret void -} - -define void @bcast_unfold_fmul_v4f64(double* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fmul_v4f64: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movq 
$-8192, %rax # imm = 0xE000 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB46_1: # %bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm0 -; CHECK-NEXT: vaddpd %ymm0, %ymm0, %ymm0 -; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax) -; CHECK-NEXT: addq $32, %rax -; CHECK-NEXT: jne .LBB46_1 -; CHECK-NEXT: # %bb.2: # %bb9 -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 - %tmp5 = fmul <4 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00> - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 - %tmp7 = add i64 %tmp, 4 - %tmp8 = icmp eq i64 %tmp7, 1024 - br i1 %tmp8, label %bb9, label %bb1 - -bb9: ; preds = %bb1 - ret void -} - -define void @bcast_unfold_fmul_v2f64(double* nocapture %arg) { -; CHECK-LABEL: bcast_unfold_fmul_v2f64: -; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB47_1: # %bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovupd %xmm0, 8192(%rdi,%rax) -; CHECK-NEXT: addq $16, %rax -; CHECK-NEXT: jne .LBB47_1 -; CHECK-NEXT: # %bb.2: # %bb9 -; CHECK-NEXT: retq -bb: - br label %bb1 - -bb1: ; preds = %bb1, %bb - %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 - %tmp5 = fmul <2 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00> + %tmp5 = fmul <2 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00> 
%tmp6 = bitcast double* %tmp2 to <2 x double>* store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 %tmp7 = add i64 %tmp, 2 @@ -1646,13 +1444,13 @@ define void @bcast_unfold_fdiv_v16f32(float* nocapture %arg) { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB48_1: # %bb1 +; CHECK-NEXT: .LBB42_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm0 ; CHECK-NEXT: vdivps {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; CHECK-NEXT: vmovups %zmm0, 4096(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax -; CHECK-NEXT: jne .LBB48_1 +; CHECK-NEXT: jne .LBB42_1 ; CHECK-NEXT: # %bb.2: # %bb9 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1680,13 +1478,13 @@ define void @bcast_unfold_fdiv_v8f32(float* nocapture %arg) { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB49_1: # %bb1 +; CHECK-NEXT: .LBB43_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm0 ; CHECK-NEXT: vdivps {{.*}}(%rip){1to8}, %ymm0, %ymm0 ; CHECK-NEXT: vmovups %ymm0, 4096(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax -; CHECK-NEXT: jne .LBB49_1 +; CHECK-NEXT: jne .LBB43_1 ; CHECK-NEXT: # %bb.2: # %bb9 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1714,13 +1512,13 @@ define void @bcast_unfold_fdiv_v4f32(float* nocapture %arg) { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB50_1: # %bb1 +; CHECK-NEXT: .LBB44_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm0 ; CHECK-NEXT: vdivps {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; CHECK-NEXT: vmovups %xmm0, 4096(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax -; CHECK-NEXT: jne .LBB50_1 +; CHECK-NEXT: jne .LBB44_1 ; CHECK-NEXT: # %bb.2: # %bb9 ; CHECK-NEXT: retq bb: @@ -1747,13 +1545,13 @@ define void @bcast_unfold_fdiv_v8f64(double* 
nocapture %arg) { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB51_1: # %bb1 +; CHECK-NEXT: .LBB45_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm0 ; CHECK-NEXT: vdivpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; CHECK-NEXT: vmovupd %zmm0, 8192(%rdi,%rax) ; CHECK-NEXT: addq $64, %rax -; CHECK-NEXT: jne .LBB51_1 +; CHECK-NEXT: jne .LBB45_1 ; CHECK-NEXT: # %bb.2: # %bb9 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1781,13 +1579,13 @@ define void @bcast_unfold_fdiv_v4f64(double* nocapture %arg) { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB52_1: # %bb1 +; CHECK-NEXT: .LBB46_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm0 ; CHECK-NEXT: vdivpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 ; CHECK-NEXT: vmovupd %ymm0, 8192(%rdi,%rax) ; CHECK-NEXT: addq $32, %rax -; CHECK-NEXT: jne .LBB52_1 +; CHECK-NEXT: jne .LBB46_1 ; CHECK-NEXT: # %bb.2: # %bb9 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -1816,13 +1614,13 @@ define void @bcast_unfold_fdiv_v2f64(double* nocapture %arg) { ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 ; CHECK-NEXT: vmovapd {{.*#+}} xmm0 = [2.0E+0,2.0E+0] ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB53_1: # %bb1 +; CHECK-NEXT: .LBB47_1: # %bb1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1 ; CHECK-NEXT: vdivpd %xmm0, %xmm1, %xmm1 ; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax) ; CHECK-NEXT: addq $16, %rax -; CHECK-NEXT: jne .LBB53_1 +; CHECK-NEXT: jne .LBB47_1 ; CHECK-NEXT: # %bb.2: # %bb9 ; CHECK-NEXT: retq bb: |