author     Chandler Carruth <chandlerc@gmail.com>    2014-10-03 00:44:46 +0000
committer  Chandler Carruth <chandlerc@gmail.com>    2014-10-03 00:44:46 +0000
commit     666ee3f7ab6acdf20bd1e51aab1535b3bf323a34 (patch)
tree       3a9de47ac109a4ba2ce8f30f369bcf2fbb3c175f
parent     f12e1ab3131827d167aef49d6b1b9ac07a4fb554 (diff)
[x86] Regenerate an avx512 test with my script to provide a nice
baseline for updates from the new vector shuffle lowering.
I've inspected the results here, and I couldn't find any case where register
allocation could realistically have come out differently; the closest was the
imul test case. If there is anything here you'd like register number variables
on, just shout and I'll add them.
llvm-svn: 218935
-rw-r--r--   llvm/test/CodeGen/X86/avx512-arith.ll   322
1 file changed, 192 insertions, 130 deletions
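For context, the regenerated checks below pin the exact instruction sequence llc emits for each function (; CHECK-LABEL plus a ; CHECK-NEXT per instruction) instead of the old loose one-opcode checks. The following is only a rough Python sketch of that regeneration idea, not the script the commit message refers to (LLVM's in-tree tool for this job is utils/update_llc_test_checks.py, which handles far more cases): it runs llc with the flags from the test's own RUN line, splits the assembly into functions, and prints exact check lines. It assumes llc is on PATH and a Darwin triple (so function symbols carry a leading underscore).

#!/usr/bin/env python3
"""Illustrative sketch only; see the note above. Not the commit's script."""
import re
import subprocess
import sys


def build_checks(test_file):
    # Flags mirror the test's RUN line:
    #   llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
    with open(test_file) as f:
        asm = subprocess.run(
            ["llc", "-mtriple=x86_64-apple-darwin", "-mcpu=knl", "-o", "-"],
            stdin=f, capture_output=True, text=True, check=True,
        ).stdout

    checks, name = {}, None
    for line in asm.splitlines():
        label = re.match(r"^_(\w+):", line)  # Darwin symbols carry a leading '_'
        body = line.strip()
        if label:
            name = label.group(1)
            checks[name] = []
        elif name and body and not body.startswith((".", "#")):
            checks[name].append(body)      # keep instruction lines only
            if body.startswith("ret"):     # crude end-of-function heuristic
                name = None
    return checks


def emit(checks):
    for name, body in checks.items():
        print("; CHECK-LABEL: %s:" % name)
        for inst in body:
            print("; CHECK-NEXT: %s" % inst)
        print()


if __name__ == "__main__":
    emit(build_checks(sys.argv[1]))

Because every instruction is asserted with CHECK-NEXT, any later change in codegen (such as the new vector shuffle lowering mentioned above) shows up as a precise, reviewable diff against this baseline.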
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index e6cffba2732..c43da9c03a6 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -1,189 +1,217 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

-; CHECK-LABEL: addpd512
-; CHECK: vaddpd
-; CHECK: ret
 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: addpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %add.i = fadd <8 x double> %x, %y
 ret <8 x double> %add.i
 }

-; CHECK-LABEL: addpd512fold
-; CHECK: vaddpd LCP{{.*}}(%rip)
-; CHECK: ret
 define <8 x double> @addpd512fold(<8 x double> %y) {
+; CHECK-LABEL: addpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
 ret <8 x double> %add.i
 }

-; CHECK-LABEL: addps512
-; CHECK: vaddps
-; CHECK: ret
 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: addps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %add.i = fadd <16 x float> %x, %y
 ret <16 x float> %add.i
 }

-; CHECK-LABEL: addps512fold
-; CHECK: vaddps LCP{{.*}}(%rip)
-; CHECK: ret
 define <16 x float> @addps512fold(<16 x float> %y) {
+; CHECK-LABEL: addps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
 ret <16 x float> %add.i
 }

-; CHECK-LABEL: subpd512
-; CHECK: vsubpd
-; CHECK: ret
 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: subpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %sub.i = fsub <8 x double> %x, %y
 ret <8 x double> %sub.i
 }

-; CHECK-LABEL: @subpd512fold
-; CHECK: vsubpd (%
-; CHECK: ret
 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
+; CHECK-LABEL: subpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %tmp2 = load <8 x double>* %x, align 8
 %sub.i = fsub <8 x double> %y, %tmp2
 ret <8 x double> %sub.i
 }

-; CHECK-LABEL: @subps512
-; CHECK: vsubps
-; CHECK: ret
 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: subps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %sub.i = fsub <16 x float> %x, %y
 ret <16 x float> %sub.i
 }

-; CHECK-LABEL: subps512fold
-; CHECK: vsubps (%
-; CHECK: ret
 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
+; CHECK-LABEL: subps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %tmp2 = load <16 x float>* %x, align 4
 %sub.i = fsub <16 x float> %y, %tmp2
 ret <16 x float> %sub.i
 }

-; CHECK-LABEL: imulq512
-; CHECK: vpmuludq
-; CHECK: vpmuludq
-; CHECK: ret
 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
+; CHECK-LABEL: imulq512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
+; CHECK-NEXT: vpsrlq $32, %zmm0, %zmm3
+; CHECK-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
+; CHECK-NEXT: vpsllq $32, %zmm3, %zmm3
+; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2
+; CHECK-NEXT: vpsrlq $32, %zmm1, %zmm1
+; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
 %z = mul <8 x i64>%x, %y
 ret <8 x i64>%z
 }

-; CHECK-LABEL: mulpd512
-; CHECK: vmulpd
-; CHECK: ret
 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: mulpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %mul.i = fmul <8 x double> %x, %y
 ret <8 x double> %mul.i
 }

-; CHECK-LABEL: mulpd512fold
-; CHECK: vmulpd LCP{{.*}}(%rip)
-; CHECK: ret
 define <8 x double> @mulpd512fold(<8 x double> %y) {
+; CHECK-LABEL: mulpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
 ret <8 x double> %mul.i
 }

-; CHECK-LABEL: mulps512
-; CHECK: vmulps
-; CHECK: ret
 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: mulps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %mul.i = fmul <16 x float> %x, %y
 ret <16 x float> %mul.i
 }

-; CHECK-LABEL: mulps512fold
-; CHECK: vmulps LCP{{.*}}(%rip)
-; CHECK: ret
 define <16 x float> @mulps512fold(<16 x float> %y) {
+; CHECK-LABEL: mulps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
 ret <16 x float> %mul.i
 }

-; CHECK-LABEL: divpd512
-; CHECK: vdivpd
-; CHECK: ret
 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: divpd512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %div.i = fdiv <8 x double> %x, %y
 ret <8 x double> %div.i
 }

-; CHECK-LABEL: divpd512fold
-; CHECK: vdivpd LCP{{.*}}(%rip)
-; CHECK: ret
 define <8 x double> @divpd512fold(<8 x double> %y) {
+; CHECK-LABEL: divpd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
 ret <8 x double> %div.i
 }

-; CHECK-LABEL: divps512
-; CHECK: vdivps
-; CHECK: ret
 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: divps512:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
 entry:
 %div.i = fdiv <16 x float> %x, %y
 ret <16 x float> %div.i
 }

-; CHECK-LABEL: divps512fold
-; CHECK: vdivps LCP{{.*}}(%rip)
-; CHECK: ret
 define <16 x float> @divps512fold(<16 x float> %y) {
+; CHECK-LABEL: divps512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
 ret <16 x float> %div.i
 }

-; CHECK-LABEL: vpaddq_test
-; CHECK: vpaddq %zmm
-; CHECK: ret
 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+; CHECK-LABEL: vpaddq_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %x = add <8 x i64> %i, %j
 ret <8 x i64> %x
 }

-; CHECK-LABEL: vpaddq_fold_test
-; CHECK: vpaddq (%
-; CHECK: ret
 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
+; CHECK-LABEL: vpaddq_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
 %tmp = load <8 x i64>* %j, align 4
 %x = add <8 x i64> %i, %tmp
 ret <8 x i64> %x
 }

-; CHECK-LABEL: vpaddq_broadcast_test
-; CHECK: vpaddq LCP{{.*}}(%rip){1to8}
-; CHECK: ret
 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
+; CHECK-LABEL: vpaddq_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
 ret <8 x i64> %x
 }

-; CHECK-LABEL: vpaddq_broadcast2_test
-; CHECK: vpaddq (%rdi){1to8}
-; CHECK: ret
 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
+; CHECK-LABEL: vpaddq_broadcast2_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %tmp = load i64* %j
 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
@@ -197,55 +225,67 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
 ret <8 x i64> %x
 }

-; CHECK-LABEL: vpaddd_test
-; CHECK: vpaddd %zmm
-; CHECK: ret
 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+; CHECK-LABEL: vpaddd_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %x = add <16 x i32> %i, %j
 ret <16 x i32> %x
 }

-; CHECK-LABEL: vpaddd_fold_test
-; CHECK: vpaddd (%
-; CHECK: ret
 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
+; CHECK-LABEL: vpaddd_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
 %tmp = load <16 x i32>* %j, align 4
 %x = add <16 x i32> %i, %tmp
 ret <16 x i32> %x
 }

-; CHECK-LABEL: vpaddd_broadcast_test
-; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
-; CHECK: ret
 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
+; CHECK-LABEL: vpaddd_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ret <16 x i32> %x
 }

-; CHECK-LABEL: vpaddd_mask_test
-; CHECK: vpaddd {{%zmm[0-9], %zmm[0-9], %zmm[0-9] {%k[1-7]}}}
-; CHECK: ret
 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
 %x = add <16 x i32> %i, %j
 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
 ret <16 x i32> %r
 }

-; CHECK-LABEL: vpaddd_maskz_test
-; CHECK: vpaddd {{%zmm[0-9], %zmm[0-9], %zmm[0-9] {%k[1-7]} {z}}}
-; CHECK: ret
 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
 %x = add <16 x i32> %i, %j
 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
 ret <16 x i32> %r
 }

-; CHECK-LABEL: vpaddd_mask_fold_test
-; CHECK: vpaddd (%rdi), {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}}
-; CHECK: ret
 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
 %j = load <16 x i32>* %j.ptr
 %x = add <16 x i32> %i, %j
@@ -253,20 +293,26 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
 ret <16 x i32> %r
 }

-; CHECK-LABEL: vpaddd_mask_broadcast_test
-; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}}
-; CHECK: ret
 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
 ret <16 x i32> %r
 }

-; CHECK-LABEL: vpaddd_maskz_fold_test
-; CHECK: vpaddd (%rdi), {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}} {z}
-; CHECK: ret
 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_fold_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
 %j = load <16 x i32>* %j.ptr
 %x = add <16 x i32> %i, %j
@@ -274,125 +320,141 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
 ret <16 x i32> %r
 }

-; CHECK-LABEL: vpaddd_maskz_broadcast_test
-; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9], %zmm[0-9] {%k[1-7]}}} {z}
-; CHECK: ret
 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_broadcast_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
 ret <16 x i32> %r
 }

-; CHECK-LABEL: vpsubq_test
-; CHECK: vpsubq %zmm
-; CHECK: ret
 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+; CHECK-LABEL: vpsubq_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %x = sub <8 x i64> %i, %j
 ret <8 x i64> %x
 }

-; CHECK-LABEL: vpsubd_test
-; CHECK: vpsubd
-; CHECK: ret
 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+; CHECK-LABEL: vpsubd_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %x = sub <16 x i32> %i, %j
 ret <16 x i32> %x
 }

-; CHECK-LABEL: vpmulld_test
-; CHECK: vpmulld %zmm
-; CHECK: ret
 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
+; CHECK-LABEL: vpmulld_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %x = mul <16 x i32> %i, %j
 ret <16 x i32> %x
 }

-; CHECK-LABEL: sqrtA
-; CHECK: vsqrtss {{.*}}
-; CHECK: ret
 declare float @sqrtf(float) readnone
 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: sqrtA:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
 entry:
 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
 ret float %conv1
 }

-; CHECK-LABEL: sqrtB
-; CHECK: vsqrtsd {{.*}}
-; CHECK: ret
 declare double @sqrt(double) readnone
 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: sqrtB:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
 entry:
 %call = tail call double @sqrt(double %a) nounwind readnone
 ret double %call
 }

-; CHECK-LABEL: sqrtC
-; CHECK: vsqrtss {{.*}}
-; CHECK: ret
 declare float @llvm.sqrt.f32(float)
 define float @sqrtC(float %a) nounwind {
+; CHECK-LABEL: sqrtC:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
 %b = call float @llvm.sqrt.f32(float %a)
 ret float %b
 }

-; CHECK-LABEL: sqrtD
-; CHECK: vsqrtps {{.*}}
-; CHECK: ret
 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
+; CHECK-LABEL: sqrtD:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtps %zmm0, %zmm0
+; CHECK-NEXT: retq
 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
 ret <16 x float> %b
 }

-; CHECK-LABEL: sqrtE
-; CHECK: vsqrtpd {{.*}}
-; CHECK: ret
 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
+; CHECK-LABEL: sqrtE:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
+; CHECK-NEXT: retq
 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
 ret <8 x double> %b
 }

-; CHECK-LABEL: fadd_broadcast
-; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
-; CHECK: ret
 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
+; CHECK-LABEL: fadd_broadcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
 ret <16 x float> %b
 }

-; CHECK-LABEL: addq_broadcast
-; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK: ret
 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
+; CHECK-LABEL: addq_broadcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ret <8 x i64> %b
 }

-; CHECK-LABEL: orq_broadcast
-; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK: ret
 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
+; CHECK-LABEL: orq_broadcast:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
 ret <8 x i64> %b
 }

-; CHECK-LABEL: andd512fold
-; CHECK: vpandd (%
-; CHECK: ret
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
+; CHECK-LABEL: andd512fold:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %a = load <16 x i32>* %x, align 4
 %b = and <16 x i32> %y, %a
 ret <16 x i32> %b
 }

-; CHECK-LABEL: andqbrst
-; CHECK: vpandq (%rdi){1to8}, %zmm
-; CHECK: ret
 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
+; CHECK-LABEL: andqbrst:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
 entry:
 %a = load i64* %ap, align 8
 %b = insertelement <8 x i64> undef, i64 %a, i32 0