diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 7b73a55c889..3a1bdf1ccc1 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -2562,6 +2562,397 @@ entry:
 %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
 ret <8 x i64> %2
 }
+define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] + B[0] : W[0]; remaining lanes from %__A
+; X32-LABEL: test_mm_mask_add_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vaddss %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_add_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vaddss %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <4 x float> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <4 x float> %__A, i32 0 ; A[0]
+ %add.i.i = fadd float %vecext1.i.i, %vecext.i.i ; A[0] + B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %vecext1.i = extractelement <4 x float> %__W, i32 0 ; W[0], the pass-through value
+ %cond.i = select i1 %tobool.i, float %vecext1.i, float %add.i.i ; bit clear -> W[0], bit set -> sum
+ %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %vecins.i
+}
+
+define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] + B[0] : 0.0; remaining lanes from %__A
+; X32-LABEL: test_mm_maskz_add_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vaddss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_add_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vaddss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <4 x float> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <4 x float> %__A, i32 0 ; A[0]
+ %add.i.i = fadd float %vecext1.i.i, %vecext.i.i ; A[0] + B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %cond.i = select i1 %tobool.i, float 0.000000e+00, float %add.i.i ; bit clear -> 0.0, bit set -> sum
+ %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %vecins.i
+}
+
+define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] + B[0] : W[0]; lane 1 from %__A
+; X32-LABEL: test_mm_mask_add_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vaddsd %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_add_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vaddsd %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <2 x double> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <2 x double> %__A, i32 0 ; A[0]
+ %add.i.i = fadd double %vecext1.i.i, %vecext.i.i ; A[0] + B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %vecext1.i = extractelement <2 x double> %__W, i32 0 ; W[0], the pass-through value
+ %cond.i = select i1 %tobool.i, double %vecext1.i, double %add.i.i ; bit clear -> W[0], bit set -> sum
+ %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 ; lane 1 is copied from %__A
+ ret <2 x double> %vecins.i
+}
+
+define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] + B[0] : 0.0; lane 1 from %__A
+; X32-LABEL: test_mm_maskz_add_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_add_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <2 x double> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <2 x double> %__A, i32 0 ; A[0]
+ %add.i.i = fadd double %vecext1.i.i, %vecext.i.i ; A[0] + B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %cond.i = select i1 %tobool.i, double 0.000000e+00, double %add.i.i ; bit clear -> 0.0, bit set -> sum
+ %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 ; lane 1 is copied from %__A
+ ret <2 x double> %vecins.i
+}
+
+define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] - B[0] : W[0]; remaining lanes from %__A
+; X32-LABEL: test_mm_mask_sub_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsubss %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_sub_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsubss %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <4 x float> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <4 x float> %__A, i32 0 ; A[0]
+ %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i ; A[0] - B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %vecext1.i = extractelement <4 x float> %__W, i32 0 ; W[0], the pass-through value
+ %cond.i = select i1 %tobool.i, float %vecext1.i, float %sub.i.i ; bit clear -> W[0], bit set -> difference
+ %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %vecins.i
+}
+
+define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] - B[0] : 0.0; remaining lanes from %__A
+; X32-LABEL: test_mm_maskz_sub_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsubss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_sub_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsubss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <4 x float> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <4 x float> %__A, i32 0 ; A[0]
+ %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i ; A[0] - B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %cond.i = select i1 %tobool.i, float 0.000000e+00, float %sub.i.i ; bit clear -> 0.0, bit set -> difference
+ %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %vecins.i
+}
+
+define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] - B[0] : W[0]; lane 1 from %__A
+; X32-LABEL: test_mm_mask_sub_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsubsd %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_sub_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsubsd %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <2 x double> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <2 x double> %__A, i32 0 ; A[0]
+ %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i ; A[0] - B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %vecext1.i = extractelement <2 x double> %__W, i32 0 ; W[0], the pass-through value
+ %cond.i = select i1 %tobool.i, double %vecext1.i, double %sub.i.i ; bit clear -> W[0], bit set -> difference
+ %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 ; lane 1 is copied from %__A
+ ret <2 x double> %vecins.i
+}
+
+define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] - B[0] : 0.0; lane 1 from %__A
+; X32-LABEL: test_mm_maskz_sub_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_sub_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <2 x double> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <2 x double> %__A, i32 0 ; A[0]
+ %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i ; A[0] - B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %cond.i = select i1 %tobool.i, double 0.000000e+00, double %sub.i.i ; bit clear -> 0.0, bit set -> difference
+ %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 ; lane 1 is copied from %__A
+ ret <2 x double> %vecins.i
+}
+
+define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] * B[0] : W[0]; remaining lanes from %__A
+; X32-LABEL: test_mm_mask_mul_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vmulss %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_mul_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmulss %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <4 x float> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <4 x float> %__A, i32 0 ; A[0]
+ %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i ; A[0] * B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %vecext1.i = extractelement <4 x float> %__W, i32 0 ; W[0], the pass-through value
+ %cond.i = select i1 %tobool.i, float %vecext1.i, float %mul.i.i ; bit clear -> W[0], bit set -> product
+ %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %vecins.i
+}
+
+define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] * B[0] : 0.0; remaining lanes from %__A
+; X32-LABEL: test_mm_maskz_mul_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vmulss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_mul_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmulss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <4 x float> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <4 x float> %__A, i32 0 ; A[0]
+ %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i ; A[0] * B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %cond.i = select i1 %tobool.i, float 0.000000e+00, float %mul.i.i ; bit clear -> 0.0, bit set -> product
+ %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %vecins.i
+}
+
+define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] * B[0] : W[0]; lane 1 from %__A
+; X32-LABEL: test_mm_mask_mul_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vmulsd %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_mul_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmulsd %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <2 x double> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <2 x double> %__A, i32 0 ; A[0]
+ %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i ; A[0] * B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %vecext1.i = extractelement <2 x double> %__W, i32 0 ; W[0], the pass-through value
+ %cond.i = select i1 %tobool.i, double %vecext1.i, double %mul.i.i ; bit clear -> W[0], bit set -> product
+ %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 ; lane 1 is copied from %__A
+ ret <2 x double> %vecins.i
+}
+
+define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (bit 0 of %__U set) ? A[0] * B[0] : 0.0; lane 1 from %__A
+; X32-LABEL: test_mm_maskz_mul_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_mul_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %vecext.i.i = extractelement <2 x double> %__B, i32 0 ; B[0]
+ %vecext1.i.i = extractelement <2 x double> %__A, i32 0 ; A[0]
+ %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i ; A[0] * B[0]
+ %0 = and i8 %__U, 1 ; keep only mask bit 0
+ %tobool.i = icmp eq i8 %0, 0 ; true when mask bit 0 is clear
+ %cond.i = select i1 %tobool.i, double 0.000000e+00, double %mul.i.i ; bit clear -> 0.0, bit set -> product
+ %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0 ; lane 1 is copied from %__A
+ ret <2 x double> %vecins.i
+}
+
+define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (element 0 of %__U as <8 x i1> set) ? A[0] / B[0] : W[0]; remaining lanes from %__A
+; X32-LABEL: test_mm_mask_div_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vdivss %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_div_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vdivss %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = extractelement <4 x float> %__A, i64 0 ; A[0]
+ %1 = extractelement <4 x float> %__B, i64 0 ; B[0]
+ %2 = extractelement <4 x float> %__W, i64 0 ; W[0], the pass-through value
+ %3 = fdiv float %0, %1 ; A[0] / B[0]
+ %4 = bitcast i8 %__U to <8 x i1> ; reinterpret the mask byte as eight i1 elements
+ %5 = extractelement <8 x i1> %4, i64 0 ; element 0 of the mask vector
+ %6 = select i1 %5, float %3, float %2 ; element set -> quotient, clear -> W[0]
+ %7 = insertelement <4 x float> %__A, float %6, i64 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %7
+}
+
+define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; lane 0 = (element 0 of %__U as <8 x i1> set) ? A[0] / B[0] : 0.0; remaining lanes from %__A
+; X32-LABEL: test_mm_maskz_div_ss:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vdivss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_div_ss:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vdivss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = extractelement <4 x float> %__A, i64 0 ; A[0]
+ %1 = extractelement <4 x float> %__B, i64 0 ; B[0]
+ %2 = fdiv float %0, %1 ; A[0] / B[0]
+ %3 = bitcast i8 %__U to <8 x i1> ; reinterpret the mask byte as eight i1 elements
+ %4 = extractelement <8 x i1> %3, i64 0 ; element 0 of the mask vector
+ %5 = select i1 %4, float %2, float 0.000000e+00 ; element set -> quotient, clear -> 0.0
+ %6 = insertelement <4 x float> %__A, float %5, i64 0 ; lanes 1-3 are copied from %__A
+ ret <4 x float> %6
+}
+
+define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (element 0 of %__U as <8 x i1> set) ? A[0] / B[0] : W[0]; lane 1 from %__A
+; X32-LABEL: test_mm_mask_div_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vdivsd %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_div_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vdivsd %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: retq
+entry:
+ %0 = extractelement <2 x double> %__A, i64 0 ; A[0]
+ %1 = extractelement <2 x double> %__B, i64 0 ; B[0]
+ %2 = extractelement <2 x double> %__W, i64 0 ; W[0], the pass-through value
+ %3 = fdiv double %0, %1 ; A[0] / B[0]
+ %4 = bitcast i8 %__U to <8 x i1> ; reinterpret the mask byte as eight i1 elements
+ %5 = extractelement <8 x i1> %4, i64 0 ; element 0 of the mask vector
+ %6 = select i1 %5, double %3, double %2 ; element set -> quotient, clear -> W[0]
+ %7 = insertelement <2 x double> %__A, double %6, i64 0 ; lane 1 is copied from %__A
+ ret <2 x double> %7
+}
+
+define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; lane 0 = (element 0 of %__U as <8 x i1> set) ? A[0] / B[0] : 0.0; lane 1 from %__A
+; X32-LABEL: test_mm_maskz_div_sd:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maskz_div_sd:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: retq
+entry:
+ %0 = extractelement <2 x double> %__A, i64 0 ; A[0]
+ %1 = extractelement <2 x double> %__B, i64 0 ; B[0]
+ %2 = fdiv double %0, %1 ; A[0] / B[0]
+ %3 = bitcast i8 %__U to <8 x i1> ; reinterpret the mask byte as eight i1 elements
+ %4 = extractelement <8 x i1> %3, i64 0 ; element 0 of the mask vector
+ %5 = select i1 %4, double %2, double 0.000000e+00 ; element set -> quotient, clear -> 0.0
+ %6 = insertelement <2 x double> %__A, double %5, i64 0 ; lane 1 is copied from %__A
+ ret <2 x double> %6
+}
 define <8 x double> @test_mm512_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { |