diff options
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll | 161 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll | 31 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics.ll | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll | 171 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll | 62 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 43 | 
6 files changed, 464 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll new file mode 100644 index 00000000000..cd86d5e10f6 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,X64 + +; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512dq-builtins.c + +define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> %__A) { +; X86-LABEL: test_mm512_mask_fpclass_pd_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    vfpclasspd $4, %zmm0, %k0 +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    andb {{[0-9]+}}(%esp), %al +; X86-NEXT:    # kill: def $al killed $al killed $eax +; X86-NEXT:    vzeroupper +; X86-NEXT:    retl +; +; X64-LABEL: test_mm512_mask_fpclass_pd_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    vfpclasspd $4, %zmm0, %k0 +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    andb %dil, %al +; X64-NEXT:    # kill: def $al killed $al killed $eax +; X64-NEXT:    vzeroupper +; X64-NEXT:    retq +entry: +  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4) +  %1 = bitcast i8 %__U to <8 x i1> +  %2 = and <8 x i1> %0, %1 +  %3 = bitcast <8 x i1> %2 to i8 +  ret i8 %3 +} + +declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32) + +define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) { +; CHECK-LABEL: test_mm512_fpclass_pd_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    vzeroupper +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4) +  %1 = bitcast <8 x i1> %0 to i8 +  ret i8 %1 +} + +define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x float> %__A) { +; X86-LABEL: test_mm512_mask_fpclass_ps_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    vfpclassps $4, %zmm0, %k0 +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax +; X86-NEXT:    # kill: def $ax killed $ax killed $eax +; X86-NEXT:    vzeroupper +; X86-NEXT:    retl +; +; X64-LABEL: test_mm512_mask_fpclass_ps_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    vfpclassps $4, %zmm0, %k0 +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    andl %edi, %eax +; X64-NEXT:    # kill: def $ax killed $ax killed $eax +; X64-NEXT:    vzeroupper +; X64-NEXT:    retq +entry: +  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4) +  %1 = bitcast i16 %__U to <16 x i1> +  %2 = and <16 x i1> %0, %1 +  %3 = bitcast <16 x i1> %2 to i16 +  ret i16 %3 +} + +declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32) + +define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) { +; CHECK-LABEL: test_mm512_fpclass_ps_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax +; CHECK-NEXT:    vzeroupper +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4) +  %1 = bitcast <16 x i1> %0 to i16 +  ret i16 %1 +} + +define zeroext i8 @test_mm_fpclass_sd_mask(<4 x float> %__A) { +; CHECK-LABEL: test_mm_fpclass_sd_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = bitcast <4 x float> %__A to <2 x double> +  %1 = tail call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %0, i32 2, i8 -1) +  ret i8 %1 +} + +declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8) + +define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__A) { +; X86-LABEL: test_mm_mask_fpclass_sd_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 +; X86-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    # kill: def $al killed $al killed $eax +; X86-NEXT:    retl +; +; X64-LABEL: test_mm_mask_fpclass_sd_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    kmovw %edi, %k1 +; X64-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    # kill: def $al killed $al killed $eax +; X64-NEXT:    retq +entry: +  %0 = bitcast <4 x float> %__A to <2 x double> +  %1 = tail call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %0, i32 2, i8 %__U) +  ret i8 %1 +} + +define zeroext i8 @test_mm_fpclass_ss_mask(<4 x float> %__A) { +; CHECK-LABEL: test_mm_fpclass_ss_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclassss $2, %xmm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = tail call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %__A, i32 2, i8 -1) +  ret i8 %0 +} + +declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8) + +define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__A) { +; X86-LABEL: test_mm_mask_fpclass_ss_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 +; X86-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    # kill: def $al killed $al killed $eax +; X86-NEXT:    retl +; +; X64-LABEL: test_mm_mask_fpclass_ss_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    kmovw %edi, %k1 +; X64-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    # kill: def $al killed $al killed $eax +; X64-NEXT:    retq +entry: +  %0 = tail call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %__A, i32 2, i8 %__U) +  ret i8 %0 +} diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll index 3105e6d4842..4463a00b2b8 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -536,3 +536,34 @@ define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {    %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)    ret i8 %res  } + +declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8) + +define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02] +; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04] +; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +    %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1) +    %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res) +    ret i8 %res1 +} +declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16) + +define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02] +; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04] +; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax +; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +    %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1) +    %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res) +    ret i16 %res1 +} diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll index e2d25adf3c8..41e0952d51a 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -600,7 +600,7 @@ define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x dou    ret <2 x double> %res4  } -declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8) +declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)  define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: @@ -611,11 +611,13 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -    %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1) -    %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res) -    ret i8 %res1 +  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4) +  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2) +  %1 = and <8 x i1> %res1, %res +  %2 = bitcast <8 x i1> %1 to i8 +  ret i8 %2  } -declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16) +declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32)  define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: @@ -626,9 +628,11 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {  ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -    %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1) -    %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res) -    ret i16 %res1 +  %res = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4) +  %res1 = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2) +  %1 = and <16 x i1> %res1, %res +  %2 = bitcast <16 x i1> %1 to i16 +  ret i16 %2  }  declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8) diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll index 4393418382b..48cfa9a4eaf 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll @@ -203,3 +203,174 @@ entry:    %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer    ret <4 x double> %1  } + +define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %__A) { +; X86-LABEL: test_mm_mask_fpclass_pd_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 +; X86-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1} +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    # kill: def $al killed $al killed $eax +; X86-NEXT:    retl +; +; X64-LABEL: test_mm_mask_fpclass_pd_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    kmovw %edi, %k1 +; X64-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1} +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    # kill: def $al killed $al killed $eax +; X64-NEXT:    retq +entry: +  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2) +  %1 = bitcast i8 %__U to <8 x i1> +  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> +  %2 = and <2 x i1> %0, %extract +  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> +  %4 = bitcast <8 x i1> %3 to i8 +  ret i8 %4 +} + +declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32) + +define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) { +; CHECK-LABEL: test_mm_fpclass_pd_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2) +  %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> +  %2 = bitcast <8 x i1> %1 to i8 +  ret i8 %2 +} + +define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double> %__A) { +; X86-LABEL: test_mm256_mask_fpclass_pd_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 +; X86-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    # kill: def $al killed $al killed $eax +; X86-NEXT:    vzeroupper +; X86-NEXT:    retl +; +; X64-LABEL: test_mm256_mask_fpclass_pd_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    kmovw %edi, %k1 +; X64-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    # kill: def $al killed $al killed $eax +; X64-NEXT:    vzeroupper +; X64-NEXT:    retq +entry: +  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2) +  %1 = bitcast i8 %__U to <8 x i1> +  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +  %2 = and <4 x i1> %0, %extract +  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +  %4 = bitcast <8 x i1> %3 to i8 +  ret i8 %4 +} + +declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32) + +define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) { +; CHECK-LABEL: test_mm256_fpclass_pd_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    vzeroupper +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2) +  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +  %2 = bitcast <8 x i1> %1 to i8 +  ret i8 %2 +} + +define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__A) { +; X86-LABEL: test_mm_mask_fpclass_ps_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 +; X86-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    # kill: def $al killed $al killed $eax +; X86-NEXT:    retl +; +; X64-LABEL: test_mm_mask_fpclass_ps_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    kmovw %edi, %k1 +; X64-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    # kill: def $al killed $al killed $eax +; X64-NEXT:    retq +entry: +  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2) +  %1 = bitcast i8 %__U to <8 x i1> +  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +  %2 = and <4 x i1> %0, %extract +  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +  %4 = bitcast <8 x i1> %3 to i8 +  ret i8 %4 +} + +declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32) + +define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) { +; CHECK-LABEL: test_mm_fpclass_ps_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2) +  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +  %2 = bitcast <8 x i1> %1 to i8 +  ret i8 %2 +} + +define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) { +; X86-LABEL: test_mm256_mask_fpclass_ps_mask: +; X86:       # %bb.0: # %entry +; X86-NEXT:    vfpclassps $2, %ymm0, %k0 +; X86-NEXT:    kmovw %k0, %eax +; X86-NEXT:    andb {{[0-9]+}}(%esp), %al +; X86-NEXT:    # kill: def $al killed $al killed $eax +; X86-NEXT:    vzeroupper +; X86-NEXT:    retl +; +; X64-LABEL: test_mm256_mask_fpclass_ps_mask: +; X64:       # %bb.0: # %entry +; X64-NEXT:    vfpclassps $2, %ymm0, %k0 +; X64-NEXT:    kmovw %k0, %eax +; X64-NEXT:    andb %dil, %al +; X64-NEXT:    # kill: def $al killed $al killed $eax +; X64-NEXT:    vzeroupper +; X64-NEXT:    retq +entry: +  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2) +  %1 = bitcast i8 %__U to <8 x i1> +  %2 = and <8 x i1> %0, %1 +  %3 = bitcast <8 x i1> %2 to i8 +  ret i8 %3 +} + +declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32) + +define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) { +; CHECK-LABEL: test_mm256_fpclass_ps_mask: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 +; CHECK-NEXT:    kmovw %k0, %eax +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    vzeroupper +; CHECK-NEXT:    ret{{[l|q]}} +entry: +  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2) +  %1 = bitcast <8 x i1> %0 to i8 +  ret i8 %1 +} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll index 457caa65daa..a7a44253c80 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -2928,3 +2928,65 @@ define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x    %res2 = fadd <4 x double> %res, %res1    ret <4 x double> %res2  } + +declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8) + +define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04] +; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02] +; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1) +  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res) +  ret i8 %res1 +} + +declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8) + +define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04] +; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02] +; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1) +  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res) +  ret i8 %res1 +} + +declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8) + +define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02] +; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04] +; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +  %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1) +  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res) +  ret i8 %res1 +} + +declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8) + +define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) { +; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04] +; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02] +; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; CHECK-NEXT:    # kill: def $al killed $al killed $eax +; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] +  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1) +  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res) +  ret i8 %res1 +} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index ec6ca328693..e0bfc188a27 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -734,7 +734,7 @@ define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x f    ret <8 x float> %res2  } -declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8) +declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32)  define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128: @@ -744,12 +744,15 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {  ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res) -  ret i8 %res1 +  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2) +  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4) +  %1 = and <4 x i1> %res1, %res +  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +  %3 = bitcast <8 x i1> %2 to i8 +  ret i8 %3  } -declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8) +declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32)  define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256: @@ -760,12 +763,14 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res) -  ret i8 %res1 +  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2) +  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4) +  %1 = and <8 x i1> %res1, %res +  %2 = bitcast <8 x i1> %1 to i8 +  ret i8 %2  } -declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8) +declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32)  define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128: @@ -775,12 +780,15 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {  ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res) -  ret i8 %res1 +  %res = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4) +  %res1 = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2) +  %1 = and <2 x i1> %res1, %res +  %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> +  %3 = bitcast <8 x i1> %2 to i8 +  ret i8 %3  } -declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8) +declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32)  define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256: @@ -791,7 +799,10 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res) -  ret i8 %res1 +  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2) +  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4) +  %1 = and <4 x i1> %res1, %res +  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +  %3 = bitcast <8 x i1> %2 to i8 +  ret i8 %3  }  | 

