| field | value | |
|---|---|---|
| author | Craig Topper <craig.topper@intel.com> | 2018-06-26 01:31:53 +0000 |
| committer | Craig Topper <craig.topper@intel.com> | 2018-06-26 01:31:53 +0000 |
| commit | 6f4fdfa9afd609352d07c47be1b040564fad4060 (patch) | |
| tree | 8ac37a90756835628fcc47d5c0a2f157993a78a9 | |
| parent | ce72161ddf2af2c9ef3eca397404aa3324ec0de9 (diff) | |
Revert r335562 and 335563 "[X86] Redefine avx512 packed fpclass intrinsics to return a vXi1 mask and implement the mask input argument using an 'and' IR instruction."
These were supposed to have been squashed to a single commit.
llvm-svn: 335566
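
The signature change being undone here is visible in the test updates below: the reverted patch made the packed fpclass intrinsics return a vXi1 mask with no mask operand, applying the mask through an explicit `and` in IR, while the revert restores the older form with an integer mask operand and an integer result. A minimal sketch of the two shapes for the 512-bit `pd` variant, using the declarations that appear in this diff (the function name `@fpclass_pd_512_masked` and the immediate value are illustrative only):

```llvm
; Form restored by this revert: the mask is an explicit i8 operand and the
; result is already an i8 bitmask (i16 for the 16-element ps.512 variant).
declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

define i8 @fpclass_pd_512_masked(<8 x double> %a, i8 %mask) {
entry:
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %a, i32 4, i8 %mask)
  ret i8 %res
}

; Form being reverted (shown as comments, since the two signatures cannot
; coexist in one module): the intrinsic returns <8 x i1> and the mask is
; applied with an 'and' before bitcasting back to i8.
;
;   declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32)
;
;   %c   = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %a, i32 4)
;   %m   = bitcast i8 %mask to <8 x i1>
;   %and = and <8 x i1> %c, %m
;   %res = bitcast <8 x i1> %and to i8
```

With the revert applied, AutoUpgrade no longer needs to rewrite the i8-mask form, and the mask operand is handled during lowering in X86ISelLowering via getVectorMaskingNode, as the restored hunks below show.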
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 18 |
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 43 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 25 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll | 161 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll | 31 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dq-intrinsics.ll | 20 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll | 171 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll | 62 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 43 |
9 files changed, 55 insertions, 519 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 5b8b18fc5fd..45ffdf4c21d 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -1251,22 +1251,28 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".                    llvm_v4i64_ty], [IntrNoMem]>;    def int_x86_avx512_mask_fpclass_pd_128 : -          Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_i32_ty], +         GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">, +          Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],            [IntrNoMem]>;    def int_x86_avx512_mask_fpclass_pd_256 : -          Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_i32_ty], +         GCCBuiltin<"__builtin_ia32_fpclasspd256_mask">, +          Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_i8_ty],            [IntrNoMem]>;    def int_x86_avx512_mask_fpclass_pd_512 : -          Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_i32_ty], +         GCCBuiltin<"__builtin_ia32_fpclasspd512_mask">, +          Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty],            [IntrNoMem]>;    def int_x86_avx512_mask_fpclass_ps_128 : -          Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_i32_ty], +         GCCBuiltin<"__builtin_ia32_fpclassps128_mask">, +          Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],            [IntrNoMem]>;    def int_x86_avx512_mask_fpclass_ps_256 : -          Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_i32_ty], +         GCCBuiltin<"__builtin_ia32_fpclassps256_mask">, +          Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_i8_ty],            [IntrNoMem]>;    def int_x86_avx512_mask_fpclass_ps_512 : -          Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty], +         GCCBuiltin<"__builtin_ia32_fpclassps512_mask">, +          Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty],            [IntrNoMem]>;    def int_x86_avx512_mask_fpclass_sd :           GCCBuiltin<"__builtin_ia32_fpclasssd_mask">, diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 65d9bd54e4f..4a79275feea 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -406,24 +406,6 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,    if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0      return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,                                       NewFn); -  if (Name == "avx512.mask.fpclass.pd.128") // Added in 7.0 -    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_128, -                                     NewFn); -  if (Name == "avx512.mask.fpclass.pd.256") // Added in 7.0 -    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_256, -                                     NewFn); -  if (Name == "avx512.mask.fpclass.pd.512") // Added in 7.0 -    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_pd_512, -                                     NewFn); -  if (Name == "avx512.mask.fpclass.ps.128") // Added in 7.0 -    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_128, -                                     NewFn); -  if (Name == "avx512.mask.fpclass.ps.256") // Added in 7.0 -    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_256, -                                     NewFn); -  if (Name == "avx512.mask.fpclass.ps.512") // Added in 7.0 
-    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_fpclass_ps_512, -                                     NewFn);    // frcz.ss/sd may need to have an argument dropped. Added in 3.2    if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { @@ -3143,31 +3125,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {      return;    } -  case Intrinsic::x86_avx512_mask_fpclass_pd_128: -  case Intrinsic::x86_avx512_mask_fpclass_pd_256: -  case Intrinsic::x86_avx512_mask_fpclass_pd_512: -  case Intrinsic::x86_avx512_mask_fpclass_ps_128: -  case Intrinsic::x86_avx512_mask_fpclass_ps_256: -  case Intrinsic::x86_avx512_mask_fpclass_ps_512: { -    SmallVector<Value *, 4> Args; -    Args.push_back(CI->getArgOperand(0)); -    Args.push_back(CI->getArgOperand(1)); - -    NewCall = Builder.CreateCall(NewFn, Args); -    unsigned NumElts = Args[0]->getType()->getVectorNumElements(); -    Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, CI->getArgOperand(2), -                                        NumElts); - -    std::string Name = CI->getName(); -    if (!Name.empty()) { -      CI->setName(Name + ".old"); -      NewCall->setName(Name); -    } -    CI->replaceAllUsesWith(Res); -    CI->eraseFromParent(); -    return; -  } -    case Intrinsic::thread_pointer: {      NewCall = Builder.CreateCall(NewFn, {});      break; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 660e555bc71..e0637094a06 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20748,11 +20748,23 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,                                    Mask, PassThru, Subtarget, DAG);      }      case FPCLASS: { -      // FPclass intrinsics -      SDValue Src1 = Op.getOperand(1); -      MVT MaskVT = Op.getSimpleValueType(); -      SDValue Imm = Op.getOperand(2); -      return DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm); +      // FPclass intrinsics with mask +       SDValue Src1 = Op.getOperand(1); +       MVT VT = Src1.getSimpleValueType(); +       MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); +       SDValue Imm = Op.getOperand(2); +       SDValue Mask = Op.getOperand(3); +       MVT BitcastVT = MVT::getVectorVT(MVT::i1, +                                     Mask.getSimpleValueType().getSizeInBits()); +       SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm); +       SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask, SDValue(), +                                                  Subtarget, DAG); +       // Need to fill with zeros to ensure the bitcast will produce zeroes +       // for the upper bits in the v2i1/v4i1 case. 
+       SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, +                                 DAG.getConstant(0, dl, BitcastVT), +                                 FPclassMask, DAG.getIntPtrConstant(0, dl)); +       return DAG.getBitcast(Op.getValueType(), Res);      }      case FPCLASSS: {        SDValue Src1 = Op.getOperand(1); @@ -20796,7 +20808,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,      }      case CMP_MASK_CC: { -      MVT MaskVT = Op.getSimpleValueType(); +      MVT VT = Op.getOperand(1).getSimpleValueType(); +      MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());        SDValue Cmp;        SDValue CC = Op.getOperand(3);        CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll deleted file mode 100644 index cd86d5e10f6..00000000000 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll +++ /dev/null @@ -1,161 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,X64 - -; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512dq-builtins.c - -define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> %__A) { -; X86-LABEL: test_mm512_mask_fpclass_pd_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    vfpclasspd $4, %zmm0, %k0 -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    andb {{[0-9]+}}(%esp), %al -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    vzeroupper -; X86-NEXT:    retl -; -; X64-LABEL: test_mm512_mask_fpclass_pd_mask: -; X64:       # %bb.0: # %entry -; X64-NEXT:    vfpclasspd $4, %zmm0, %k0 -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    andb %dil, %al -; X64-NEXT:    # kill: def $al killed $al killed $eax -; X64-NEXT:    vzeroupper -; X64-NEXT:    retq -entry: -  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4) -  %1 = bitcast i8 %__U to <8 x i1> -  %2 = and <8 x i1> %0, %1 -  %3 = bitcast <8 x i1> %2 to i8 -  ret i8 %3 -} - -declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32) - -define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) { -; CHECK-LABEL: test_mm512_fpclass_pd_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    vzeroupper -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %__A, i32 4) -  %1 = bitcast <8 x i1> %0 to i8 -  ret i8 %1 -} - -define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x float> %__A) { -; X86-LABEL: test_mm512_mask_fpclass_ps_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    vfpclassps $4, %zmm0, %k0 -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax -; X86-NEXT:    # kill: def $ax killed $ax killed $eax -; X86-NEXT:    vzeroupper -; X86-NEXT:    retl -; -; X64-LABEL: test_mm512_mask_fpclass_ps_mask: -; X64:       # %bb.0: # %entry -; X64-NEXT:    vfpclassps $4, %zmm0, %k0 -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    andl %edi, %eax -; X64-NEXT:    # kill: def $ax killed $ax killed $eax -; X64-NEXT:    vzeroupper -; 
X64-NEXT:    retq -entry: -  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4) -  %1 = bitcast i16 %__U to <16 x i1> -  %2 = and <16 x i1> %0, %1 -  %3 = bitcast <16 x i1> %2 to i16 -  ret i16 %3 -} - -declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32) - -define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) { -; CHECK-LABEL: test_mm512_fpclass_ps_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax -; CHECK-NEXT:    vzeroupper -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = tail call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %__A, i32 4) -  %1 = bitcast <16 x i1> %0 to i16 -  ret i16 %1 -} - -define zeroext i8 @test_mm_fpclass_sd_mask(<4 x float> %__A) { -; CHECK-LABEL: test_mm_fpclass_sd_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = bitcast <4 x float> %__A to <2 x double> -  %1 = tail call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %0, i32 2, i8 -1) -  ret i8 %1 -} - -declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8) - -define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__A) { -; X86-LABEL: test_mm_mask_fpclass_sd_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    retl -; -; X64-LABEL: test_mm_mask_fpclass_sd_mask: -; X64:       # %bb.0: # %entry -; X64-NEXT:    kmovw %edi, %k1 -; X64-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    # kill: def $al killed $al killed $eax -; X64-NEXT:    retq -entry: -  %0 = bitcast <4 x float> %__A to <2 x double> -  %1 = tail call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %0, i32 2, i8 %__U) -  ret i8 %1 -} - -define zeroext i8 @test_mm_fpclass_ss_mask(<4 x float> %__A) { -; CHECK-LABEL: test_mm_fpclass_ss_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclassss $2, %xmm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = tail call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %__A, i32 2, i8 -1) -  ret i8 %0 -} - -declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8) - -define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__A) { -; X86-LABEL: test_mm_mask_fpclass_ss_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    retl -; -; X64-LABEL: test_mm_mask_fpclass_ss_mask: -; X64:       # %bb.0: # %entry -; X64-NEXT:    kmovw %edi, %k1 -; X64-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    # kill: def $al killed $al killed $eax -; X64-NEXT:    retq -entry: -  %0 = tail call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %__A, i32 2, i8 %__U) -  ret i8 %0 -} diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll index 4463a00b2b8..3105e6d4842 100644 --- 
a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -536,34 +536,3 @@ define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {    %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)    ret i8 %res  } - -declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8) - -define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) { -; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: -; CHECK:       # %bb.0: -; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02] -; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04] -; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -    %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1) -    %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res) -    ret i8 %res1 -} -declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16) - -define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) { -; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: -; CHECK:       # %bb.0: -; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02] -; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04] -; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax -; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -    %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1) -    %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res) -    ret i16 %res1 -} diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll index 41e0952d51a..e2d25adf3c8 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -600,7 +600,7 @@ define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x dou    ret <2 x double> %res4  } -declare <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32) +declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)  define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: @@ -611,13 +611,11 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4) -  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2) -  %1 = and <8 x i1> %res1, %res -  %2 = bitcast <8 x i1> %1 to i8 -  ret i8 %2 +    %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1) +    %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res) +    ret i8 %res1  } -declare <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32) +declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)  define 
i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: @@ -628,11 +626,9 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {  ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4) -  %res1 = call <16 x i1> @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2) -  %1 = and <16 x i1> %res1, %res -  %2 = bitcast <16 x i1> %1 to i16 -  ret i16 %2 +    %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1) +    %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res) +    ret i16 %res1  }  declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8) diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll index 48cfa9a4eaf..4393418382b 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll @@ -203,174 +203,3 @@ entry:    %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer    ret <4 x double> %1  } - -define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %__A) { -; X86-LABEL: test_mm_mask_fpclass_pd_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1} -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    retl -; -; X64-LABEL: test_mm_mask_fpclass_pd_mask: -; X64:       # %bb.0: # %entry -; X64-NEXT:    kmovw %edi, %k1 -; X64-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1} -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    # kill: def $al killed $al killed $eax -; X64-NEXT:    retq -entry: -  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2) -  %1 = bitcast i8 %__U to <8 x i1> -  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> -  %2 = and <2 x i1> %0, %extract -  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> -  %4 = bitcast <8 x i1> %3 to i8 -  ret i8 %4 -} - -declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32) - -define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) { -; CHECK-LABEL: test_mm_fpclass_pd_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = tail call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %__A, i32 2) -  %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> -  %2 = bitcast <8 x i1> %1 to i8 -  ret i8 %2 -} - -define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double> %__A) { -; X86-LABEL: test_mm256_mask_fpclass_pd_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    vzeroupper -; X86-NEXT:    retl -; -; X64-LABEL: test_mm256_mask_fpclass_pd_mask: -; X64:       # %bb.0: # %entry -; 
X64-NEXT:    kmovw %edi, %k1 -; X64-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    # kill: def $al killed $al killed $eax -; X64-NEXT:    vzeroupper -; X64-NEXT:    retq -entry: -  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2) -  %1 = bitcast i8 %__U to <8 x i1> -  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -  %2 = and <4 x i1> %0, %extract -  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -  %4 = bitcast <8 x i1> %3 to i8 -  ret i8 %4 -} - -declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32) - -define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) { -; CHECK-LABEL: test_mm256_fpclass_pd_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    vzeroupper -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %__A, i32 2) -  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -  %2 = bitcast <8 x i1> %1 to i8 -  ret i8 %2 -} - -define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__A) { -; X86-LABEL: test_mm_mask_fpclass_ps_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    retl -; -; X64-LABEL: test_mm_mask_fpclass_ps_mask: -; X64:       # %bb.0: # %entry -; X64-NEXT:    kmovw %edi, %k1 -; X64-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    # kill: def $al killed $al killed $eax -; X64-NEXT:    retq -entry: -  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2) -  %1 = bitcast i8 %__U to <8 x i1> -  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -  %2 = and <4 x i1> %0, %extract -  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -  %4 = bitcast <8 x i1> %3 to i8 -  ret i8 %4 -} - -declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32) - -define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) { -; CHECK-LABEL: test_mm_fpclass_ps_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = tail call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %__A, i32 2) -  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -  %2 = bitcast <8 x i1> %1 to i8 -  ret i8 %2 -} - -define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) { -; X86-LABEL: test_mm256_mask_fpclass_ps_mask: -; X86:       # %bb.0: # %entry -; X86-NEXT:    vfpclassps $2, %ymm0, %k0 -; X86-NEXT:    kmovw %k0, %eax -; X86-NEXT:    andb {{[0-9]+}}(%esp), %al -; X86-NEXT:    # kill: def $al killed $al killed $eax -; X86-NEXT:    vzeroupper -; X86-NEXT:    retl -; -; X64-LABEL: test_mm256_mask_fpclass_ps_mask: -; X64:       # %bb.0: # %entry -; 
X64-NEXT:    vfpclassps $2, %ymm0, %k0 -; X64-NEXT:    kmovw %k0, %eax -; X64-NEXT:    andb %dil, %al -; X64-NEXT:    # kill: def $al killed $al killed $eax -; X64-NEXT:    vzeroupper -; X64-NEXT:    retq -entry: -  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2) -  %1 = bitcast i8 %__U to <8 x i1> -  %2 = and <8 x i1> %0, %1 -  %3 = bitcast <8 x i1> %2 to i8 -  ret i8 %3 -} - -declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32) - -define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) { -; CHECK-LABEL: test_mm256_fpclass_ps_mask: -; CHECK:       # %bb.0: # %entry -; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 -; CHECK-NEXT:    kmovw %k0, %eax -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    vzeroupper -; CHECK-NEXT:    ret{{[l|q]}} -entry: -  %0 = tail call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %__A, i32 2) -  %1 = bitcast <8 x i1> %0 to i8 -  ret i8 %1 -} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll index a7a44253c80..457caa65daa 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -2928,65 +2928,3 @@ define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x    %res2 = fadd <4 x double> %res, %res1    ret <4 x double> %res2  } - -declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8) - -define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) { -; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128: -; CHECK:       # %bb.0: -; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04] -; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02] -; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res) -  ret i8 %res1 -} - -declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8) - -define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) { -; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256: -; CHECK:       # %bb.0: -; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04] -; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02] -; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res) -  ret i8 %res1 -} - -declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8) - -define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) { -; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128: -; CHECK:       # %bb.0: -; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02] -; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04] -; CHECK-NEXT:    kmovw %k0, %eax # encoding: 
[0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res) -  ret i8 %res1 -} - -declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8) - -define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) { -; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256: -; CHECK:       # %bb.0: -; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04] -; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02] -; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT:    # kill: def $al killed $al killed $eax -; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1) -  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res) -  ret i8 %res1 -} diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index e0bfc188a27..ec6ca328693 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -734,7 +734,7 @@ define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x f    ret <8 x float> %res2  } -declare <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32) +declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)  define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128: @@ -744,15 +744,12 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {  ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2) -  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4) -  %1 = and <4 x i1> %res1, %res -  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -  %3 = bitcast <8 x i1> %2 to i8 -  ret i8 %3 +  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 -1) +  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 %res) +  ret i8 %res1  } -declare <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32) +declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)  define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256: @@ -763,14 +760,12 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2) -  %res1 = call <8 x i1> @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4) -  %1 = and <8 x i1> %res1, %res -  %2 = bitcast <8 x i1> %1 to i8 -  ret i8 %2 +  %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 -1) +  %res1 = call 
i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 %res) +  ret i8 %res1  } -declare <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32) +declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)  define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128: @@ -780,15 +775,12 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {  ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4) -  %res1 = call <2 x i1> @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2) -  %1 = and <2 x i1> %res1, %res -  %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> -  %3 = bitcast <8 x i1> %2 to i8 -  ret i8 %3 +  %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 -1) +  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 %res) +  ret i8 %res1  } -declare <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32) +declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)  define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {  ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256: @@ -799,10 +791,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {  ; CHECK-NEXT:    # kill: def $al killed $al killed $eax  ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]  ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3] -  %res = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2) -  %res1 = call <4 x i1> @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4) -  %1 = and <4 x i1> %res1, %res -  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -  %3 = bitcast <8 x i1> %2 to i8 -  ret i8 %3 +  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 -1) +  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 %res) +  ret i8 %res1  }  | 

