Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td                 |  46
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td                    |  26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  | 122
-rw-r--r--  llvm/test/CodeGen/X86/vec_floor.ll                    | 112
-rw-r--r--  llvm/test/Transforms/InstCombine/X86/x86-avx.ll       |  41
-rw-r--r--  llvm/test/Transforms/InstCombine/X86/x86-avx512.ll    | 207
-rw-r--r--  llvm/test/Transforms/InstCombine/X86/x86-sse41.ll     |  44
7 files changed, 72 insertions, 526 deletions
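For context, the removed InstCombine rewrote the x86 rounding intrinsics into the generic llvm.floor/llvm.ceil intrinsics whenever the immediate selected plain floor (1) or ceil (2) rounding. A minimal before/after sketch of that transform, lifted from the deleted x86-avx.ll test below (the function name is illustrative):

declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32)
declare <8 x float> @llvm.floor.v8f32(<8 x float>)

define <8 x float> @round_ps_floor(<8 x float> %a) {
  ; imm 1 selects round-toward-negative-infinity, i.e. floor.
  %r = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a, i32 1)
  ; instcombine used to replace the call above with:
  ;   %r = call <8 x float> @llvm.floor.v8f32(<8 x float> %a)
  ret <8 x float> %r
}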
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 73abd964aa1..7c7c27340cd 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9392,32 +9392,6 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
 
-multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
-                                    X86VectorVTInfo _, PatLeaf ZeroFP,
-                                    bits<8> ImmV, Predicate BasePredicate> {
-  let Predicates = [BasePredicate] in {
-    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
-               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
-               (extractelt _.VT:$dst, (iPTR 0))))),
-              (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
-               _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
-
-    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
-               (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
-              (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
-               VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
-  }
-}
-
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
-                                v4f32x_info, fp32imm0, 0x09, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
-                                v4f32x_info, fp32imm0, 0x0A, HasAVX512>;
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
-                                v2f64x_info, fp64imm0, 0x09, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
-                                v2f64x_info, fp64imm0, 0x0A,  HasAVX512>;
-
 //-------------------------------------------------
 // Integer truncate and extend operations
@@ -12293,26 +12267,6 @@ multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
 
 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
 
-multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
-                                                 SDNode Move, X86VectorVTInfo _,
-                                                 bits<8> ImmV> {
-  let Predicates = [HasAVX512] in {
-    def : Pat<(_.VT (Move _.VT:$dst,
-                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
-              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
-                                                        (i32 ImmV))>;
-  }
-}
-
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
-                                             v4f32x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
-                                             v4f32x_info, 0x02>;
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
-                                             v2f64x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
-                                             v2f64x_info, 0x02>;
-
 //===----------------------------------------------------------------------===//
 // AES instructions
 //===----------------------------------------------------------------------===//
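The TableGen patterns deleted above matched the "round the scalar lane, reinsert into the destination" idiom and selected the immediate form of VRNDSCALESS/VRNDSCALESD directly. The IR shape those patterns relied on looks roughly like this (a sketch; the function name is illustrative, and the same shape appears in the vec_floor.ll tests further below):

declare float @llvm.floor.f32(float)

define <4 x float> @floor_lowest_lane(<4 x float> %dst, <4 x float> %src) {
  ; floor lane 0 of %src; lanes 1-3 of the result come from %dst.
  %s = extractelement <4 x float> %src, i32 0
  %f = call float @llvm.floor.f32(float %s)
  %r = insertelement <4 x float> %dst, float %f, i32 0
  ret <4 x float> %r
}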
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 737296d9714..18d9af8bdcd 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3099,23 +3099,6 @@ multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Mo
   }
 }
 
-multiclass scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix, SDNode Move,
-                                          ValueType VT, bits<8> ImmV,
-                                          Predicate BasePredicate> {
-  let Predicates = [BasePredicate] in {
-    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
-                                  (OpNode (extractelt VT:$src, 0))))),
-              (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
-  }
-
-  // Repeat for AVX versions of the instructions.
-  let Predicates = [UseAVX] in {
-    def : Pat<(VT (Move VT:$dst, (scalar_to_vector
-                                  (OpNode (extractelt VT:$src, 0))))),
-              (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
-  }
-}
-
 defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
 defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
@@ -5984,15 +5967,6 @@ let Predicates = [UseSSE41] in {
             (ROUNDPDm addr:$src, (i32 0xB))>;
 }
 
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSS", X86Movss,
-                                      v4f32, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSS", X86Movss,
-                                      v4f32, 0x02, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSD", X86Movsd,
-                                      v2f64, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSD", X86Movsd,
-                                      v2f64, 0x02, UseSSE41>;
-
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Packed Bit Test
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 2013de065a0..8d022617d85 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -607,105 +607,6 @@ static Value *simplifyX86pack(IntrinsicInst &II,
   return Builder.CreateTrunc(Shuffle, ResTy);
 }
 
-// Replace X86-specific intrinsics with generic floor-ceil where applicable.
-static Value *simplifyX86round(IntrinsicInst &II,
-                               InstCombiner::BuilderTy &Builder) {
-  ConstantInt *Arg = nullptr;
-  Intrinsic::ID IntrinsicID = II.getIntrinsicID();
-
-  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
-      IntrinsicID == Intrinsic::x86_sse41_round_sd)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
-  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-           IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
-  else
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
-  if (!Arg)
-    return nullptr;
-  unsigned RoundControl = Arg->getZExtValue();
-
-  Arg = nullptr;
-  unsigned SAE = 0;
-  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
-  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-           IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
-    Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
-  else
-    SAE = 4;
-  if (!SAE) {
-    if (!Arg)
-      return nullptr;
-    SAE = Arg->getZExtValue();
-  }
-
-  if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
-    return nullptr;
-
-  Value *Src, *Dst, *Mask;
-  bool IsScalar = false;
-  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
-      IntrinsicID == Intrinsic::x86_sse41_round_sd ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
-    IsScalar = true;
-    if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
-      Mask = II.getArgOperand(3);
-      Value *Zero = Constant::getNullValue(Mask->getType());
-      Mask = Builder.CreateAnd(Mask, 1);
-      Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
-      Dst = II.getArgOperand(2);
-    } else
-      Dst = II.getArgOperand(0);
-    Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
-  } else {
-    Src = II.getArgOperand(0);
-    if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
-        IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
-      Dst = II.getArgOperand(2);
-      Mask = II.getArgOperand(3);
-    } else {
-      Dst = Src;
-      Mask = ConstantInt::getAllOnesValue(
-          Builder.getIntNTy(Src->getType()->getVectorNumElements()));
-    }
-  }
-
-  Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
-  Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
-  if (!IsScalar) {
-    if (auto *C = dyn_cast<Constant>(Mask))
-      if (C->isAllOnesValue())
-        return Res;
-    auto *MaskTy = VectorType::get(
-        Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
-    Mask = Builder.CreateBitCast(Mask, MaskTy);
-    unsigned Width = Src->getType()->getVectorNumElements();
-    if (MaskTy->getVectorNumElements() > Width) {
-      uint32_t Indices[4];
-      for (unsigned i = 0; i != Width; ++i)
-        Indices[i] = i;
-      Mask = Builder.CreateShuffleVector(Mask, Mask,
-                                         makeArrayRef(Indices, Width));
-    }
-    return Builder.CreateSelect(Mask, Res, Dst);
-  }
-  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
-      IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
-    Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
-    Res = Builder.CreateSelect(Mask, Res, Dst);
-    Dst = II.getArgOperand(0);
-  }
-  return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
-}
-
 static Value *simplifyX86movmsk(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
   Value *Arg = II.getArgOperand(0);
@@ -2603,22 +2504,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
-  case Intrinsic::x86_sse41_round_ps:
-  case Intrinsic::x86_sse41_round_pd:
-  case Intrinsic::x86_avx_round_ps_256:
-  case Intrinsic::x86_avx_round_pd_256:
-  case Intrinsic::x86_avx512_mask_rndscale_ps_128:
-  case Intrinsic::x86_avx512_mask_rndscale_ps_256:
-  case Intrinsic::x86_avx512_mask_rndscale_ps_512:
-  case Intrinsic::x86_avx512_mask_rndscale_pd_128:
-  case Intrinsic::x86_avx512_mask_rndscale_pd_256:
-  case Intrinsic::x86_avx512_mask_rndscale_pd_512:
-  case Intrinsic::x86_avx512_mask_rndscale_ss:
-  case Intrinsic::x86_avx512_mask_rndscale_sd:
-    if (Value *V = simplifyX86round(*II, Builder))
-      return replaceInstUsesWith(*II, V);
-    break;
-
   case Intrinsic::x86_mmx_pmovmskb:
   case Intrinsic::x86_sse_movmsk_ps:
   case Intrinsic::x86_sse2_movmsk_pd:
@@ -2812,13 +2697,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     }
     break;
 
-  case Intrinsic::x86_sse41_round_ss:
-  case Intrinsic::x86_sse41_round_sd: {
-    if (Value *V = simplifyX86round(*II, Builder))
-      return replaceInstUsesWith(*II, V);
-    break;
-  }
-
   // Constant fold ashr( <A x Bi>, Ci ).
   // Constant fold lshr( <A x Bi>, Ci ).
   // Constant fold shl( <A x Bi>, Ci ).
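The deleted simplifyX86round handled both the unmasked SSE4.1/AVX forms and the masked AVX-512 rndscale forms, and only fired for round-control 1 (floor) or 2 (ceil) with the default SAE value of 4. For the masked scalar variants it scalarized the mask as well; the expansion it produced mirrors the CHECK lines of the deleted x86-avx512.ll tests further below (the function name is illustrative):

declare float @llvm.floor.f32(float)

; What the old combine emitted for
;   @llvm.x86.avx512.mask.rndscale.ss(%src0, %src1, %dst, i8 %k, i32 1, i32 4):
define <4 x float> @rndscale_ss_floor_expansion(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k) {
  ; bit 0 of %k chooses between the rounded lane and the passthrough lane.
  %m = and i8 %k, 1
  %c = icmp eq i8 %m, 0
  %s = extractelement <4 x float> %src1, i64 0
  %f = call float @llvm.floor.f32(float %s)
  %d = extractelement <4 x float> %dst, i64 0
  %lo = select i1 %c, float %d, float %f
  %r = insertelement <4 x float> %src0, float %lo, i64 0
  ret <4 x float> %r
}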
diff --git a/llvm/test/CodeGen/X86/vec_floor.ll b/llvm/test/CodeGen/X86/vec_floor.ll
index 448c5efce17..a0d5ecd5c5d 100644
--- a/llvm/test/CodeGen/X86/vec_floor.ll
+++ b/llvm/test/CodeGen/X86/vec_floor.ll
@@ -821,18 +821,20 @@ define <4 x float> @const_trunc_v4f32() {
 define <4 x float> @floor_ss(<4 x float> %x, <4 x float> %y) nounwind {
 ; SSE41-LABEL: floor_ss:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundss $1, %xmm0, %xmm1
-; SSE41-NEXT:    movaps %xmm1, %xmm0
+; SSE41-NEXT:    roundss $9, %xmm0, %xmm0
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: floor_ss:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundss $1, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: floor_ss:
 ; AVX512:       ## %bb.0:
-; AVX512-NEXT:    vroundss $1, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512-NEXT:    retq
   %s = extractelement <4 x float> %x, i32 0
   %call = call float @llvm.floor.f32(float %s)
@@ -844,18 +846,20 @@ declare float @llvm.floor.f32(float %s)
 define <2 x double> @floor_sd(<2 x double> %x, <2 x double> %y) nounwind {
 ; SSE41-LABEL: floor_sd:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundsd $1, %xmm0, %xmm1
-; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    roundsd $9, %xmm0, %xmm0
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: floor_sd:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundsd $1, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: floor_sd:
 ; AVX512:       ## %bb.0:
-; AVX512-NEXT:    vroundsd $1, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; AVX512-NEXT:    retq
   %s = extractelement <2 x double> %x, i32 0
   %call = call double @llvm.floor.f64(double %s)
@@ -1373,8 +1377,9 @@ define <4 x float> @floor_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w
 ;
 ; AVX512-LABEL: floor_mask_ss:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -1414,8 +1419,9 @@ define <4 x float> @floor_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwi
 ;
 ; AVX512-LABEL: floor_maskz_ss:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -1451,8 +1457,9 @@ define <2 x double> @floor_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double
 ;
 ; AVX512-LABEL: floor_mask_sd:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -1492,8 +1499,9 @@ define <2 x double> @floor_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) nou
 ;
 ; AVX512-LABEL: floor_maskz_sd:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -1529,8 +1537,9 @@ define <4 x float> @floor_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x flo
 ;
 ; AVX512-LABEL: floor_mask_ss_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -1572,8 +1581,9 @@ define <4 x float> @floor_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k)
 ;
 ; AVX512-LABEL: floor_maskz_ss_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <4 x float> %x, i64 0
@@ -1608,8 +1618,9 @@ define <2 x double> @floor_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x
 ;
 ; AVX512-LABEL: floor_mask_sd_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -1651,8 +1662,9 @@ define <2 x double> @floor_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16
 ;
 ; AVX512-LABEL: floor_maskz_sd_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <2 x double> %x, i64 0
@@ -1683,8 +1695,9 @@ define <4 x float> @floor_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x flo
 ;
 ; AVX512-LABEL: floor_mask_ss_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm3
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovss %xmm3, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -1716,8 +1729,9 @@ define <4 x float> @floor_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwin
 ;
 ; AVX512-LABEL: floor_maskz_ss_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm2
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
   %mask = extractelement <4 x i1> %mask1, i64 0
@@ -1749,8 +1763,9 @@ define <2 x double> @floor_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x
 ;
 ; AVX512-LABEL: floor_mask_sd_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm3
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovsd %xmm3, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
@@ -1782,8 +1797,9 @@ define <2 x double> @floor_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) noun
 ;
 ; AVX512-LABEL: floor_maskz_sd_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm2
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
   %mask = extractelement <2 x i1> %mask1, i64 0
@@ -1797,18 +1813,20 @@ define <2 x double> @floor_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) noun
 define <4 x float> @ceil_ss(<4 x float> %x, <4 x float> %y) nounwind {
 ; SSE41-LABEL: ceil_ss:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundss $2, %xmm0, %xmm1
-; SSE41-NEXT:    movaps %xmm1, %xmm0
+; SSE41-NEXT:    roundss $10, %xmm0, %xmm0
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: ceil_ss:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundss $2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: ceil_ss:
 ; AVX512:       ## %bb.0:
-; AVX512-NEXT:    vroundss $2, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX512-NEXT:    retq
   %s = extractelement <4 x float> %x, i32 0
   %call = call float @llvm.ceil.f32(float %s)
@@ -1820,18 +1838,20 @@ declare float @llvm.ceil.f32(float %s)
 define <2 x double> @ceil_sd(<2 x double> %x, <2 x double> %y) nounwind {
 ; SSE41-LABEL: ceil_sd:
 ; SSE41:       ## %bb.0:
-; SSE41-NEXT:    roundsd $2, %xmm0, %xmm1
-; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    roundsd $10, %xmm0, %xmm0
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: ceil_sd:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    vroundsd $2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: ceil_sd:
 ; AVX512:       ## %bb.0:
-; AVX512-NEXT:    vroundsd $2, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; AVX512-NEXT:    retq
   %s = extractelement <2 x double> %x, i32 0
   %call = call double @llvm.ceil.f64(double %s)
@@ -2349,8 +2369,9 @@ define <4 x float> @ceil_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w,
 ;
 ; AVX512-LABEL: ceil_mask_ss:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -2390,8 +2411,9 @@ define <4 x float> @ceil_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwin
 ;
 ; AVX512-LABEL: ceil_maskz_ss:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -2427,8 +2449,9 @@ define <2 x double> @ceil_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double>
 ;
 ; AVX512-LABEL: ceil_mask_sd:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
@@ -2468,8 +2491,9 @@ define <2 x double> @ceil_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) noun
 ;
 ; AVX512-LABEL: ceil_maskz_sd:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = and i8 %k, 1
   %nmask = icmp eq i8 %mask, 0
@@ -2505,8 +2529,9 @@ define <4 x float> @ceil_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x floa
 ;
 ; AVX512-LABEL: ceil_mask_ss_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -2548,8 +2573,9 @@ define <4 x float> @ceil_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k)
 ;
 ; AVX512-LABEL: ceil_maskz_ss_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <4 x float> %x, i64 0
@@ -2584,8 +2610,9 @@ define <2 x double> @ceil_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x d
 ;
 ; AVX512-LABEL: ceil_mask_sd_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
@@ -2627,8 +2654,9 @@ define <2 x double> @ceil_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 %
 ;
 ; AVX512-LABEL: ceil_maskz_sd_trunc:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovsd %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask = trunc i16 %k to i1
   %s = extractelement <2 x double> %x, i64 0
@@ -2659,8 +2687,9 @@ define <4 x float> @ceil_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x floa
 ;
 ; AVX512-LABEL: ceil_mask_ss_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm3
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovss %xmm3, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -2692,8 +2721,9 @@ define <4 x float> @ceil_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwind
 ;
 ; AVX512-LABEL: ceil_maskz_ss_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm2
 ; AVX512-NEXT:    vcmpeqss %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
   %mask = extractelement <4 x i1> %mask1, i64 0
@@ -2725,8 +2755,9 @@ define <2 x double> @ceil_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x d
 ;
 ; AVX512-LABEL: ceil_mask_sd_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm3
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1}
+; AVX512-NEXT:    vmovsd %xmm3, %xmm1, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
@@ -2758,8 +2789,9 @@ define <2 x double> @ceil_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) nounw
 ;
 ; AVX512-LABEL: ceil_maskz_sd_mask8:
 ; AVX512:       ## %bb.0:
+; AVX512-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm2
 ; AVX512-NEXT:    vcmpeqsd %xmm1, %xmm0, %k1
-; AVX512-NEXT:    vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
   %mask = extractelement <2 x i1> %mask1, i64 0
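The updated CHECK lines above also encode an immediate change: the old patterns emitted roundss/roundsd with imm 1 or 2, while the generic floor/ceil lowering uses imm 9 (0x9) and 10 (0xA), which additionally set bit 3 to mask precision exceptions, and the rounded lane is now blended back into the destination explicitly. For the packed masked rndscale forms, the removed combine instead widened the scalar mask to a vector of i1 and selected lanewise, narrowing the mask with a shufflevector when the vector has fewer lanes than the mask has bits. A sketch of the 128-bit case, mirroring the deleted x86-avx512.ll CHECK lines below (the function name is illustrative):

declare <4 x float> @llvm.floor.v4f32(<4 x float>)

define <4 x float> @rndscale_ps_128_floor_expansion(<4 x float> %src, <4 x float> %dst, i8 %k) {
  %f = call <4 x float> @llvm.floor.v4f32(<4 x float> %src)
  ; an i8 mask covers 8 lanes; keep only the low 4 bits for a <4 x float> op.
  %mv = bitcast i8 %k to <8 x i1>
  %m = shufflevector <8 x i1> %mv, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r = select <4 x i1> %m, <4 x float> %f, <4 x float> %dst
  ret <4 x float> %r
}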
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx.ll
deleted file mode 100644
index bad27d1e0c4..00000000000
--- a/llvm/test/Transforms/InstCombine/X86/x86-avx.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32)
-declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32)
-
-define <8 x float> @test_round_ps_floor(<8 x float> %a) {
-; CHECK-LABEL: @test_round_ps_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A:%.*]])
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a, i32 1)
-  ret <8 x float> %1
-}
-
-define <8 x float> @test_round_ps_ceil(<8 x float> %a) {
-; CHECK-LABEL: @test_round_ps_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[A:%.*]])
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a, i32 2)
-  ret <8 x float> %1
-}
-
-define <4 x double> @test_round_pd_floor(<4 x double> %a) {
-; CHECK-LABEL: @test_round_pd_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[A:%.*]])
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a, i32 1)
-  ret <4 x double> %1
-}
-
-define <4 x double> @test_round_pd_ceil(<4 x double> %a) {
-; CHECK-LABEL: @test_round_pd_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[A:%.*]])
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a, i32 2)
-  ret <4 x double> %1
-}
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
index 0e209989081..8491dec37a0 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
@@ -916,213 +916,6 @@ declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32)
 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32)
 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32)
 
-declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
-declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
-declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
-declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
-declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
-declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
-declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
-declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
-
-define <4 x float> @test_rndscale_ss_floor(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ss_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[SRC1:%.*]], i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.floor.f32(float [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[DST:%.*]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP2]], float [[TMP5]], float [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[SRC0:%.*]], float [[TMP6]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP7]]
-;
-  %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k, i32 1, i32 4)
-  ret <4 x float> %1
-}
-
-define <4 x float> @test_rndscale_ss_ceil(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ss_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[SRC1:%.*]], i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.ceil.f32(float [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[DST:%.*]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP2]], float [[TMP5]], float [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[SRC0:%.*]], float [[TMP6]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP7]]
-;
-  %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k, i32 2, i32 4)
-  ret <4 x float> %1
-}
-
-define <2 x double> @test_rndscale_sd_floor(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_sd_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[SRC1:%.*]], i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = call double @llvm.floor.f64(double [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[DST:%.*]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP2]], double [[TMP5]], double [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[SRC0:%.*]], double [[TMP6]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP7]]
-;
-  %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k, i32 1, i32 4)
-  ret <2 x double> %1
-}
-
-define <2 x double> @test_rndscale_sd_ceil(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_sd_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[K:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[SRC1:%.*]], i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = call double @llvm.ceil.f64(double [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[DST:%.*]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP2]], double [[TMP5]], double [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[SRC0:%.*]], double [[TMP6]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP7]]
-;
-  %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k, i32 2, i32 4)
-  ret <2 x double> %1
-}
-
-define <4 x float> @test_rndscale_ps_128_floor(<4 x float> %src, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_128_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP1]], <4 x float> [[DST:%.*]]
-; CHECK-NEXT:    ret <4 x float> [[TMP4]]
-;
-  %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %src, i32 1, <4 x float> %dst, i8 %k)
-  ret <4 x float> %1
-}
-
-define <4 x float> @test_rndscale_ps_128_ceil(<4 x float> %src, <4 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_128_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP1]], <4 x float> [[DST:%.*]]
-; CHECK-NEXT:    ret <4 x float> [[TMP4]]
-;
-  %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %src, i32 2, <4 x float> %dst, i8 %k)
-  ret <4 x float> %1
-}
-
-define <8 x float> @test_rndscale_ps_256_floor(<8 x float> %src, <8 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_256_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[DST:%.*]]
-; CHECK-NEXT:    ret <8 x float> [[TMP3]]
-;
-  %1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %src, i32 1, <8 x float> %dst, i8 %k)
-  ret <8 x float> %1
-}
-
-define <8 x float> @test_rndscale_ps_256_ceil(<8 x float> %src, <8 x float> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_ps_256_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[DST:%.*]]
-; CHECK-NEXT:    ret <8 x float> [[TMP3]]
-;
-  %1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %src, i32 2, <8 x float> %dst, i8 %k)
-  ret <8 x float> %1
-}
-
-define <16 x float> @test_rndscale_ps_512_floor(<16 x float> %src, <16 x float> %dst, i16 %k) {
-; CHECK-LABEL: @test_rndscale_ps_512_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.floor.v16f32(<16 x float> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[K:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[DST:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %1 = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %src, i32 1, <16 x float> %dst, i16 %k, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_rndscale_ps_512_ceil(<16 x float> %src, <16 x float> %dst, i16 %k) {
-; CHECK-LABEL: @test_rndscale_ps_512_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.ceil.v16f32(<16 x float> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[K:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[DST:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %1 = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %src, i32 2, <16 x float> %dst, i16 %k, i32 4)
-  ret <16 x float> %1
-}
-
-define <2 x double> @test_rndscale_pd_128_floor(<2 x double> %src, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_128_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP1]], <2 x double> [[DST:%.*]]
-; CHECK-NEXT:    ret <2 x double> [[TMP4]]
-;
-  %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %src, i32 1, <2 x double> %dst, i8 %k)
-  ret <2 x double> %1
-}
-
-define <2 x double> @test_rndscale_pd_128_ceil(<2 x double> %src, <2 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_128_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP1]], <2 x double> [[DST:%.*]]
-; CHECK-NEXT:    ret <2 x double> [[TMP4]]
-;
-  %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %src, i32 2, <2 x double> %dst, i8 %k)
-  ret <2 x double> %1
-}
-
-define <4 x double> @test_rndscale_pd_256_floor(<4 x double> %src, <4 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_256_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> [[DST:%.*]]
-; CHECK-NEXT:    ret <4 x double> [[TMP4]]
-;
-  %1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %src, i32 1, <4 x double> %dst, i8 %k)
-  ret <4 x double> %1
-}
-
-define <4 x double> @test_rndscale_pd_256_ceil(<4 x double> %src, <4 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_256_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> [[DST:%.*]]
-; CHECK-NEXT:    ret <4 x double> [[TMP4]]
-;
-  %1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %src, i32 2, <4 x double> %dst, i8 %k)
-  ret <4 x double> %1
-}
-
-define <8 x double> @test_rndscale_pd_512_floor(<8 x double> %src, <8 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_512_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.floor.v8f64(<8 x double> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[DST:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %1 = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %src, i32 1, <8 x double> %dst, i8 %k, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_rndscale_pd_512_ceil(<8 x double> %src, <8 x double> %dst, i8 %k) {
-; CHECK-LABEL: @test_rndscale_pd_512_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.ceil.v8f64(<8 x double> [[SRC:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[DST:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %1 = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %src, i32 2, <8 x double> %dst, i8 %k, i32 4)
-  ret <8 x double> %1
-}
-
 declare float @llvm.fma.f32(float, float, float) #1
 
 define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll
index ddc3b7372ea..f95b1b4d552 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll
@@ -13,28 +13,6 @@ define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-define <2 x double> @test_round_sd_floor(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_round_sd_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP3]]
-;
-  %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 1)
-  ret <2 x double> %1
-}
-
-define <2 x double> @test_round_sd_ceil(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_round_sd_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP3]]
-;
-  %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 2)
-  ret <2 x double> %1
-}
-
 define double @test_round_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_round_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
@@ -79,28 +57,6 @@ define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %7
 }
 
-define <4 x float> @test_round_ss_floor(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_round_ss_floor(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP3]]
-;
-  %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 1)
-  ret <4 x float> %1
-}
-
-define <4 x float> @test_round_ss_ceil(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_round_ss_ceil(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP3]]
-;
-  %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 2)
-  ret <4 x float> %1
-}
-
 define float @test_round_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_round_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0

