| field | value | date |
|---|---|---|
| author | Liu, Chen3 <chen3.liu@intel.com> | 2019-12-31 11:38:17 +0800 |
| committer | Liu, Chen3 <chen3.liu@intel.com> | 2020-01-01 20:42:12 +0800 |
| commit | 8af492ade1bc5367ae529e451b9c9cd9e3d55e53 | |
| tree | bc8f8c6cd50bedeab5ab326c656c11fe6f09f654 | |
| parent | d2bb8c16e711602481c8b33d0e2ccc9994eb6641 | |
Add strict-FP support for the X86 rounding operations (ceil, floor, trunc, rint, nearbyint)
Differential Revision: https://reviews.llvm.org/D72026
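For context, here is a minimal IR sketch of what this patch enables, modelled on (but not copied verbatim from) the new fp-strict-scalar-round.ll test: on an SSE4.1 target the constrained ceil intrinsic below now selects to a single `roundss $10, %xmm0, %xmm0` while preserving the exception semantics requested by `!"fpexcept.strict"`, instead of being expanded once the strict node reaches legalization.

```llvm
; Sketch modelled on the new tests; function name is illustrative.
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)

define float @ceil_strict(float %f) strictfp {
  ; With this patch: selects to "roundss $10, %xmm0, %xmm0" on SSE4.1.
  %res = call float @llvm.experimental.constrained.ceil.f32(
                       float %f, metadata !"fpexcept.strict")
  ret float %res
}
```

This can be exercised the same way the new tests do, e.g. `llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation`, where the flag keeps the strict nodes from being mutated back into their non-strict forms before instruction selection.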
| file | lines changed |
|---|---|
| llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 33 |
| llvm/lib/Target/X86/X86ISelLowering.cpp | 52 |
| llvm/lib/Target/X86/X86ISelLowering.h | 2 |
| llvm/lib/Target/X86/X86InstrAVX512.td | 6 |
| llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 6 |
| llvm/lib/Target/X86/X86InstrSSE.td | 28 |
| llvm/test/CodeGen/X86/fp-strict-scalar-round.ll (new) | 474 |
| llvm/test/CodeGen/X86/vec-strict-256.ll | 117 |
| llvm/test/CodeGen/X86/vec-strict-512.ll | 105 |
| llvm/test/CodeGen/X86/vec-strict-round-128.ll (new) | 174 |

10 files changed, 956 insertions(+), 41 deletions(-)
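The immediates that `PreprocessISelDAG` assigns in the diff below follow the ROUND*/VRNDSCALE imm8 encoding from the Intel SDM: bits 1:0 select the rounding mode (00 nearest-even, 01 down, 10 up, 11 toward zero), bit 2 overrides bits 1:0 with the current MXCSR rounding mode, and bit 3 suppresses the precision (inexact) exception. That is why ceil, floor, and trunc map to 0xA, 0x9, and 0xB, and why rint (0x4) and nearbyint (0xC) differ only in bit 3: rint may raise inexact, nearbyint must not. A hypothetical side-by-side in the style of the new tests:

```llvm
; Illustrative functions (names are not from the patch) showing the one-bit
; difference between the two MXCSR-rounding operations.
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)

define float @rint_strict(float %f) strictfp {
  ; lowers to: roundss $4, %xmm0, %xmm0   (MXCSR mode, inexact observable)
  %res = call float @llvm.experimental.constrained.rint.f32(float %f,
                       metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret float %res
}

define float @nearbyint_strict(float %f) strictfp {
  ; lowers to: roundss $12, %xmm0, %xmm0  (MXCSR mode, inexact suppressed)
  %res = call float @llvm.experimental.constrained.nearbyint.f32(float %f,
                       metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret float %res
}
```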
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 1f262e076c7..4e29597e941 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -897,27 +897,50 @@ void X86DAGToDAGISel::PreprocessISelDAG() {        continue;      }      case ISD::FCEIL: +    case ISD::STRICT_FCEIL:      case ISD::FFLOOR: +    case ISD::STRICT_FFLOOR:      case ISD::FTRUNC: +    case ISD::STRICT_FTRUNC:      case ISD::FNEARBYINT: -    case ISD::FRINT: { +    case ISD::STRICT_FNEARBYINT: +    case ISD::FRINT: +    case ISD::STRICT_FRINT: {        // Replace fp rounding with their X86 specific equivalent so we don't        // need 2 sets of patterns.        unsigned Imm;        switch (N->getOpcode()) {        default: llvm_unreachable("Unexpected opcode!"); +      case ISD::STRICT_FCEIL:        case ISD::FCEIL:      Imm = 0xA; break; +      case ISD::STRICT_FFLOOR:        case ISD::FFLOOR:     Imm = 0x9; break; +      case ISD::STRICT_FTRUNC:        case ISD::FTRUNC:     Imm = 0xB; break; +      case ISD::STRICT_FNEARBYINT:        case ISD::FNEARBYINT: Imm = 0xC; break; +      case ISD::STRICT_FRINT:        case ISD::FRINT:      Imm = 0x4; break;        }        SDLoc dl(N); -      SDValue Res = CurDAG->getNode( -          X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0), -          CurDAG->getTargetConstant(Imm, dl, MVT::i8)); +      bool IsStrict = N->isStrictFPOpcode(); +      SDValue Res; +      if (IsStrict) +        Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, +                              {N->getValueType(0), MVT::Other}, +                              {N->getOperand(0), N->getOperand(1), +                               CurDAG->getTargetConstant(Imm, dl, MVT::i8)}); +      else +        Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0), +                              N->getOperand(0), +                              CurDAG->getTargetConstant(Imm, dl, MVT::i8));        --I; -      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); +      if (IsStrict) { +        SDValue From[] = {SDValue(N, 0), SDValue(N, 1)}; +        SDValue To[] = {Res.getValue(0), Res.getValue(1)}; +        CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); +      } else +        CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);        ++I;        CurDAG->DeleteNode(N);        continue; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0f171e55dd..38911758a2e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1068,11 +1068,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,    if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {      for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { -      setOperationAction(ISD::FFLOOR,           RoundedTy,  Legal); -      setOperationAction(ISD::FCEIL,            RoundedTy,  Legal); -      setOperationAction(ISD::FTRUNC,           RoundedTy,  Legal); -      setOperationAction(ISD::FRINT,            RoundedTy,  Legal); -      setOperationAction(ISD::FNEARBYINT,       RoundedTy,  Legal); +      setOperationAction(ISD::FFLOOR,            RoundedTy,  Legal); +      setOperationAction(ISD::STRICT_FFLOOR,     RoundedTy,  Legal); +      setOperationAction(ISD::FCEIL,             RoundedTy,  Legal); +      setOperationAction(ISD::STRICT_FCEIL,      RoundedTy,  Legal); +      setOperationAction(ISD::FTRUNC,            RoundedTy,  Legal); +    
  setOperationAction(ISD::STRICT_FTRUNC,     RoundedTy,  Legal); +      setOperationAction(ISD::FRINT,             RoundedTy,  Legal); +      setOperationAction(ISD::STRICT_FRINT,      RoundedTy,  Legal); +      setOperationAction(ISD::FNEARBYINT,        RoundedTy,  Legal); +      setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy,  Legal);      }      setOperationAction(ISD::SMAX,               MVT::v16i8, Legal); @@ -1144,14 +1149,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,                                                       : &X86::VR256RegClass);      for (auto VT : { MVT::v8f32, MVT::v4f64 }) { -      setOperationAction(ISD::FFLOOR,     VT, Legal); -      setOperationAction(ISD::FCEIL,      VT, Legal); -      setOperationAction(ISD::FTRUNC,     VT, Legal); -      setOperationAction(ISD::FRINT,      VT, Legal); -      setOperationAction(ISD::FNEARBYINT, VT, Legal); -      setOperationAction(ISD::FNEG,       VT, Custom); -      setOperationAction(ISD::FABS,       VT, Custom); -      setOperationAction(ISD::FCOPYSIGN,  VT, Custom); +      setOperationAction(ISD::FFLOOR,            VT, Legal); +      setOperationAction(ISD::STRICT_FFLOOR,     VT, Legal); +      setOperationAction(ISD::FCEIL,             VT, Legal); +      setOperationAction(ISD::STRICT_FCEIL,      VT, Legal); +      setOperationAction(ISD::FTRUNC,            VT, Legal); +      setOperationAction(ISD::STRICT_FTRUNC,     VT, Legal); +      setOperationAction(ISD::FRINT,             VT, Legal); +      setOperationAction(ISD::STRICT_FRINT,      VT, Legal); +      setOperationAction(ISD::FNEARBYINT,        VT, Legal); +      setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); +      setOperationAction(ISD::FNEG,              VT, Custom); +      setOperationAction(ISD::FABS,              VT, Custom); +      setOperationAction(ISD::FCOPYSIGN,         VT, Custom);      }      // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted @@ -1503,11 +1513,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,      setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i8, Custom);      for (auto VT : { MVT::v16f32, MVT::v8f64 }) { -      setOperationAction(ISD::FFLOOR,           VT, Legal); -      setOperationAction(ISD::FCEIL,            VT, Legal); -      setOperationAction(ISD::FTRUNC,           VT, Legal); -      setOperationAction(ISD::FRINT,            VT, Legal); -      setOperationAction(ISD::FNEARBYINT,       VT, Legal); +      setOperationAction(ISD::FFLOOR,            VT, Legal); +      setOperationAction(ISD::STRICT_FFLOOR,     VT, Legal); +      setOperationAction(ISD::FCEIL,             VT, Legal); +      setOperationAction(ISD::STRICT_FCEIL,      VT, Legal); +      setOperationAction(ISD::FTRUNC,            VT, Legal); +      setOperationAction(ISD::STRICT_FTRUNC,     VT, Legal); +      setOperationAction(ISD::FRINT,             VT, Legal); +      setOperationAction(ISD::STRICT_FRINT,      VT, Legal); +      setOperationAction(ISD::FNEARBYINT,        VT, Legal); +      setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);        setOperationAction(ISD::SELECT,           VT, Custom);      } @@ -29650,6 +29665,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {    case X86ISD::VPMADD52H:          return "X86ISD::VPMADD52H";    case X86ISD::VPMADD52L:          return "X86ISD::VPMADD52L";    case X86ISD::VRNDSCALE:          return "X86ISD::VRNDSCALE"; +  case X86ISD::STRICT_VRNDSCALE:   return "X86ISD::STRICT_VRNDSCALE";    case 
X86ISD::VRNDSCALE_SAE:      return "X86ISD::VRNDSCALE_SAE";    case X86ISD::VRNDSCALES:         return "X86ISD::VRNDSCALES";    case X86ISD::VRNDSCALES_SAE:     return "X86ISD::VRNDSCALES_SAE"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 655717bd605..16b076e85af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -424,7 +424,7 @@ namespace llvm {        // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.        // Also used by the legacy (V)ROUND intrinsics where we mask out the        // scaling part of the immediate. -      VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, +      VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, STRICT_VRNDSCALE,        // Tests Types Of a FP Values for packed types.        VFPCLASS,        // Tests Types Of a FP Values for scalar types. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 02ac454fe06..61f12785db7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9019,13 +9019,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,    }    let Predicates = [HasAVX512] in { -    def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), +    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),                (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),                 _.FRC:$src1, timm:$src2))>;    }    let Predicates = [HasAVX512, OptForSize] in { -    def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), +    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),                (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),                 addr:$src1, timm:$src2))>;    } @@ -10290,7 +10290,7 @@ defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56                                X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,                                AVX512AIi8Base, EVEX;  defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, -                              X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>, +                              X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,                                AVX512AIi8Base, EVEX;  defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,                                X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 37cba895c37..a9902013dfe 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -466,6 +466,12 @@ def X86VRangeSAE   : SDNode<"X86ISD::VRANGE_SAE",    SDTFPBinOpImm>;  def X86VReduce     : SDNode<"X86ISD::VREDUCE",       SDTFPUnaryOpImm>;  def X86VReduceSAE  : SDNode<"X86ISD::VREDUCE_SAE",   SDTFPUnaryOpImm>;  def X86VRndScale   : SDNode<"X86ISD::VRNDSCALE",     SDTFPUnaryOpImm>; +def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm, +                                  [SDNPHasChain]>; +def X86any_VRndScale    : PatFrags<(ops node:$src1, node:$src2), +                                    [(X86strict_VRndScale node:$src1, node:$src2), +                                    (X86VRndScale node:$src1, node:$src2)]>; +  def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;  def 
X86VGetMant    : SDNode<"X86ISD::VGETMANT",      SDTFPUnaryOpImm>;  def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE",  SDTFPUnaryOpImm>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index c7ecfba5b24..ce085e6d56b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5540,19 +5540,19 @@ let Predicates = [HasAVX, NoVLX] in {    let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in {      // Intrinsic form      defm VROUNDPS  : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32, -                                     loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>, +                                     loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,                                     VEX, VEX_WIG;      defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32, -                                     loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>, +                                     loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,                                     VEX, VEX_L, VEX_WIG;    }    let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {      defm VROUNDPD  : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64, -                                     loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>, +                                     loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,                                     VEX, VEX_WIG;      defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64, -                                     loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>, +                                     loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,                                     VEX, VEX_L, VEX_WIG;    }  } @@ -5565,25 +5565,25 @@ let Predicates = [UseAVX] in {  }  let Predicates = [UseAVX] in { -  def : Pat<(X86VRndScale FR32:$src1, timm:$src2), +  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),              (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; -  def : Pat<(X86VRndScale FR64:$src1, timm:$src2), +  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),              (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>;  }  let Predicates = [UseAVX, OptForSize] in { -  def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), +  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),              (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; -  def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), +  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),              (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>;  }  let ExeDomain = SSEPackedSingle in  defm ROUNDPS  : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32, -                                memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>; +                                memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>;  let ExeDomain = SSEPackedDouble in  defm ROUNDPD  : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, -                                memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>; +                                memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>;  defm ROUND  : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; @@ -5592,16 +5592,16 @@ defm ROUND  : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,                                 v4f32, v2f64, X86RndScales>;  let Predicates = [UseSSE41] in { -  def : 
Pat<(X86VRndScale FR32:$src1, timm:$src2), +  def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2),              (ROUNDSSr FR32:$src1, timm:$src2)>; -  def : Pat<(X86VRndScale FR64:$src1, timm:$src2), +  def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2),              (ROUNDSDr FR64:$src1, timm:$src2)>;  }  let Predicates = [UseSSE41, OptForSize] in { -  def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), +  def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2),              (ROUNDSSm addr:$src1, timm:$src2)>; -  def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), +  def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2),              (ROUNDSDm addr:$src1, timm:$src2)>;  } diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll new file mode 100644 index 00000000000..b5e7f9307de --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll @@ -0,0 +1,474 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86,AVX512-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64,AVX512-X64 + +declare float @llvm.experimental.constrained.ceil.f32(float, metadata) +declare double @llvm.experimental.constrained.ceil.f64(double, metadata) +declare float @llvm.experimental.constrained.floor.f32(float, metadata) +declare double @llvm.experimental.constrained.floor.f64(double, metadata) +declare float @llvm.experimental.constrained.trunc.f32(float, metadata) +declare double @llvm.experimental.constrained.trunc.f64(double, metadata) +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) + +define float @fceil32(float %f) #0 { +; SSE41-X86-LABEL: fceil32: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT:    roundss $10, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movss %xmm0, (%esp) +; SSE41-X86-NEXT:    flds (%esp) +; SSE41-X86-NEXT:    popl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: fceil32: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundss $10, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: fceil32: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovss 
%xmm0, (%esp) +; AVX-X86-NEXT:    flds (%esp) +; AVX-X86-NEXT:    popl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: fceil32: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundss $10, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call float @llvm.experimental.constrained.ceil.f32( +                        float %f, metadata !"fpexcept.strict") +  ret float %res +} + +define double @fceilf64(double %f) #0 { +; SSE41-X86-LABEL: fceilf64: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    .cfi_offset %ebp, -8 +; SSE41-X86-NEXT:    movl %esp, %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT:    andl $-8, %esp +; SSE41-X86-NEXT:    subl $8, %esp +; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT:    roundsd $10, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movsd %xmm0, (%esp) +; SSE41-X86-NEXT:    fldl (%esp) +; SSE41-X86-NEXT:    movl %ebp, %esp +; SSE41-X86-NEXT:    popl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: fceilf64: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundsd $10, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: fceilf64: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    .cfi_offset %ebp, -8 +; AVX-X86-NEXT:    movl %esp, %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp +; AVX-X86-NEXT:    andl $-8, %esp +; AVX-X86-NEXT:    subl $8, %esp +; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovsd %xmm0, (%esp) +; AVX-X86-NEXT:    fldl (%esp) +; AVX-X86-NEXT:    movl %ebp, %esp +; AVX-X86-NEXT:    popl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: fceilf64: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundsd $10, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call double @llvm.experimental.constrained.ceil.f64( +                        double %f, metadata !"fpexcept.strict") +  ret double %res +} + +define float @ffloor32(float %f) #0 { +; SSE41-X86-LABEL: ffloor32: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT:    roundss $9, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movss %xmm0, (%esp) +; SSE41-X86-NEXT:    flds (%esp) +; SSE41-X86-NEXT:    popl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: ffloor32: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundss $9, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: ffloor32: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovss %xmm0, (%esp) +; AVX-X86-NEXT:    flds (%esp) +; AVX-X86-NEXT:    popl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: ffloor32: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundss $9, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call float @llvm.experimental.constrained.floor.f32( +                        float %f, metadata !"fpexcept.strict") +  ret float %res +} + +define double @ffloorf64(double %f) #0 { +; 
SSE41-X86-LABEL: ffloorf64: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    .cfi_offset %ebp, -8 +; SSE41-X86-NEXT:    movl %esp, %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT:    andl $-8, %esp +; SSE41-X86-NEXT:    subl $8, %esp +; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT:    roundsd $9, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movsd %xmm0, (%esp) +; SSE41-X86-NEXT:    fldl (%esp) +; SSE41-X86-NEXT:    movl %ebp, %esp +; SSE41-X86-NEXT:    popl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: ffloorf64: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundsd $9, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: ffloorf64: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    .cfi_offset %ebp, -8 +; AVX-X86-NEXT:    movl %esp, %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp +; AVX-X86-NEXT:    andl $-8, %esp +; AVX-X86-NEXT:    subl $8, %esp +; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovsd %xmm0, (%esp) +; AVX-X86-NEXT:    fldl (%esp) +; AVX-X86-NEXT:    movl %ebp, %esp +; AVX-X86-NEXT:    popl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: ffloorf64: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundsd $9, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call double @llvm.experimental.constrained.floor.f64( +                        double %f, metadata !"fpexcept.strict") +  ret double %res +} + +define float @ftrunc32(float %f) #0 { +; SSE41-X86-LABEL: ftrunc32: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT:    roundss $11, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movss %xmm0, (%esp) +; SSE41-X86-NEXT:    flds (%esp) +; SSE41-X86-NEXT:    popl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: ftrunc32: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundss $11, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: ftrunc32: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovss %xmm0, (%esp) +; AVX-X86-NEXT:    flds (%esp) +; AVX-X86-NEXT:    popl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: ftrunc32: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call float @llvm.experimental.constrained.trunc.f32( +                        float %f, metadata !"fpexcept.strict") +  ret float %res +} + +define double @ftruncf64(double %f) #0 { +; SSE41-X86-LABEL: ftruncf64: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    .cfi_offset %ebp, -8 +; SSE41-X86-NEXT:    movl %esp, %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT:    andl $-8, %esp +; SSE41-X86-NEXT:    subl $8, %esp +; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT:    roundsd $11, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movsd %xmm0, 
(%esp) +; SSE41-X86-NEXT:    fldl (%esp) +; SSE41-X86-NEXT:    movl %ebp, %esp +; SSE41-X86-NEXT:    popl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: ftruncf64: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundsd $11, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: ftruncf64: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    .cfi_offset %ebp, -8 +; AVX-X86-NEXT:    movl %esp, %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp +; AVX-X86-NEXT:    andl $-8, %esp +; AVX-X86-NEXT:    subl $8, %esp +; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovsd %xmm0, (%esp) +; AVX-X86-NEXT:    fldl (%esp) +; AVX-X86-NEXT:    movl %ebp, %esp +; AVX-X86-NEXT:    popl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: ftruncf64: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call double @llvm.experimental.constrained.trunc.f64( +                        double %f, metadata !"fpexcept.strict") +  ret double %res +} + +define float @frint32(float %f) #0 { +; SSE41-X86-LABEL: frint32: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT:    roundss $4, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movss %xmm0, (%esp) +; SSE41-X86-NEXT:    flds (%esp) +; SSE41-X86-NEXT:    popl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: frint32: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundss $4, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: frint32: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovss %xmm0, (%esp) +; AVX-X86-NEXT:    flds (%esp) +; AVX-X86-NEXT:    popl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: frint32: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundss $4, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call float @llvm.experimental.constrained.rint.f32( +                        float %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret float %res +} + +define double @frintf64(double %f) #0 { +; SSE41-X86-LABEL: frintf64: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    .cfi_offset %ebp, -8 +; SSE41-X86-NEXT:    movl %esp, %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT:    andl $-8, %esp +; SSE41-X86-NEXT:    subl $8, %esp +; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT:    roundsd $4, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movsd %xmm0, (%esp) +; SSE41-X86-NEXT:    fldl (%esp) +; SSE41-X86-NEXT:    movl %ebp, %esp +; SSE41-X86-NEXT:    popl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: frintf64: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundsd $4, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: frintf64: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %ebp +; AVX-X86-NEXT:    
.cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    .cfi_offset %ebp, -8 +; AVX-X86-NEXT:    movl %esp, %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp +; AVX-X86-NEXT:    andl $-8, %esp +; AVX-X86-NEXT:    subl $8, %esp +; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovsd %xmm0, (%esp) +; AVX-X86-NEXT:    fldl (%esp) +; AVX-X86-NEXT:    movl %ebp, %esp +; AVX-X86-NEXT:    popl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: frintf64: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundsd $4, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call double @llvm.experimental.constrained.rint.f64( +                        double %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret double %res +} + +define float @fnearbyint32(float %f) #0 { +; SSE41-X86-LABEL: fnearbyint32: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT:    roundss $12, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movss %xmm0, (%esp) +; SSE41-X86-NEXT:    flds (%esp) +; SSE41-X86-NEXT:    popl %eax +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: fnearbyint32: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundss $12, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: fnearbyint32: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT:    vmovss %xmm0, (%esp) +; AVX-X86-NEXT:    flds (%esp) +; AVX-X86-NEXT:    popl %eax +; AVX-X86-NEXT:    .cfi_def_cfa_offset 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: fnearbyint32: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundss $12, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call float @llvm.experimental.constrained.nearbyint.f32( +                        float %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret float %res +} + +define double @fnearbyintf64(double %f) #0 { +; SSE41-X86-LABEL: fnearbyintf64: +; SSE41-X86:       # %bb.0: +; SSE41-X86-NEXT:    pushl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT:    .cfi_offset %ebp, -8 +; SSE41-X86-NEXT:    movl %esp, %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT:    andl $-8, %esp +; SSE41-X86-NEXT:    subl $8, %esp +; SSE41-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT:    roundsd $12, %xmm0, %xmm0 +; SSE41-X86-NEXT:    movsd %xmm0, (%esp) +; SSE41-X86-NEXT:    fldl (%esp) +; SSE41-X86-NEXT:    movl %ebp, %esp +; SSE41-X86-NEXT:    popl %ebp +; SSE41-X86-NEXT:    .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT:    retl +; +; SSE41-X64-LABEL: fnearbyintf64: +; SSE41-X64:       # %bb.0: +; SSE41-X64-NEXT:    roundsd $12, %xmm0, %xmm0 +; SSE41-X64-NEXT:    retq +; +; AVX-X86-LABEL: fnearbyintf64: +; AVX-X86:       # %bb.0: +; AVX-X86-NEXT:    pushl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_offset 8 +; AVX-X86-NEXT:    .cfi_offset %ebp, -8 +; AVX-X86-NEXT:    movl %esp, %ebp +; AVX-X86-NEXT:    .cfi_def_cfa_register %ebp +; AVX-X86-NEXT:    andl $-8, %esp +; AVX-X86-NEXT:    subl $8, %esp +; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0 +; 
AVX-X86-NEXT:    vmovsd %xmm0, (%esp) +; AVX-X86-NEXT:    fldl (%esp) +; AVX-X86-NEXT:    movl %ebp, %esp +; AVX-X86-NEXT:    popl %ebp +; AVX-X86-NEXT:    .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT:    retl +; +; AVX-X64-LABEL: fnearbyintf64: +; AVX-X64:       # %bb.0: +; AVX-X64-NEXT:    vroundsd $12, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT:    retq +  %res = call double @llvm.experimental.constrained.nearbyint.f64( +                        double %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret double %res +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll index a0dc8bca875..404e691c37c 100644 --- a/llvm/test/CodeGen/X86/vec-strict-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-256.ll @@ -18,6 +18,16 @@ declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float  declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)  declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)  declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float>, metadata) +declare <4 x double>  @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata) +declare <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float>, metadata) +declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata) +declare <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float>, metadata) +declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata) +declare <8 x float> @llvm.experimental.constrained.rint.v8f32(<8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)  define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 {  ; CHECK-LABEL: f1: @@ -178,4 +188,111 @@ define <4 x double> @f14(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {    ret <4 x double> %res  } +define <8 x float> @fceilv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: fceilv8f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundps $10, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x float> @llvm.experimental.constrained.ceil.v8f32( +                          <8 x float> %f, metadata !"fpexcept.strict") +  ret <8 x float> %res +} + +define <4 x double> @fceilv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: fceilv4f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundpd $10, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <4 x double> @llvm.experimental.constrained.ceil.v4f64( +                        <4 x double> %f, metadata !"fpexcept.strict") +  ret <4 x double> %res +} + +define <8 x float> @ffloorv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: ffloorv8f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundps $9, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x float> @llvm.experimental.constrained.floor.v8f32( +                          <8 x float> %f, metadata !"fpexcept.strict") +  ret <8 x float> %res +} + +define <4 x double> @ffloorv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: ffloorv4f64: +; 
CHECK:       # %bb.0: +; CHECK-NEXT:    vroundpd $9, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <4 x double> @llvm.experimental.constrained.floor.v4f64( +                        <4 x double> %f, metadata !"fpexcept.strict") +  ret <4 x double> %res +} + + +define <8 x float> @ftruncv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: ftruncv8f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundps $11, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x float> @llvm.experimental.constrained.trunc.v8f32( +                          <8 x float> %f, metadata !"fpexcept.strict") +  ret <8 x float> %res +} + +define <4 x double> @ftruncv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: ftruncv4f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundpd $11, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <4 x double> @llvm.experimental.constrained.trunc.v4f64( +                        <4 x double> %f, metadata !"fpexcept.strict") +  ret <4 x double> %res +} + + +define <8 x float> @frintv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: frintv8f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundps $4, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x float> @llvm.experimental.constrained.rint.v8f32( +                          <8 x float> %f, +                          metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <8 x float> %res +} + +define <4 x double> @frintv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: frintv4f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundpd $4, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <4 x double> @llvm.experimental.constrained.rint.v4f64( +                        <4 x double> %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <4 x double> %res +} + + +define <8 x float> @fnearbyintv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: fnearbyintv8f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundps $12, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32( +                          <8 x float> %f, +                          metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <8 x float> %res +} + +define <4 x double> @fnearbyintv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: fnearbyintv4f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vroundpd $12, %ymm0, %ymm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( +                        <4 x double> %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <4 x double> %res +} +  attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll index 236fb77f9b5..88dec6a427d 100644 --- a/llvm/test/CodeGen/X86/vec-strict-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-512.ll @@ -16,6 +16,17 @@ declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float  declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)  declare <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, metadata, metadata)  declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, metadata) +declare <8 x double>  @llvm.experimental.constrained.ceil.v8f64(<8 x double>, metadata) +declare <16 x 
float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, metadata) +declare <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double>, metadata) +declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, metadata) +declare <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double>, metadata) +declare <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double>, metadata, metadata) +  define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 {  ; CHECK-LABEL: f1: @@ -175,4 +186,98 @@ define <8 x double> @f14(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {    ret <8 x double> %res  } +define <16 x float> @strict_vector_fceil_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_fceil_v16f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float> %f, metadata !"fpexcept.strict") +  ret <16 x float> %res +} + +define <8 x double> @strict_vector_fceil_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_fceil_v8f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %f, metadata !"fpexcept.strict") +  ret <8 x double> %res +} + +define <16 x float> @strict_vector_ffloor_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_ffloor_v16f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float> %f, metadata !"fpexcept.strict") +  ret <16 x float> %res +} + +define <8 x double> @strict_vector_ffloor_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_ffloor_v8f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %f, metadata !"fpexcept.strict") +  ret <8 x double> %res +} + +define <16 x float> @strict_vector_ftrunc_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_ftrunc_v16f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float> %f, metadata !"fpexcept.strict") +  ret <16 x float> %res +} + +define <8 x double> @strict_vector_ftrunc_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_ftrunc_v8f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %f, metadata !"fpexcept.strict") +  ret <8 x double> %res +} + +define <16 x float> @strict_vector_frint_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_frint_v16f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscaleps $4, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float> %f, +                             metadata !"round.dynamic", metadata !"fpexcept.strict") +  
ret <16 x float> %res +} + +define <8 x double> @strict_vector_frint_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_frint_v8f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscalepd $4, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double> %f, +                            metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <8 x double> %res +} + +define <16 x float> @strict_vector_fnearbyint_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_fnearbyint_v16f32: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %f, +                             metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <16 x float> %res +} + +define <8 x double> @strict_vector_fnearbyint_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_fnearbyint_v8f64: +; CHECK:       # %bb.0: +; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm0 +; CHECK-NEXT:    ret{{[l|q]}} +  %res = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %f, +                             metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <8 x double> %res +} +  attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-round-128.ll b/llvm/test/CodeGen/X86/vec-strict-round-128.ll new file mode 100644 index 00000000000..7d6a66e3366 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-round-128.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX + +declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata) +declare <2 x double>  @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) +declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) +declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) +declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) + +define <4 x float> @fceilv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: fceilv4f32: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundps $10, %xmm0, %xmm0 +; 
SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: fceilv4f32: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundps $10, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32( +                          <4 x float> %f, metadata !"fpexcept.strict") +  ret <4 x float> %res +} + +define <2 x double> @fceilv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: fceilv2f64: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: fceilv2f64: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( +                        <2 x double> %f, metadata !"fpexcept.strict") +  ret <2 x double> %res +} + +define <4 x float> @ffloorv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: ffloorv4f32: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundps $9, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: ffloorv4f32: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundps $9, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32( +                          <4 x float> %f, metadata !"fpexcept.strict") +  ret <4 x float> %res +} + +define <2 x double> @ffloorv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: ffloorv2f64: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: ffloorv2f64: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64( +                        <2 x double> %f, metadata !"fpexcept.strict") +  ret <2 x double> %res +} + +define <4 x float> @ftruncv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: ftruncv4f32: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundps $11, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: ftruncv4f32: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundps $11, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32( +                          <4 x float> %f, metadata !"fpexcept.strict") +  ret <4 x float> %res +} + +define <2 x double> @ftruncv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: ftruncv2f64: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: ftruncv2f64: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( +                        <2 x double> %f, metadata !"fpexcept.strict") +  ret <2 x double> %res +} + +define <4 x float> @frintv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: frintv4f32: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundps $4, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: frintv4f32: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundps $4, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <4 x float> @llvm.experimental.constrained.rint.v4f32( +                          <4 x float> %f, +                          metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <4 x float> %res +} + +define <2 x double> @frintv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: frintv2f64: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: frintv2f64: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0 +; AVX-NEXT:    
ret{{[l|q]}} +  %res = call <2 x double> @llvm.experimental.constrained.rint.v2f64( +                        <2 x double> %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <2 x double> %res +} + +define <4 x float> @fnearbyintv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: fnearbyintv4f32: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundps $12, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: fnearbyintv4f32: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundps $12, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( +                          <4 x float> %f, +                          metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <4 x float> %res +} + +define <2 x double> @fnearbyintv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: fnearbyintv2f64: +; SSE41:       # %bb.0: +; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0 +; SSE41-NEXT:    ret{{[l|q]}} +; +; AVX-LABEL: fnearbyintv2f64: +; AVX:       # %bb.0: +; AVX-NEXT:    vroundpd $12, %xmm0, %xmm0 +; AVX-NEXT:    ret{{[l|q]}} +  %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( +                        <2 x double> %f, +                        metadata !"round.dynamic", metadata !"fpexcept.strict") +  ret <2 x double> %res +} + +attributes #0 = { strictfp }  | 