diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-07-12 19:58:06 +0000 | 
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-12 19:58:06 +0000 | 
| commit | 57c4585bab3eff4448af79db09ca26a4f6c0daea (patch) | |
| tree | 9707ec44f7e43e99591cfe218567127cc7c541dc | |
| parent | 1880a3f0d8e3cc7a2c1c03dd31a3b8e0a9a5d83e (diff) | |
| download | bcm5719-llvm-57c4585bab3eff4448af79db09ca26a4f6c0daea.tar.gz bcm5719-llvm-57c4585bab3eff4448af79db09ca26a4f6c0daea.zip  | |
[X86][FastISel] Support EVEX version of sqrt.
llvm-svn: 336939
| -rw-r--r-- | llvm/lib/Target/X86/X86FastISel.cpp | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 13 | 
3 files changed, 29 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 4199eb2ad27..d65d81b17f4 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2799,17 +2799,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
     // is not generated by FastISel yet.
     // FIXME: Update this code once tablegen can handle it.
-    static const uint16_t SqrtOpc[2][2] = {
-      {X86::SQRTSSr, X86::VSQRTSSr},
-      {X86::SQRTSDr, X86::VSQRTSDr}
+    static const uint16_t SqrtOpc[3][2] = {
+      { X86::SQRTSSr,   X86::SQRTSDr },
+      { X86::VSQRTSSr,  X86::VSQRTSDr },
+      { X86::VSQRTSSZr, X86::VSQRTSDZr },
     };
-    bool HasAVX = Subtarget->hasAVX();
+    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
+                        Subtarget->hasAVX()    ? 1 :
+                                                 0;
     unsigned Opc;
-    const TargetRegisterClass *RC;
     switch (VT.SimpleTy) {
     default: return false;
-    case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
-    case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
+    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
     }
     const Value *SrcVal = II->getArgOperand(0);
@@ -2818,8 +2820,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (SrcReg == 0)
       return false;
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
     unsigned ImplicitDefReg = 0;
-    if (HasAVX) {
+    if (AVXLevel > 0) {
       ImplicitDefReg = createResultReg(RC);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 1b54562703e..fce52bf6699 100644
--- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -2614,10 +2614,15 @@ define float @test_mm_sqrt_ss_scalar(float %a0) {
 ; X64-SSE-NEXT:    sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
 ;
-; X64-AVX-LABEL: test_mm_sqrt_ss_scalar:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
-; X64-AVX-NEXT:    retq # encoding: [0xc3]
+; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
+; X64-AVX1-NEXT:    retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
+; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   %sqrt = call float @llvm.sqrt.f32(float %a0)
   ret float %sqrt
 }
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 03acbaafe82..be389890bb7 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -4959,10 +4959,15 @@ define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
 ; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
 ;
-; X64-AVX-LABEL: test_mm_sqrt_sd_scalar:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
-; X64-AVX-NEXT:    retq # encoding: [0xc3]
+; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX1-NEXT:    retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX512-NEXT:    retq # encoding: [0xc3]
   %sqrt = call double @llvm.sqrt.f64(double %a0)
   ret double %sqrt
 }
|

