diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86FastISel.cpp | 19 |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll | 13 |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 13 |
3 files changed, 29 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 4199eb2ad27..d65d81b17f4 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -2799,17 +2799,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT // is not generated by FastISel yet. // FIXME: Update this code once tablegen can handle it. - static const uint16_t SqrtOpc[2][2] = { - {X86::SQRTSSr, X86::VSQRTSSr}, - {X86::SQRTSDr, X86::VSQRTSDr} + static const uint16_t SqrtOpc[3][2] = { + { X86::SQRTSSr, X86::SQRTSDr }, + { X86::VSQRTSSr, X86::VSQRTSDr }, + { X86::VSQRTSSZr, X86::VSQRTSDZr }, }; - bool HasAVX = Subtarget->hasAVX(); + unsigned AVXLevel = Subtarget->hasAVX512() ? 2 : + Subtarget->hasAVX() ? 1 : + 0; unsigned Opc; - const TargetRegisterClass *RC; switch (VT.SimpleTy) { default: return false; - case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; - case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; + case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break; + case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break; } const Value *SrcVal = II->getArgOperand(0); @@ -2818,8 +2820,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (SrcReg == 0) return false; + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned ImplicitDefReg = 0; - if (HasAVX) { + if (AVXLevel > 0) { ImplicitDefReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 1b54562703e..fce52bf6699 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -2614,10 +2614,15 @@ define float @test_mm_sqrt_ss_scalar(float %a0) { ; X64-SSE-NEXT: sqrtss %xmm0, 
%xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_sqrt_ss_scalar: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] -; X64-AVX-NEXT: retq # encoding: [0xc3] +; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %sqrt = call float @llvm.sqrt.f32(float %a0) ret float %sqrt } diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index 03acbaafe82..be389890bb7 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -4959,10 +4959,15 @@ define double @test_mm_sqrt_sd_scalar(double %a0) nounwind { ; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_sqrt_sd_scalar: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] -; X64-AVX-NEXT: retq # encoding: [0xc3] +; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %sqrt = call double @llvm.sqrt.f64(double %a0) ret double %sqrt } |

