summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJuergen Ributzka <juergen@apple.com>2014-06-11 23:11:02 +0000
committerJuergen Ributzka <juergen@apple.com>2014-06-11 23:11:02 +0000
commit272b570a80d9028ab1c8e2cc408148c2c30bb117 (patch)
treef9fe3ad9c8e378139871446f4e15f85df01f323f
parentfbaa3db909a2999af06ad5d7ed0320bb23c701cd (diff)
downloadbcm5719-llvm-272b570a80d9028ab1c8e2cc408148c2c30bb117.tar.gz
bcm5719-llvm-272b570a80d9028ab1c8e2cc408148c2c30bb117.zip
[FastISel][X86] Add support for the sqrt intrinsic.
llvm-svn: 210720
-rw-r--r--llvm/lib/Target/X86/X86FastISel.cpp52
-rw-r--r--llvm/test/CodeGen/X86/sqrt.ll26
2 files changed, 78 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 23919849648..329a96eb9fc 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1781,6 +1781,58 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
return true;
}
+ case Intrinsic::sqrt: {
+ if (!Subtarget->hasSSE1())
+ return false;
+
+ Type *RetTy = I.getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ // Unfortunatelly we can't use FastEmit_r, because the AVX version of FSQRT
+ // is not generated by FastISel yet.
+ // FIXME: Update this code once tablegen can handle it.
+ static const unsigned SqrtOpc[2][2] = {
+ {X86::SQRTSSr, X86::VSQRTSSr},
+ {X86::SQRTSDr, X86::VSQRTSDr}
+ };
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
+ case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+ }
+
+ const Value *SrcVal = I.getArgOperand(0);
+ unsigned SrcReg = getRegForValue(SrcVal);
+
+ if (SrcReg == 0)
+ return false;
+
+ unsigned ImplicitDefReg = 0;
+ if (HasAVX) {
+ ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg);
+
+ if (ImplicitDefReg)
+ MIB.addReg(ImplicitDefReg);
+
+ MIB.addReg(SrcReg);
+
+ UpdateValueMap(&I, ResultReg);
+ return true;
+ }
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
diff --git a/llvm/test/CodeGen/X86/sqrt.ll b/llvm/test/CodeGen/X86/sqrt.ll
new file mode 100644
index 00000000000..be7c6e86739
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sqrt.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
+
+define float @test_sqrt_f32(float %a) {
+; SSE2-LABEL: test_sqrt_f32
+; SSE2: sqrtss %xmm0, %xmm0
+; AVX-LABEL: test_sqrt_f32
+; AVX: vsqrtss %xmm0, %xmm0
+ %res = call float @llvm.sqrt.f32(float %a)
+ ret float %res
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+define double @test_sqrt_f64(double %a) {
+; SSE2-LABEL: test_sqrt_f64
+; SSE2: sqrtsd %xmm0, %xmm0
+; AVX-LABEL: test_sqrt_f64
+; AVX: vsqrtsd %xmm0, %xmm0
+ %res = call double @llvm.sqrt.f64(double %a)
+ ret double %res
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+
OpenPOWER on IntegriCloud