summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author Evandro Menezes <e.menezes@samsung.com> 2016-09-20 19:02:06 +0000
committer Evandro Menezes <e.menezes@samsung.com> 2016-09-20 19:02:06 +0000
commit ba4926efde147744f6aec2100870bac4ee912cc4 (patch)
tree 25fe1a0578b55b2025b8510273541080c2b4d83b /llvm
parent 61a1273d27f3a35cd306f41cb3dcc8575470b8e4 (diff)
download bcm5719-llvm-ba4926efde147744f6aec2100870bac4ee912cc4.tar.gz
bcm5719-llvm-ba4926efde147744f6aec2100870bac4ee912cc4.zip
Revert "[AArch64] Use the reciprocal estimation machinery"
This reverts commit b7d42b0048f65346e9fa37fb65defeea7ce8c337 per request by Eric Christopher <echristo@gmail.com> (v. http://bit.ly/2cmz6kW).

llvm-svn: 282000
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 36
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 9
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 28
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetMachine.h | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/recp-fastmath.ll | 79
-rw-r--r-- llvm/test/CodeGen/AArch64/sqrt-fastmath.ll | 160
7 files changed, 3 insertions, 340 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1fca58241ed..a611549c912 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -953,8 +953,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
- case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
- case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
}
return nullptr;
}
@@ -4585,40 +4583,6 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// AArch64 Optimization Hooks
//===----------------------------------------------------------------------===//
-/// getEstimate - Return the appropriate estimate DAG for either the reciprocal
-/// or the reciprocal square root.
-static SDValue getEstimate(const AArch64Subtarget &ST,
- const AArch64TargetLowering::DAGCombinerInfo &DCI, unsigned Opcode,
- const SDValue &Operand, unsigned &ExtraSteps) {
- if (!ST.hasNEON())
- return SDValue();
-
- EVT VT = Operand.getValueType();
-
- std::string RecipOp;
- RecipOp = Opcode == (AArch64ISD::FRECPE) ? "div": "sqrt";
- RecipOp = ((VT.isVector()) ? "vec-": "") + RecipOp;
- RecipOp += (VT.getScalarType() == MVT::f64) ? "d": "f";
-
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- ExtraSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
-}
-
-SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI, unsigned &ExtraSteps) const {
- return getEstimate(*Subtarget, DCI, AArch64ISD::FRECPE, Operand, ExtraSteps);
-}
-
-SDValue AArch64TargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI, unsigned &ExtraSteps, bool &UseOneConst) const {
- UseOneConst = true;
- return getEstimate(*Subtarget, DCI, AArch64ISD::FRSQRTE, Operand, ExtraSteps);
-}
-
//===----------------------------------------------------------------------===//
// AArch64 Inline Assembly Support
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2d75b9f52a9..86f1d972c9d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -187,10 +187,6 @@ enum NodeType : unsigned {
SMULL,
UMULL,
- // Reciprocal estimates.
- FRECPE,
- FRSQRTE,
-
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
@@ -521,11 +517,6 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9c77b58e273..e58ad278215 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -286,9 +286,6 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
-def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
-def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
-
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
@@ -3409,19 +3406,6 @@ def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
(FRECPEv1i64 FPR64:$Rn)>;
-def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
- (FRECPEv1i32 FPR32:$Rn)>;
-def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
- (FRECPEv2f32 V64:$Rn)>;
-def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
- (FRECPEv4f32 FPR128:$Rn)>;
-def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
- (FRECPEv1i64 FPR64:$Rn)>;
-def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
- (FRECPEv1i64 FPR64:$Rn)>;
-def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
- (FRECPEv2f64 FPR128:$Rn)>;
-
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
(FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
@@ -3434,19 +3418,6 @@ def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
(FRSQRTEv1i64 FPR64:$Rn)>;
-def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
- (FRSQRTEv1i32 FPR32:$Rn)>;
-def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
- (FRSQRTEv2f32 V64:$Rn)>;
-def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
- (FRSQRTEv4f32 FPR128:$Rn)>;
-def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
- (FRSQRTEv1i64 FPR64:$Rn)>;
-def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
- (FRSQRTEv1i64 FPR64:$Rn)>;
-def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
- (FRSQRTEv2f64 FPR128:$Rn)>;
-
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 9faeb6feb6a..52ccc78fb62 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -167,29 +167,6 @@ static std::string computeDataLayout(const Triple &TT, bool LittleEndian) {
return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}
-// Helper function to set up the defaults for reciprocals.
-static void initReciprocals(AArch64TargetMachine& TM, AArch64Subtarget& ST)
-{
- // For the estimates, convergence is quadratic, so essentially the number of
- // digits is doubled after each iteration. ARMv8, the minimum architected
- // accuracy of the initial estimate is 2^-8. Therefore, the number of extra
- // steps to refine the result for float (23 mantissa bits) and for double
- // (52 mantissa bits) are 2 and 3, respectively.
- unsigned ExtraStepsF = 2,
- ExtraStepsD = ExtraStepsF + 1;
- bool UseRsqrt = ST.useRSqrt();
-
- TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);
- TM.Options.Reciprocals.setDefaults("vec-sqrtf", UseRsqrt, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("vec-sqrtd", UseRsqrt, ExtraStepsD);
-
- TM.Options.Reciprocals.setDefaults("divf", false, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("divd", false, ExtraStepsD);
- TM.Options.Reciprocals.setDefaults("vec-divf", false, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("vec-divd", false, ExtraStepsD);
-}
-
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
// AArch64 Darwin is always PIC.
@@ -214,8 +191,7 @@ AArch64TargetMachine::AArch64TargetMachine(
: LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
Options, getEffectiveRelocModel(TT, RM), CM, OL),
TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, CPU, FS, *this, LittleEndian) {
- initReciprocals(*this, Subtarget);
+ isLittle(LittleEndian) {
initAsmInfo();
}
@@ -263,7 +239,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
- Subtarget.isLittleEndian());
+ isLittle);
#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *GISel = new GISelAccessor();
#else
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index b44107b065b..6fa5e83957e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -46,7 +46,7 @@ public:
}
private:
- AArch64Subtarget Subtarget;
+ bool isLittle;
};
// AArch64 little endian target machine.
diff --git a/llvm/test/CodeGen/AArch64/recp-fastmath.ll b/llvm/test/CodeGen/AArch64/recp-fastmath.ll
deleted file mode 100644
index 710739b2cc5..00000000000
--- a/llvm/test/CodeGen/AArch64/recp-fastmath.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!div,!vec-div | FileCheck %s --check-prefix=FAULT
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=div,vec-div | FileCheck %s
-
-define float @frecp(float %x) #0 {
- %div = fdiv fast float 1.0, %x
- ret float %div
-
-; FAULT-LABEL: frecp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
-
-; CHECK-LABEL: frecp:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: frecpe
-; CHECK-NEXT: fmov
-}
-
-define <2 x float> @f2recp(<2 x float> %x) #0 {
- %div = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
- ret <2 x float> %div
-
-; FAULT-LABEL: f2recp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
-
-; CHECK-LABEL: f2recp:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
-}
-
-define <4 x float> @f4recp(<4 x float> %x) #0 {
- %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
- ret <4 x float> %div
-
-; FAULT-LABEL: f4recp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
-
-; CHECK-LABEL: f4recp:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
-}
-
-define double @drecp(double %x) #0 {
- %div = fdiv fast double 1.0, %x
- ret double %div
-
-; FAULT-LABEL: drecp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
-
-; CHECK-LABEL: drecp:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: frecpe
-; CHECK-NEXT: fmov
-}
-
-define <2 x double> @d2recp(<2 x double> %x) #0 {
- %div = fdiv fast <2 x double> <double 1.0, double 1.0>, %x
- ret <2 x double> %div
-
-; FAULT-LABEL: d2recp:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fmov
-; FAULT-NEXT: fdiv
-
-; CHECK-LABEL: d2recp:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frecpe
-}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
deleted file mode 100644
index 0d9533fd27f..00000000000
--- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
+++ /dev/null
@@ -1,160 +0,0 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!sqrt,!vec-sqrt | FileCheck %s --check-prefix=FAULT
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=sqrt,vec-sqrt | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon,-use-reverse-square-root | FileCheck %s --check-prefix=FAULT
-; RUN: llc < %s -mtriple=aarch64 -mattr=neon,+use-reverse-square-root | FileCheck %s
-
-declare float @llvm.sqrt.f32(float) #1
-declare double @llvm.sqrt.f64(double) #1
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #1
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #1
-
-define float @fsqrt(float %a) #0 {
- %1 = tail call fast float @llvm.sqrt.f32(float %a)
- ret float %1
-
-; FAULT-LABEL: fsqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: fsqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-}
-
-define <2 x float> @f2sqrt(<2 x float> %a) #0 {
- %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
- ret <2 x float> %1
-
-; FAULT-LABEL: f2sqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: f2sqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
-}
-
-define <4 x float> @f4sqrt(<4 x float> %a) #0 {
- %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
- ret <4 x float> %1
-
-; FAULT-LABEL: f4sqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: f4sqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
-}
-
-define double @dsqrt(double %a) #0 {
- %1 = tail call fast double @llvm.sqrt.f64(double %a)
- ret double %1
-
-; FAULT-LABEL: dsqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: dsqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-}
-
-define <2 x double> @d2sqrt(<2 x double> %a) #0 {
- %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
- ret <2 x double> %1
-
-; FAULT-LABEL: d2sqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: d2sqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: mov
-; CHECK-NEXT: frsqrte
-}
-
-define float @frsqrt(float %a) #0 {
- %1 = tail call fast float @llvm.sqrt.f32(float %a)
- %2 = fdiv fast float 1.000000e+00, %1
- ret float %2
-
-; FAULT-LABEL: frsqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: frsqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-}
-
-define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
- %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #2
- %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
- ret <2 x float> %2
-
-; FAULT-LABEL: f2rsqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: f2rsqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-}
-
-define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
- %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #2
- %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
- ret <4 x float> %2
-
-; FAULT-LABEL: f4rsqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: f4rsqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-}
-
-define double @drsqrt(double %a) #0 {
- %1 = tail call fast double @llvm.sqrt.f64(double %a)
- %2 = fdiv fast double 1.000000e+00, %1
- ret double %2
-
-; FAULT-LABEL: drsqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: drsqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-}
-
-define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
- %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #2
- %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
- ret <2 x double> %2
-
-; FAULT-LABEL: d2rsqrt:
-; FAULT-NEXT: BB#0
-; FAULT-NEXT: fsqrt
-
-; CHECK-LABEL: d2rsqrt:
-; CHECK-NEXT: BB#0
-; CHECK-NEXT: fmov
-; CHECK-NEXT: frsqrte
-}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
OpenPOWER on IntegriCloud