author    Sanjay Patel <spatel@rotateright.com>    2017-11-27 21:15:43 +0000
committer Sanjay Patel <spatel@rotateright.com>    2017-11-27 21:15:43 +0000
commit    0de1a4bc2d2632ceb42a022c52195de323740e73 (patch)
tree      2d93fe4f2d0f399395696ba1cce0f74b01fe7c09
parent    7c3a89231cbb560050062dd904149efb77263e9f (diff)
[PartiallyInlineLibCalls][x86] add TTI hook to allow sqrt inlining to depend on arg rather than result
This should fix PR31455: https://bugs.llvm.org/show_bug.cgi?id=31455

Differential Revision: https://reviews.llvm.org/D28314

llvm-svn: 319094
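In effect, the patch changes which guard the PartiallyInlineLibCalls pass emits around the inlined hardware sqrt. A minimal C++ sketch of the two strategies (illustrative only; hwSqrt is a hypothetical stand-in for the native instruction):

```cpp
#include <cmath>

// Hypothetical stand-in for the native sqrt instruction (never sets errno).
static double hwSqrt(double x) { return __builtin_sqrt(x); }

double partiallyInlinedSqrt(double x) {
  double r = hwSqrt(x);
  // Old guard: branch on the *result* -- "r == r" is the scalar analogue
  // of fcmp ord, which is false only when r is NaN.
  // New guard: branch on the *argument* -- staying in sqrt's domain means
  // x >= 0, and that compare is false for negative x and for NaN x alike.
  if (x >= 0.0)
    return r;             // fast path: hardware result is valid
  return std::sqrt(x);    // slow path: real libcall, may set errno
}
```

Both guards send exactly the same inputs to the library call; the new TTI hook only lets a target say which compare is cheaper to emit.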
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h                   | 11
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h               |  2
-rw-r--r--  llvm/include/llvm/CodeGen/BasicTTIImpl.h                           |  4
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp                          |  4
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp                     |  4
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h                       |  1
-rw-r--r--  llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp             | 15
-rw-r--r--  llvm/test/CodeGen/X86/sqrt-partial.ll                              | 18
-rw-r--r--  llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll | 24
9 files changed, 58 insertions, 25 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index f3762454a37..90b71e93947 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -586,6 +586,12 @@ public:
/// \brief Return true if the hardware has a fast square-root instruction.
bool haveFastSqrt(Type *Ty) const;
+ /// Return true if it is faster to check if a floating-point value is NaN
+ /// (or not-NaN) versus a comparison against a constant FP zero value.
+ /// Targets should override this if materializing a 0.0 for comparison is
+ /// generally as cheap as checking for ordered/unordered.
+ bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
+
/// \brief Return the expected cost of supporting the floating point operation
/// of the specified type.
int getFPOpCost(Type *Ty) const;
@@ -1009,6 +1015,7 @@ public:
bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
+ virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;
virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
@@ -1273,6 +1280,10 @@ public:
}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
+ bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
+ return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
+ }
+
int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
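The new hook threads through the usual TTI layers here (public wrapper, abstract Concept, Model forwarder). What it chooses between is easiest to see in scalar terms; a small sketch of the two checks the pass can emit:

```cpp
// Scalar meaning of the two IR compares (sketch):
bool ordCheck(double v)  { return v == v; }   // fcmp ord v, v: false only for NaN
bool zeroCheck(double a) { return a >= 0.0; } // fcmp oge a, 0.0: needs a 0.0 materialized
```

The default implementations below return true, keeping the old ordered check; a target overrides the hook, as x86 does in this patch, when materializing the constant zero is at least as cheap.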
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4f27f6a1410..4c37402278e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -316,6 +316,8 @@ public:
bool haveFastSqrt(Type *Ty) { return false; }
+ bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
+
unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0141f9eb777..bb5e7f9e8e3 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -297,6 +297,10 @@ public:
TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}
+ bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
+ return true;
+ }
+
unsigned getFPOpCost(Type *Ty) {
// By default, FP instructions are no more expensive since they are
// implemented in HW. Target specific TTI can override this.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 53bedfe3f63..7feb40da271 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -281,6 +281,10 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
return TTIImpl->haveFastSqrt(Ty);
}
+bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
+ return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
+}
+
int TargetTransformInfo::getFPOpCost(Type *Ty) const {
int Cost = TTIImpl->getFPOpCost(Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d06d6a5d180..9b07491c75c 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2537,6 +2537,10 @@ bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
}
+bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
+ return false;
+}
+
bool X86TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 81b804ea268..6f01a6fd11d 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -125,6 +125,7 @@ public:
bool isLegalMaskedGather(Type *DataType);
bool isLegalMaskedScatter(Type *DataType);
bool hasDivRemOp(Type *DataType, bool IsSigned);
+ bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index a044fe38b76..1748815c594 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -26,7 +26,8 @@ using namespace llvm;
static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
- BasicBlock &CurrBB, Function::iterator &BB) {
+ BasicBlock &CurrBB, Function::iterator &BB,
+ const TargetTransformInfo *TTI) {
// There is no need to change the IR, since backend will emit sqrt
// instruction if the call has already been marked read-only.
if (Call->onlyReadsMemory())
@@ -39,7 +40,7 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
//
// (after)
// v0 = sqrt_noreadmem(src) # native sqrt instruction.
- // if (v0 is a NaN)
+ // [if (v0 is a NaN) || if (src < 0)]
// v1 = sqrt(src) # library call.
// dst = phi(v0, v1)
//
@@ -48,7 +49,8 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
// Create phi and replace all uses.
BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode());
IRBuilder<> Builder(JoinBB, JoinBB->begin());
- PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
+ Type *Ty = Call->getType();
+ PHINode *Phi = Builder.CreatePHI(Ty, 2);
Call->replaceAllUsesWith(Phi);
// Create basic block LibCallBB and insert a call to library function sqrt.
@@ -65,7 +67,10 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
CurrBB.getTerminator()->eraseFromParent();
Builder.SetInsertPoint(&CurrBB);
- Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
+ Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty)
+ ? Builder.CreateFCmpORD(Call, Call)
+ : Builder.CreateFCmpOGE(Call->getOperand(0),
+ ConstantFP::get(Ty, 0.0));
Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
// Add phi operands.
@@ -106,7 +111,7 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
case LibFunc_sqrtf:
case LibFunc_sqrt:
if (TTI->haveFastSqrt(Call->getType()) &&
- optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
+ optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI))
break;
continue;
default:
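Since the guard moves from the result to the argument, it is worth convincing yourself the two are equivalent for sqrt. A quick self-contained check (a sketch; __builtin_sqrt stands in for the hardware instruction):

```cpp
#include <cassert>
#include <cmath>

// For sqrt, "result is ordered" and "argument >= 0" pick the same path
// for every input class: positive, +/-0, negative, +/-inf, and NaN.
int main() {
  const double vals[] = {4.0, 0.0, -0.0, -1.0, NAN, INFINITY, -INFINITY};
  for (double v : vals) {
    double r = __builtin_sqrt(v);  // stand-in for the native sqrt
    bool ordPath = (r == r);       // old guard: result is not NaN
    bool ogePath = (v >= 0.0);     // new guard: argument is in domain
    assert(ordPath == ogePath);
  }
  return 0;
}
```

In particular, -0.0 stays on the fast path under both guards (sqrt(-0.0) is -0.0, and -0.0 >= 0.0 holds since IEEE zeros compare equal), and a NaN argument falls through to the libcall under both.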
diff --git a/llvm/test/CodeGen/X86/sqrt-partial.ll b/llvm/test/CodeGen/X86/sqrt-partial.ll
index b4b53f1dfcf..a7d4ef29c52 100644
--- a/llvm/test/CodeGen/X86/sqrt-partial.ll
+++ b/llvm/test/CodeGen/X86/sqrt-partial.ll
@@ -3,7 +3,7 @@
; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455
; We have to assume that errno can be set, so we have to make a libcall in that case.
-; But it's better for perf to check that the argument is valid rather than the result of
+; But it's better for perf to check that the argument is valid rather than the result of
; sqrtss/sqrtsd.
; Note: This is really a test of the -partially-inline-libcalls IR pass (and we have an IR test
; for that), but we're checking the final asm to make sure that comes out as expected too.
@@ -11,11 +11,11 @@
define float @f(float %val) nounwind {
; CHECK-LABEL: f:
; CHECK: # BB#0:
-; CHECK-NEXT: sqrtss %xmm0, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm1
-; CHECK-NEXT: jp .LBB0_2
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: jb .LBB0_2
; CHECK-NEXT: # BB#1: # %.split
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: sqrtss %xmm0, %xmm0
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_2: # %call.sqrt
; CHECK-NEXT: jmp sqrtf # TAILCALL
@@ -26,11 +26,11 @@ define float @f(float %val) nounwind {
define double @d(double %val) nounwind {
; CHECK-LABEL: d:
; CHECK: # BB#0:
-; CHECK-NEXT: sqrtsd %xmm0, %xmm1
-; CHECK-NEXT: ucomisd %xmm1, %xmm1
-; CHECK-NEXT: jp .LBB1_2
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; CHECK-NEXT: jb .LBB1_2
; CHECK-NEXT: # BB#1: # %.split
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: sqrtsd %xmm0, %xmm0
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB1_2: # %call.sqrt
; CHECK-NEXT: jmp sqrt # TAILCALL
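The asm above also shows why the slow path must remain a genuine tail call to sqrtf/sqrt: the native instruction never touches errno, but C requires a domain error for negative inputs on implementations where math_errhandling includes MATH_ERRNO. A small demonstration of that contract (a sketch; output depends on the libm):

```cpp
#include <cerrno>
#include <cmath>
#include <cstdio>

int main() {
  volatile double x = -1.0;  // volatile blocks constant folding
  errno = 0;
  double r = std::sqrt(x);   // library call: domain error for x < 0
  // On glibc-style implementations this prints a NaN and errno == EDOM.
  std::printf("r=%f errno=%d (EDOM=%d)\n", r, errno, EDOM);
  return 0;
}
```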
diff --git a/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll b/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
index 0011134640c..98aa4621986 100644
--- a/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
+++ b/llvm/test/Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
@@ -1,18 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -partially-inline-libcalls -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; RUN: opt -S -passes=partially-inline-libcalls -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
define float @f(float %val) {
-; CHECK: @f
-; CHECK: entry:
-; CHECK-NEXT: %[[RES:.+]] = tail call float @sqrtf(float %val) #0
-; CHECK-NEXT: %[[CMP:.+]] = fcmp oeq float %[[RES]], %[[RES]]
-; CHECK-NEXT: br i1 %[[CMP]], label %[[EXIT:.+]], label %[[CALL:.+]]
-; CHECK: [[CALL]]:
-; CHECK-NEXT: %[[RES2:.+]] = tail call float @sqrtf(float %val){{$}}
-; CHECK-NEXT: br label %[[EXIT]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: %[[RET:.+]] = phi float [ %[[RES]], %entry ], [ %[[RES2]], %[[CALL]] ]
-; CHECK-NEXT: ret float %[[RET]]
+; CHECK-LABEL: @f(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RES:%.*]] = tail call float @sqrtf(float [[VAL:%.*]]) #0
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp oge float [[VAL]], 0.000000e+00
+; CHECK-NEXT: br i1 [[TMP0]], label [[ENTRY_SPLIT:%.*]], label [[CALL_SQRT:%.*]]
+; CHECK: call.sqrt:
+; CHECK-NEXT: [[TMP1:%.*]] = tail call float @sqrtf(float [[VAL]])
+; CHECK-NEXT: br label [[ENTRY_SPLIT]]
+; CHECK: entry.split:
+; CHECK-NEXT: [[TMP2:%.*]] = phi float [ [[RES]], [[ENTRY:%.*]] ], [ [[TMP1]], [[CALL_SQRT]] ]
+; CHECK-NEXT: ret float [[TMP2]]
+;
entry:
%res = tail call float @sqrtf(float %val)
ret float %res