diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 54 |
2 files changed, 57 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 9ae4a928fb5..a7ecfc2e586 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3065,10 +3065,9 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, const FeatureBitset &CalleeBits = TM.getSubtargetImpl(*Callee)->getFeatureBits(); - // FIXME: This is likely too limiting as it will include subtarget features - // that we might not care about for inlining, but it is conservatively - // correct. - return (CallerBits & CalleeBits) == CalleeBits; + FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; + FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; + return (RealCallerBits & RealCalleeBits) == RealCalleeBits; } const X86TTIImpl::TTI::MemCmpExpansionOptions * diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index b2e3e05cbe3..5035818fde9 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -35,6 +35,60 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> { const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } + const FeatureBitset InlineFeatureIgnoreList = { + // This indicates the CPU is 64 bit capable not that we are in 64-bit mode. + X86::Feature64Bit, + + // These features don't have any intrinsics or ABI effect. + X86::FeatureNOPL, + X86::FeatureCMPXCHG16B, + X86::FeatureLAHFSAHF, + + // Codegen control options. + X86::FeatureFast11ByteNOP, + X86::FeatureFast15ByteNOP, + X86::FeatureFastBEXTR, + X86::FeatureFastHorizontalOps, + X86::FeatureFastLZCNT, + X86::FeatureFastPartialYMMorZMMWrite, + X86::FeatureFastScalarFSQRT, + X86::FeatureFastSHLDRotate, + X86::FeatureFastVariableShuffle, + X86::FeatureFastVectorFSQRT, + X86::FeatureLEAForSP, + X86::FeatureLEAUsesAG, + X86::FeatureLZCNTFalseDeps, + X86::FeatureMacroFusion, + X86::FeatureMergeToThreeWayBranch, + X86::FeaturePadShortFunctions, + X86::FeaturePOPCNTFalseDeps, + X86::FeatureSSEUnalignedMem, + X86::FeatureSlow3OpsLEA, + X86::FeatureSlowDivide32, + X86::FeatureSlowDivide64, + X86::FeatureSlowIncDec, + X86::FeatureSlowLEA, + X86::FeatureSlowPMADDWD, + X86::FeatureSlowPMULLD, + X86::FeatureSlowSHLD, + X86::FeatureSlowTwoMemOps, + X86::FeatureSlowUAMem16, + + // Perf-tuning flags. + X86::FeatureHasFastGather, + X86::FeatureSlowUAMem32, + + // Based on whether user set the -mprefer-vector-width command line. + X86::FeaturePrefer256Bit, + + // CPU name enums. These just follow CPU string. + X86::ProcIntelAtom, + X86::ProcIntelGLM, + X86::ProcIntelGLP, + X86::ProcIntelSLM, + X86::ProcIntelTRM, + }; + public: explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), |

