| author | Sanjay Patel <spatel@rotateright.com> | 2018-01-03 20:02:39 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-01-03 20:02:39 +0000 |
| commit | f344987cad503e95e5cd727e1c9bec891269e32e (patch) | |
| tree | 0ffce1a29347fd9fce18d6b18b9ce6acfefa7c87 /llvm | |
| parent | e6e9c2751046c4244e340628fa9089af0779a03b (diff) | |
| download | bcm5719-llvm-f344987cad503e95e5cd727e1c9bec891269e32e.tar.gz bcm5719-llvm-f344987cad503e95e5cd727e1c9bec891269e32e.zip | |
[ExpandMemcmp] rename variables and add hook to override pref for number of loads per block; NFC
The preference only applies to 'memcmp() == 0' expansion, so try to make that clearer.
x86 will likely benefit by increasing the default value from '1' to '2' as seen in PR33325:
https://bugs.llvm.org/show_bug.cgi?id=33325
...so that is the planned follow-up to this clean-up step.
llvm-svn: 321756
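To make the intent of the new hook concrete, here is a minimal standalone sketch (not LLVM code, and not part of this commit): a mock target overrides `getMemcmpEqZeroLoadsPerBlock()` to return 2, the value suggested for x86 in PR33325, while an explicit command-line value still wins, mirroring the precedence implemented in `expandMemCmp()`. The mock class names and the `pickNumLoadsPerBlock()` helper are illustrative assumptions.

```cpp
// Simplified standalone mock (not LLVM code) of the new target hook and of
// how the existing -memcmp-num-loads-per-block flag takes precedence over it.
// The value 2 is the follow-up suggested for x86, not part of this commit.
#include <iostream>
#include <optional>

struct TargetLoweringMock {
  // Default mirrors TargetLowering::getMemcmpEqZeroLoadsPerBlock().
  virtual unsigned getMemcmpEqZeroLoadsPerBlock() const { return 1; }
  virtual ~TargetLoweringMock() = default;
};

struct X86LoweringMock : TargetLoweringMock {
  // Hypothetical x86 override: allow two load pairs per block for memcmp()==0.
  unsigned getMemcmpEqZeroLoadsPerBlock() const override { return 2; }
};

// Mirrors the expandMemCmp() logic: an explicit command-line value wins,
// otherwise the target hook decides.
static unsigned pickNumLoadsPerBlock(const TargetLoweringMock &TLI,
                                     std::optional<unsigned> FlagValue) {
  return FlagValue ? *FlagValue : TLI.getMemcmpEqZeroLoadsPerBlock();
}

int main() {
  X86LoweringMock X86;
  std::cout << pickNumLoadsPerBlock(X86, std::nullopt) << "\n"; // 2 (hook)
  std::cout << pickNumLoadsPerBlock(X86, 4u) << "\n";           // 4 (flag)
  return 0;
}
```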
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/CodeGen/TargetLowering.h | 12 |
| -rw-r--r-- | llvm/lib/CodeGen/ExpandMemCmp.cpp | 31 |
2 files changed, 29 insertions, 14 deletions
```diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 380e3b19dc8..e44cf97d300 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1202,6 +1202,18 @@ public:
     return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
   }
 
+  /// For memcmp expansion when the memcmp result is only compared equal or
+  /// not-equal to 0, allow up to this number of load pairs per block. As an
+  /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+  ///   a0 = load2bytes &a[0]
+  ///   b0 = load2bytes &b[0]
+  ///   a2 = load1byte  &a[2]
+  ///   b2 = load1byte  &b[2]
+  ///   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+  virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
+    return 1;
+  }
+
   /// \brief Get maximum # of store operations permitted for llvm.memmove
   ///
   /// This function returns the maximum number of store operations permitted
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 09c808463a4..20a240fd344 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -32,7 +32,7 @@ STATISTIC(NumMemCmpGreaterThanMax,
           "Number of memcmp calls with size greater than max size");
 STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
 
-static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
+static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
     "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
     cl::desc("The number of loads per basic block for inline expansion of "
              "memcmp that is only being compared against zero."));
@@ -56,7 +56,7 @@ class MemCmpExpansion {
   const uint64_t Size;
   unsigned MaxLoadSize;
   uint64_t NumLoadsNonOneByte;
-  const uint64_t NumLoadsPerBlock;
+  const uint64_t NumLoadsPerBlockForZeroCmp;
   std::vector<BasicBlock *> LoadCmpBlocks;
   BasicBlock *EndBlock;
   PHINode *PhiRes;
@@ -102,7 +102,7 @@ class MemCmpExpansion {
   MemCmpExpansion(CallInst *CI, uint64_t Size,
                   const TargetTransformInfo::MemCmpExpansionOptions &Options,
                   unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
-                  unsigned NumLoadsPerBlock, const DataLayout &DL);
+                  unsigned NumLoadsPerBlockForZeroCmp, const DataLayout &DL);
 
   unsigned getNumBlocks();
   uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -122,12 +122,12 @@ MemCmpExpansion::MemCmpExpansion(
     CallInst *const CI, uint64_t Size,
     const TargetTransformInfo::MemCmpExpansionOptions &Options,
     const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
-    const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout)
+    const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)
     : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
-      NumLoadsPerBlock(NumLoadsPerBlock),
+      NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),
       IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
@@ -171,8 +171,8 @@
 
 unsigned MemCmpExpansion::getNumBlocks() {
   if (IsUsedForZeroCmp)
-    return getNumLoads() / NumLoadsPerBlock +
-           (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0);
+    return getNumLoads() / NumLoadsPerBlockForZeroCmp +
+           (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
   return getNumLoads();
 }
 
@@ -249,7 +249,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
   Value *Diff;
 
   const unsigned NumLoads =
-      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock);
+      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
 
   // For a single-block expansion, start inserting before the memcmp call.
   if (LoadCmpBlocks.empty())
@@ -519,8 +519,6 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
 /// A memcmp expansion that only has one block of load and compare can bypass
 /// the compare, branch, and phi IR that is required in the general case.
 Value *MemCmpExpansion::getMemCmpOneBlock() {
-  assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
-
   Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
   Value *Source1 = CI->getArgOperand(0);
   Value *Source2 = CI->getArgOperand(1);
@@ -570,7 +568,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
   // not need to set up any extra blocks. This case could be handled in the DAG,
   // but since we have all of the machinery to flexibly expand any memcpy here,
   // we choose to handle this case too to avoid fragmented lowering.
-  if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) {
+  if ((!IsUsedForZeroCmp && NumLoadsPerBlockForZeroCmp != 1) ||
+      getNumBlocks() != 1) {
     BasicBlock *StartBlock = CI->getParent();
     EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
     setupEndBlockPHINodes();
@@ -596,8 +595,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
     return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
                                : getMemCmpExpansionZeroCase();
 
-  // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
-  if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock();
+  if (getNumBlocks() == 1)
+    return getMemCmpOneBlock();
 
   for (unsigned I = 0; I < getNumBlocks(); ++I) {
     emitLoadCompareBlock(I);
@@ -709,8 +708,12 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   const unsigned MaxNumLoads =
       TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
 
+  unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()
+                                  ? MemCmpEqZeroNumLoadsPerBlock
+                                  : TLI->getMemcmpEqZeroLoadsPerBlock();
+
   MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
-                            IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL);
+                            IsUsedForZeroCmp, NumLoadsPerBlock, *DL);
 
   // Don't expand if this will require more loads than desired by the target.
   if (Expansion.getNumLoads() == 0) {
```
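For reference, the arithmetic behind the 'memcmp() == 0' expansion illustrated in the new TargetLowering.h comment can be checked with a small standalone C++ sketch (illustrative only, not code produced by the pass): XOR each load pair, OR the results, and compare against zero. The helper name and test values below are made up; since only equality is tested, the result does not depend on byte order.

```cpp
// Standalone sketch (not LLVM code) of the equality-only expansion that the
// pass models for 'memcmp(a, b, 3) == 0' with two load pairs in one block:
// XOR each pair, OR the results, then compare against 0.
#include <cassert>
#include <cstdint>
#include <cstring>

static bool memcmp3EqZero(const unsigned char *A, const unsigned char *B) {
  uint16_t A0, B0;                      // a0 = load2bytes &a[0], b0 = load2bytes &b[0]
  std::memcpy(&A0, A, 2);
  std::memcpy(&B0, B, 2);
  uint8_t A2 = A[2], B2 = B[2];         // a2 = load1byte &a[2], b2 = load1byte &b[2]
  return ((A0 ^ B0) | (A2 ^ B2)) == 0;  // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
}

int main() {
  const unsigned char X[] = {1, 2, 3};
  const unsigned char Y[] = {1, 2, 4};
  // Agrees with the library memcmp for the equality-only question.
  assert(memcmp3EqZero(X, X) == (std::memcmp(X, X, 3) == 0));
  assert(memcmp3EqZero(X, Y) == (std::memcmp(X, Y, 3) == 0));
  return 0;
}
```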

