diff options
author | Sanjay Patel <spatel@rotateright.com> | 2017-06-27 23:15:01 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2017-06-27 23:15:01 +0000 |
commit | 4b23fa0abf4a8397f5a4e89c1350bdbeb55abd01 (patch) | |
tree | 8fe1efbdef2cca621f7ed8b823e8e93178cf3af1 /llvm/lib/CodeGen | |
parent | c5fa6358ba73e50f06348fb56132c3700aaa1b3e (diff) | |
download | bcm5719-llvm-4b23fa0abf4a8397f5a4e89c1350bdbeb55abd01.tar.gz bcm5719-llvm-4b23fa0abf4a8397f5a4e89c1350bdbeb55abd01.zip |
[CGP] add specialization for memcmp expansion with only one basic block
llvm-svn: 306485
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 42 |
1 files changed, 41 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 474a214723e..892b85fe239 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1679,6 +1679,7 @@ class MemCmpExpansion { void emitMemCmpResultBlock(); Value *getMemCmpExpansionZeroCase(unsigned Size); Value *getMemCmpEqZeroOneBlock(unsigned Size); + Value *getMemCmpOneBlock(unsigned Size); unsigned getLoadSize(unsigned Size); unsigned getNumLoads(unsigned Size); @@ -1711,7 +1712,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, // we choose to handle this case too to avoid fragmented lowering. IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); NumBlocks = calculateNumBlocks(Size); - if (!IsUsedForZeroCmp || NumBlocks != 1) { + if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) { BasicBlock *StartBlock = CI->getParent(); EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); setupEndBlockPHINodes(); @@ -2090,6 +2091,41 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); } +/// A memcmp expansion that only has one block of load and compare can bypass +/// the compare, branch, and phi IR that is required in the general case. +Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) { + assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian() && Size != 1) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + // TODO: Instead of comparing ULT, just subtract and return the difference? + Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Type *I32 = Builder.getInt32Ty(); + Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1), + ConstantInt::get(I32, 1)); + return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0)); +} + // This function expands the memcmp call into an inline expansion and returns // the memcmp result. Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { @@ -2097,6 +2133,10 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : getMemCmpExpansionZeroCase(Size); + // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). + if (NumBlocks == 1 && NumLoadsPerBlock == 1) + return getMemCmpOneBlock(Size); + // This loop calls emitLoadCompareBlock for comparing Size bytes of the two // memcmp sources. It starts with loading using the maximum load size set by // the target. It processes any remaining bytes using a load size which is the |