summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-06-27 23:15:01 +0000
committerSanjay Patel <spatel@rotateright.com>2017-06-27 23:15:01 +0000
commit4b23fa0abf4a8397f5a4e89c1350bdbeb55abd01 (patch)
tree8fe1efbdef2cca621f7ed8b823e8e93178cf3af1 /llvm/lib/CodeGen
parentc5fa6358ba73e50f06348fb56132c3700aaa1b3e (diff)
downloadbcm5719-llvm-4b23fa0abf4a8397f5a4e89c1350bdbeb55abd01.tar.gz
bcm5719-llvm-4b23fa0abf4a8397f5a4e89c1350bdbeb55abd01.zip
[CGP] add specialization for memcmp expansion with only one basic block
llvm-svn: 306485
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp42
1 files changed, 41 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 474a214723e..892b85fe239 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1679,6 +1679,7 @@ class MemCmpExpansion {
void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase(unsigned Size);
Value *getMemCmpEqZeroOneBlock(unsigned Size);
+ Value *getMemCmpOneBlock(unsigned Size);
unsigned getLoadSize(unsigned Size);
unsigned getNumLoads(unsigned Size);
@@ -1711,7 +1712,7 @@ MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
// we choose to handle this case too to avoid fragmented lowering.
IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
NumBlocks = calculateNumBlocks(Size);
- if (!IsUsedForZeroCmp || NumBlocks != 1) {
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
@@ -2090,6 +2091,41 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
+ assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian() && Size != 1) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ // TODO: Instead of comparing ULT, just subtract and return the difference?
+ Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+ Type *I32 = Builder.getInt32Ty();
+ Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
+ ConstantInt::get(I32, 1));
+ return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
+}
+
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
@@ -2097,6 +2133,10 @@ Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
getMemCmpExpansionZeroCase(Size);
+ // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
+ if (NumBlocks == 1 && NumLoadsPerBlock == 1)
+ return getMemCmpOneBlock(Size);
+
// This loop calls emitLoadCompareBlock for comparing Size bytes of the two
// memcmp sources. It starts with loading using the maximum load size set by
// the target. It processes any remaining bytes using a load size which is the
OpenPOWER on IntegriCloud