summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp274
1 files changed, 109 insertions, 165 deletions
diff --git a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 24071277427..d0ff4441371 100644
--- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -85,16 +85,15 @@ class ARMCodeGenPrepare : public FunctionPass {
const ARMSubtarget *ST = nullptr;
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
- Type *OrigTy = nullptr;
- unsigned TypeSize = 0;
- bool isNarrowInstSupported(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
public:
static char ID;
+ static unsigned TypeSize;
+ Type *OrigTy = nullptr;
ARMCodeGenPrepare() : FunctionPass(ID) {}
@@ -126,65 +125,66 @@ static bool isSigned(Value *V) {
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
/// types are accepted so we can handle switches.
static bool isSupportedType(Value *V) {
- if (V->getType()->isVoidTy())
+ LLVM_DEBUG(dbgs() << "ARM CGP: isSupportedType: " << *V << "\n");
+ Type *Ty = V->getType();
+ if (Ty->isVoidTy())
return true;
- const IntegerType *IntTy = dyn_cast<IntegerType>(V->getType());
- if (!IntTy)
- return false;
+ if (auto *Ld = dyn_cast<LoadInst>(V))
+ Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
- // Don't try to promote boolean values.
- if (IntTy->getBitWidth() == 1)
+ const IntegerType *IntTy = dyn_cast<IntegerType>(Ty);
+ if (!IntTy) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, not an integer.\n");
return false;
+ }
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- return isSupportedType(ZExt->getOperand(0));
+ return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
+}
- return IntTy->getBitWidth() <= 16;
+/// Return true if the given value is a leaf in the use-def chain, producing
+/// a narrow (i8, i16) value. These values will be zext to start the promotion
+/// of the tree to i32. We guarantee that these won't populate the upper bits
+/// of the register. ZExt on the loads will be free, and the same for call
+/// return values because we only accept ones that guarantee a zeroext ret val.
+/// Many arguments will have the zeroext attribute too, so those would be free
+/// too.
+static bool isSource(Value *V) {
+ // TODO Allow truncs and zext to be sources.
+ if (isa<Argument>(V))
+ return true;
+ else if (isa<LoadInst>(V))
+ return true;
+ else if (auto *Call = dyn_cast<CallInst>(V))
+ return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ return false;
}
/// Return true if V will require any promoted values to be truncated for the
-/// use to be valid.
+/// the IR to remain valid. We can't mutate the value type of these
+/// instructions.
static bool isSink(Value *V) {
+ // TODO The truncate also isn't actually necessary because we would already
+ // proved that the data value is kept within the range of the original data
+ // type.
auto UsesNarrowValue = [](Value *V) {
- return V->getType()->getScalarSizeInBits() <= 32;
+ return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
};
if (auto *Store = dyn_cast<StoreInst>(V))
return UsesNarrowValue(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
return UsesNarrowValue(Return->getReturnValue());
+ if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return UsesNarrowValue(Trunc->getOperand(0));
return isa<CallInst>(V);
}
-/// Return true if the given value is a leaf that will need to be zext'd.
-static bool isSource(Value *V) {
- if (isa<Argument>(V) && isSupportedType(V))
- return true;
- else if (isa<TruncInst>(V))
- return true;
- else if (auto *ZExt = dyn_cast<ZExtInst>(V))
- // ZExt can be a leaf if its the only user of a load.
- return isa<LoadInst>(ZExt->getOperand(0)) &&
- ZExt->getOperand(0)->hasOneUse();
- else if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Load = dyn_cast<LoadInst>(V)) {
- if (!isa<IntegerType>(Load->getType()))
- return false;
- // A load is a leaf, unless its already just being zext'd.
- if (Load->hasOneUse() && isa<ZExtInst>(*Load->use_begin()))
- return false;
-
- return true;
- }
- return false;
-}
-
/// Return whether the instruction can be promoted within any modifications to
/// it's operands or result.
static bool isSafeOverflow(Instruction *I) {
+ // FIXME Do we need NSW too?
if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
return true;
@@ -222,19 +222,18 @@ static bool isSafeOverflow(Instruction *I) {
}
static bool shouldPromote(Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
- if (!isa<IntegerType>(V->getType()))
- return false;
+ if (isSource(V))
+ return true;
- if (isa<StoreInst>(I) || isa<TerminatorInst>(I) || isa<TruncInst>(I) ||
- isa<ICmpInst>(I))
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
return false;
- if (auto *ZExt = dyn_cast<ZExtInst>(I))
- return !ZExt->getDestTy()->isIntegerTy(32);
+ if (isa<ICmpInst>(I))
+ return false;
return true;
}
@@ -262,7 +261,7 @@ static bool isPromotedResultSafe(Value *V) {
/// Return the intrinsic for the instruction that can perform the same
/// operation but on a narrow type. This is using the parallel dsp intrinsics
/// on scalar values.
-static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
+static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
// Whether we use the signed or unsigned versions of these intrinsics
// doesn't matter because we're not using the GE bits that they set in
// the APSR.
@@ -270,10 +269,10 @@ static Intrinsic::ID getNarrowIntrinsic(Instruction *I, unsigned TypeSize) {
default:
break;
case Instruction::Add:
- return TypeSize == 16 ? Intrinsic::arm_uadd16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
Intrinsic::arm_uadd8;
case Instruction::Sub:
- return TypeSize == 16 ? Intrinsic::arm_usub16 :
+ return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
Intrinsic::arm_usub8;
}
llvm_unreachable("unhandled opcode for narrow intrinsic");
@@ -285,10 +284,9 @@ void IRPromoter::Mutate(Type *OrigTy,
SmallPtrSetImpl<Instruction*> &Roots) {
IRBuilder<> Builder{Ctx};
Type *ExtTy = Type::getInt32Ty(M->getContext());
- unsigned TypeSize = OrigTy->getPrimitiveSizeInBits();
SmallPtrSet<Value*, 8> Promoted;
- LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << TypeSize
- << " to 32-bits\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
+ << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
SmallVector<Instruction*, 4> Users;
@@ -325,7 +323,7 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
<< *I << "\n");
Function *DSPInst =
- Intrinsic::getDeclaration(M, getNarrowIntrinsic(I, TypeSize));
+ Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
Builder.SetInsertPoint(I);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
Value *Args[] = { I->getOperand(0), I->getOperand(1) };
@@ -353,9 +351,7 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
for (auto V : Leaves) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
- if (auto *ZExt = dyn_cast<ZExtInst>(V))
- ZExt->mutateType(ExtTy);
- else if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
@@ -401,17 +397,9 @@ void IRPromoter::Mutate(Type *OrigTy,
for (auto *V : Visited) {
if (Leaves.count(V))
continue;
- if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- if (ZExt->getDestTy() != ExtTy) {
- ZExt->mutateType(ExtTy);
- Promoted.insert(ZExt);
- }
- else if (ZExt->getSrcTy() == ExtTy) {
- ReplaceAllUsersOfWith(V, ZExt->getOperand(0));
- InstsToRemove.push_back(ZExt);
- }
+
+ if (!isa<Instruction>(V))
continue;
- }
if (!shouldPromote(V) || isPromotedResultSafe(V))
continue;
@@ -459,30 +447,6 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete.\n");
}
-bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
- if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
- return false;
-
- if (ST->isThumb() && !ST->hasThumb2())
- return false;
-
- if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
- return false;
-
- // TODO
- // Would it be profitable? For Thumb code, these parallel DSP instructions
- // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
- // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
- // halved. They also do not take immediates as operands.
- for (auto &Op : I->operands()) {
- if (isa<Constant>(Op)) {
- if (!EnableDSPWithImms)
- return false;
- }
- }
- return true;
-}
-
/// We accept most instructions, as well as Arguments and ConstantInsts. We
/// Disallow casts other than zext and truncs and only allow calls if their
/// return value is zeroext. We don't allow opcodes that can introduce sign
@@ -490,42 +454,42 @@ bool ARMCodeGenPrepare::isNarrowInstSupported(Instruction *I) {
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
- // Non-instruction values that we can handle.
- if (isa<ConstantInt>(V) || isa<Argument>(V))
- return true;
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isEquality() || !ICmp->isSigned();
// Memory instructions
- if (isa<StoreInst>(V) || isa<LoadInst>(V) || isa<GetElementPtrInst>(V))
+ if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
// Branches and targets.
- if (auto *ICmp = dyn_cast<ICmpInst>(V))
- return ICmp->isEquality() || !ICmp->isSigned();
-
if( isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
return true;
- if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V))
- return true;
+ // Non-instruction values that we can handle.
+ if (isa<ConstantInt>(V) || isa<Argument>(V))
+ return isSupportedType(V);
+
+ if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
+ isa<LoadInst>(V))
+ return isSupportedType(V);
+
+ // Currently, Trunc is the only cast we support.
+ if (auto *Trunc = dyn_cast<TruncInst>(V))
+ return isSupportedType(Trunc->getOperand(0));
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they can
// still be roots.
if (auto *Call = dyn_cast<CallInst>(V))
- return Call->hasRetAttr(Attribute::AttrKind::ZExt);
- else if (auto *Cast = dyn_cast<CastInst>(V)) {
- if (isa<ZExtInst>(Cast))
- return Cast->getDestTy()->getScalarSizeInBits() <= 32;
- else if (auto *Trunc = dyn_cast<TruncInst>(V))
- return Trunc->getDestTy()->getScalarSizeInBits() <= TypeSize;
- else {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, unsupported cast.\n");
- return false;
- }
- } else if (!isa<BinaryOperator>(V)) {
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+
+ if (!isa<BinaryOperator>(V)) {
LLVM_DEBUG(dbgs() << "ARM CGP: No, not a binary operator.\n");
return false;
}
+ if (!isSupportedType(V))
+ return false;
bool res = !isSigned(V);
if (!res)
@@ -537,39 +501,49 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
- if (!isSupportedType(V))
- return false;
+ if (isPromotedResultSafe(V))
+ return true;
- unsigned VSize = 0;
- if (auto *Ld = dyn_cast<LoadInst>(V)) {
- auto *PtrTy = cast<PointerType>(Ld->getPointerOperandType());
- VSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
- } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
- VSize = ZExt->getOperand(0)->getType()->getPrimitiveSizeInBits();
- } else {
- VSize = V->getType()->getPrimitiveSizeInBits();
- }
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
- if (VSize > TypeSize)
+ // If promotion is not safe, can we use a DSP instruction to natively
+ // handle the narrow type?
+ if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
return false;
- if (isPromotedResultSafe(V))
- return true;
+ if (ST->isThumb() && !ST->hasThumb2())
+ return false;
- if (auto *I = dyn_cast<Instruction>(V))
- return isNarrowInstSupported(I);
+ if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
+ return false;
- return false;
+ // TODO
+ // Would it be profitable? For Thumb code, these parallel DSP instructions
+ // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
+ // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
+ // halved. They also do not take immediates as operands.
+ for (auto &Op : I->operands()) {
+ if (isa<Constant>(Op)) {
+ if (!EnableDSPWithImms)
+ return false;
+ }
+ }
+ return true;
}
bool ARMCodeGenPrepare::TryToPromote(Value *V) {
OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits();
+ if (TypeSize > 16)
+ return false;
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
- LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
+ << TypeSize << "\n");
SetVector<Value*> WorkList;
SmallPtrSet<Value*, 8> Leaves;
@@ -584,6 +558,10 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (CurrentVisited.count(V))
return true;
+ // Ignore pointer value that aren't instructions.
+ if (!isa<Instruction>(V) && isa<PointerType>(V->getType()))
+ return true;
+
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
return false;
@@ -638,41 +616,10 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
}
}
- unsigned NumToPromote = 0;
- unsigned Cost = 0;
- for (auto *V : CurrentVisited) {
- // Truncs will cause a uxt and no zeroext arguments will often require
- // a uxt somewhere.
- if (isa<TruncInst>(V))
- ++Cost;
- else if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Arg->hasZExtAttr())
- ++Cost;
- }
-
- // Mem ops can automatically be extended/truncated and non-instructions
- // don't need anything done.
- if (Leaves.count(V) || isa<StoreInst>(V) || !isa<Instruction>(V))
- continue;
-
- // Will need to truncate calls args and returns.
- if (Roots.count(cast<Instruction>(V))) {
- ++Cost;
- continue;
- }
-
- if (shouldPromote(V))
- ++NumToPromote;
- }
-
LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
for (auto *I : CurrentVisited)
I->dump();
);
- LLVM_DEBUG(dbgs() << "ARM CGP: Cost of promoting " << NumToPromote
- << " instructions = " << Cost << "\n");
- if (Cost > NumToPromote || (NumToPromote == 0))
- return false;
Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
return true;
@@ -712,12 +659,8 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
for (auto &Op : CI.operands()) {
- if (auto *I = dyn_cast<Instruction>(Op)) {
- if (isa<ZExtInst>(I))
- MadeChange |= TryToPromote(I->getOperand(0));
- else
- MadeChange |= TryToPromote(I);
- }
+ if (auto *I = dyn_cast<Instruction>(Op))
+ MadeChange |= TryToPromote(I);
}
}
}
@@ -744,6 +687,7 @@ INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
false, false)
char ARMCodeGenPrepare::ID = 0;
+unsigned ARMCodeGenPrepare::TypeSize = 0;
FunctionPass *llvm::createARMCodeGenPreparePass() {
return new ARMCodeGenPrepare();
OpenPOWER on IntegriCloud