diff options
author | Sanjoy Das <sanjoy@playingwithpointers.com> | 2016-12-11 20:07:15 +0000 |
---|---|---|
committer | Sanjoy Das <sanjoy@playingwithpointers.com> | 2016-12-11 20:07:15 +0000 |
commit | 3336f681e3f548e082d363b848edaeb3e0fcb644 (patch) | |
tree | 36736acde3a80e22994651d343fe2d3435179107 /llvm/lib/IR/Verifier.cpp | |
parent | 81ed3499cdf61dcf3cfb426de24f4f406bd04b0f (diff) | |
download | bcm5719-llvm-3336f681e3f548e082d363b848edaeb3e0fcb644.tar.gz bcm5719-llvm-3336f681e3f548e082d363b848edaeb3e0fcb644.zip |
[Verifier] Add verification for TBAA metadata
Summary:
This change adds some verification in the IR verifier around struct path
TBAA metadata.
Other than some basic sanity checks (e.g. we get constant integers where
we expect constant integers), this checks:
- That by the time an struct access tuple `(base-type, offset)` is
"reduced" to a scalar base type, the offset is `0`. For instance, in
C++ you can't start from, say `("struct-a", 16)`, and end up with
`("int", 4)` -- by the time the base type is `"int"`, the offset
better be zero. In particular, a variant of this invariant is needed
for `llvm::getMostGenericTBAA` to be correct.
- That there are no cycles in a struct path.
- That struct type nodes have their offsets listed in an ascending
order.
- That when generating the struct access path, you eventually reach the
access type listed in the tbaa tag node.
Reviewers: dexonsmith, chandlerc, reames, mehdi_amini, manmanren
Subscribers: mcrosier, llvm-commits
Differential Revision: https://reviews.llvm.org/D26438
llvm-svn: 289402
Diffstat (limited to 'llvm/lib/IR/Verifier.cpp')
-rw-r--r-- | llvm/lib/IR/Verifier.cpp | 267 |
1 files changed, 265 insertions, 2 deletions
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 6f7e344b1b3..037506a389c 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -187,6 +187,14 @@ private: *OS << *C; } + void Write(const APInt *AI) { + if (!AI) + return; + *OS << *AI << '\n'; + } + + void Write(const unsigned i) { *OS << i << '\n'; } + template <typename T> void Write(ArrayRef<T> Vs) { for (const T &V : Vs) Write(V); @@ -285,6 +293,17 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { // constant expressions, we can arrive at a particular user many times. SmallPtrSet<const Value *, 32> GlobalValueVisited; + /// Cache of TBAA base nodes that have already been visited. This cachce maps + /// a node that has been visited to a pair (IsInvalid, BitWidth) where + /// + /// \c IsInvalid is true iff the node is invalid. + /// \c BitWidth, if non-zero, is the bitwidth of the integer used to denoting + /// the offset of the access. If zero, only a zero offset is allowed. + /// + /// \c BitWidth has no meaning if \c IsInvalid is true. + typedef std::pair<bool, unsigned> TBAABaseNodeSummary; + DenseMap<MDNode *, TBAABaseNodeSummary> TBAABaseNodes; + void checkAtomicMemAccessSize(Type *Ty, const Instruction *I); public: @@ -393,6 +412,14 @@ private: void visitDereferenceableMetadata(Instruction &I, MDNode *MD); void visitTBAAMetadata(Instruction &I, MDNode *MD); + /// \name Helper functions used by \c visitTBAAMetadata. + /// @{ + MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, MDNode *BaseNode, + APInt &Offset); + TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I, MDNode *BaseNode); + TBAABaseNodeSummary verifyTBAABaseNodeImpl(Instruction &I, MDNode *BaseNode); + /// @} + template <class Ty> bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); #include "llvm/IR/Metadata.def" @@ -3657,6 +3684,174 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { "dereferenceable_or_null metadata value must be an i64!", &I); } +/// Verify that \p BaseNode can be used as the "base type" in the struct-path +/// TBAA scheme. This means \p BaseNode is either a scalar node, or a +/// struct-type node describing an aggregate data structure (like a struct). +Verifier::TBAABaseNodeSummary Verifier::verifyTBAABaseNode(Instruction &I, + MDNode *BaseNode) { + if (BaseNode->getNumOperands() < 2) { + CheckFailed("Base nodes must have at least two operands", &I, BaseNode); + return {true, ~0u}; + } + + auto Itr = TBAABaseNodes.find(BaseNode); + if (Itr != TBAABaseNodes.end()) + return Itr->second; + + auto Result = verifyTBAABaseNodeImpl(I, BaseNode); + auto InsertResult = TBAABaseNodes.insert({BaseNode, Result}); + (void)InsertResult; + assert(InsertResult.second && "We just checked!"); + return Result; +} + +Verifier::TBAABaseNodeSummary +Verifier::verifyTBAABaseNodeImpl(Instruction &I, MDNode *BaseNode) { + const Verifier::TBAABaseNodeSummary InvalidNode = {true, ~0u}; + + if (BaseNode->getNumOperands() == 2) { + // This is a scalar base node. + if (!BaseNode->getOperand(0) || !BaseNode->getOperand(1)) { + CheckFailed("Null operands in scalar type nodes!", &I, BaseNode); + return InvalidNode; + } + if (!isa<MDNode>(BaseNode->getOperand(1))) { + CheckFailed("Invalid parent operand in scalar TBAA node", &I, BaseNode); + return InvalidNode; + } + if (!isa<MDString>(BaseNode->getOperand(0))) { + CheckFailed("Invalid name operand in scalar TBAA node", &I, BaseNode); + return InvalidNode; + } + + // Scalar nodes can only be accessed at offset 0. + return {false, 0}; + } + + if (BaseNode->getNumOperands() % 2 != 1) { + CheckFailed("Struct tag nodes must have an odd number of operands!", + BaseNode); + return InvalidNode; + } + + bool Failed = false; + + Optional<APInt> PrevOffset; + unsigned BitWidth = ~0u; + + // We've already checked that BaseNode is not a degenerate root node with one + // operand in \c verifyTBAABaseNode, so this loop should run at least once. + for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) { + const MDOperand &FieldTy = BaseNode->getOperand(Idx); + const MDOperand &FieldOffset = BaseNode->getOperand(Idx + 1); + if (!isa<MDNode>(FieldTy)) { + CheckFailed("Incorrect field entry in struct type node!", &I, BaseNode); + Failed = true; + continue; + } + + auto *OffsetEntryCI = + mdconst::dyn_extract_or_null<ConstantInt>(FieldOffset); + if (!OffsetEntryCI) { + CheckFailed("Offset entries must be constants!", &I, BaseNode); + Failed = true; + continue; + } + + if (BitWidth == ~0u) + BitWidth = OffsetEntryCI->getBitWidth(); + + if (OffsetEntryCI->getBitWidth() != BitWidth) { + CheckFailed( + "Bitwidth between the offsets and struct type entries must match", &I, + BaseNode); + Failed = true; + continue; + } + + // NB! As far as I can tell, we generate a non-strictly increasing offset + // sequence only from structs that have zero size bit fields. When + // recursing into a contained struct in \c getFieldNodeFromTBAABaseNode we + // pick the field lexically the latest in struct type metadata node. This + // mirrors the actual behavior of the alias analysis implementation. + bool IsAscending = + !PrevOffset || PrevOffset->ule(OffsetEntryCI->getValue()); + + if (!IsAscending) { + CheckFailed("Offsets must be increasing!", &I, BaseNode); + Failed = true; + } + + PrevOffset = OffsetEntryCI->getValue(); + } + + return Failed ? InvalidNode : Verifier::TBAABaseNodeSummary(false, BitWidth); +} + +static bool IsRootTBAANode(const MDNode *MD) { + return MD->getNumOperands() < 2; +} + +static bool IsScalarTBAANodeImpl(const MDNode *MD, + SmallPtrSetImpl<const MDNode *> &Visited) { + if (MD->getNumOperands() == 2) + return true; + + if (MD->getNumOperands() != 3) + return false; + + auto *Offset = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2)); + if (!(Offset && Offset->isZero() && isa<MDString>(MD->getOperand(0)))) + return false; + + auto *Parent = dyn_cast<MDNode>(MD->getOperand(1)); + return Visited.insert(Parent).second && + (IsRootTBAANode(Parent) || IsScalarTBAANodeImpl(Parent, Visited)); +} + +static bool IsScalarTBAANode(const MDNode *MD) { + SmallPtrSet<const MDNode *, 4> Visited; + return IsScalarTBAANodeImpl(MD, Visited); +} + +/// Returns the field node at the offset \p Offset in \p BaseNode. Update \p +/// Offset in place to be the offset within the field node returned. +/// +/// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode. +MDNode *Verifier::getFieldNodeFromTBAABaseNode(Instruction &I, MDNode *BaseNode, + APInt &Offset) { + assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!"); + + // Scalar nodes have only one possible "field" -- their parent in the access + // hierarchy. Offset must be zero at this point, but our caller is supposed + // to Assert that. + if (BaseNode->getNumOperands() == 2) + return cast<MDNode>(BaseNode->getOperand(1)); + + for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) { + auto *OffsetEntryCI = + mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1)); + if (OffsetEntryCI->getValue().ugt(Offset)) { + if (Idx == 1) { + CheckFailed("Could not find TBAA parent in struct type node", &I, + BaseNode, &Offset); + return nullptr; + } + + auto *PrevOffsetEntryCI = + mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx - 1)); + Offset -= PrevOffsetEntryCI->getValue(); + return cast<MDNode>(BaseNode->getOperand(Idx - 2)); + } + } + + auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>( + BaseNode->getOperand(BaseNode->getNumOperands() - 1)); + + Offset -= LastOffsetEntryCI->getValue(); + return cast<MDNode>(BaseNode->getOperand(BaseNode->getNumOperands() - 2)); +} + void Verifier::visitTBAAMetadata(Instruction &I, MDNode *MD) { bool IsStructPathTBAA = isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; @@ -3664,6 +3859,70 @@ void Verifier::visitTBAAMetadata(Instruction &I, MDNode *MD) { Assert(IsStructPathTBAA, "Old-style TBAA is no longer allowed, use struct-path TBAA instead", &I); + + Assert(MD->getNumOperands() < 5, + "Struct tag metadata must have either 3 or 4 operands", &I, MD); + + MDNode *BaseNode = dyn_cast_or_null<MDNode>(MD->getOperand(0)); + MDNode *AccessType = dyn_cast_or_null<MDNode>(MD->getOperand(1)); + + if (MD->getNumOperands() == 4) { + auto *IsImmutableCI = + mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(3)); + Assert(IsImmutableCI, + "Immutability tag on struct tag metadata must be a constant", &I, + MD); + Assert(IsImmutableCI->isZero() || IsImmutableCI->isOne(), + "Immutability part of the struct tag metadata must be either 0 or 1", + &I, MD); + } + + Assert(BaseNode && AccessType, + "Malformed struct tag metadata: base and access-type " + "should be non-null and point to Metadata nodes", + &I, MD, BaseNode, AccessType); + + Assert(IsScalarTBAANode(AccessType), "Access type node must be scalar", &I, + MD, AccessType); + + auto *OffsetCI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2)); + Assert(OffsetCI, "Offset must be constant integer", &I, MD); + + APInt Offset = OffsetCI->getValue(); + bool SeenAccessTypeInPath = false; + + SmallPtrSet<MDNode *, 4> StructPath; + + for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode); + BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset)) { + if (!StructPath.insert(BaseNode).second) { + CheckFailed("Cycle detected in struct path", &I, MD); + return; + } + + bool Invalid; + unsigned BaseNodeBitWidth; + std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode); + + // If the base node is invalid in itself, then we've already printed all the + // errors we wanted to print. + if (Invalid) + return; + + SeenAccessTypeInPath |= BaseNode == AccessType; + + if (IsScalarTBAANode(BaseNode) || BaseNode == AccessType) + Assert(Offset == 0, "Offset not zero at the point of scalar access", &I, + MD, &Offset); + + Assert(BaseNodeBitWidth == Offset.getBitWidth() || + (BaseNodeBitWidth == 0 && Offset == 0), + "Access bit-width not the same as description bit-width", &I, MD, + BaseNodeBitWidth, Offset.getBitWidth()); + } + + Assert(SeenAccessTypeInPath, "Did not see access type in access path!", &I, + MD); } /// verifyInstruction - Verify that an instruction is well formed. @@ -3801,8 +4060,12 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable_or_null)) visitDereferenceableMetadata(I, MD); - if (MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) - visitTBAAMetadata(I, MD); + if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa)) { + Assert(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) || + isa<VAArgInst>(I), + "TBAA is only for loads, stores and calls!", &I); + visitTBAAMetadata(I, TBAA); + } if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) { Assert(I.getType()->isPointerTy(), "align applies only to pointer types", |