diff options
Diffstat (limited to 'clang/lib/StaticAnalyzer')
-rw-r--r-- | clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp | 83 | ||||
-rw-r--r-- | clang/lib/StaticAnalyzer/Core/ProgramState.cpp | 103 | ||||
-rw-r--r-- | clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 5 |
3 files changed, 111 insertions, 80 deletions
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index b1a54e77951..883c6a66329 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -65,21 +65,8 @@ private: /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); - /// This is called from getPointedToSymbol() to resolve symbol references for - /// the region underlying a LazyCompoundVal. This is the default binding - /// for the LCV, which could be a conjured symbol from a function call that - /// initialized the region. It only returns the conjured symbol if the LCV - /// covers the entire region, e.g. we avoid false positives by not returning - /// a default bindingc for an entire struct if the symbol for only a single - /// field or element within it is requested. - // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so - // that they are also appropriately tainted. - static SymbolRef getLCVSymbol(CheckerContext &C, - nonloc::LazyCompoundVal &LCV); - - /// \brief Given a pointer argument, get the symbol of the value it contains - /// (points to). - static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); + /// \brief Given a pointer argument, return the value it points to. + static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); /// Functions defining the attack surface. typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, @@ -186,9 +173,14 @@ private: static inline bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, CheckerContext &C) { - return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || - (E->getType().getTypePtr()->isPointerType() && - State->isTainted(getPointedToSymbol(C, E)))); + if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) + return true; + + if (!E->getType().getTypePtr()->isPointerType()) + return false; + + Optional<SVal> V = getPointedToSVal(C, E); + return (V && State->isTainted(*V)); } /// \brief Pre-process a function which propagates taint according to the @@ -400,9 +392,9 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, if (CE->getNumArgs() < (ArgNum + 1)) return false; const Expr* Arg = CE->getArg(ArgNum); - SymbolRef Sym = getPointedToSymbol(C, Arg); - if (Sym) - State = State->addTaint(Sym); + Optional<SVal> V = getPointedToSVal(C, Arg); + if (V) + State = State->addTaint(*V); } // Clear up the taint info from the state. @@ -473,47 +465,20 @@ bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ return false; } -SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C, - nonloc::LazyCompoundVal &LCV) { - StoreManager &StoreMgr = C.getStoreManager(); - - // getLCVSymbol() is reached in a PostStmt so we can always expect a default - // binding to exist if one is present. - if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) { - SymbolRef Sym = binding->getAsSymbol(); - if (!Sym) - return nullptr; - - // If the LCV covers an entire base region return the default conjured symbol. - if (LCV.getRegion() == LCV.getRegion()->getBaseRegion()) - return Sym; - } - - // Otherwise, return a nullptr as there's not yet a functional way to taint - // sub-regions of LCVs. - return nullptr; -} - -SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, - const Expr* Arg) { +Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C, + const Expr* Arg) { ProgramStateRef State = C.getState(); SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); if (AddrVal.isUnknownOrUndef()) - return nullptr; + return None; Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); if (!AddrLoc) - return nullptr; + return None; const PointerType *ArgTy = dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); - SVal Val = State->getSVal(*AddrLoc, - ArgTy ? ArgTy->getPointeeType(): QualType()); - - if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>()) - return getLCVSymbol(C, *LCV); - - return Val.getAsSymbol(); + return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType()); } ProgramStateRef @@ -633,9 +598,9 @@ ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, // The arguments are pointer arguments. The data they are pointing at is // tainted after the call. const Expr* Arg = CE->getArg(i); - SymbolRef Sym = getPointedToSymbol(C, Arg); - if (Sym) - State = State->addTaint(Sym); + Optional<SVal> V = getPointedToSVal(C, Arg); + if (V) + State = State->addTaint(*V); } return State; } @@ -710,10 +675,10 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, // Check for taint. ProgramStateRef State = C.getState(); - const SymbolRef PointedToSym = getPointedToSymbol(C, E); + Optional<SVal> PointedToSVal = getPointedToSVal(C, E); SVal TaintedSVal; - if (State->isTainted(PointedToSym)) - TaintedSVal = nonloc::SymbolVal(PointedToSym); + if (PointedToSVal && State->isTainted(*PointedToSVal)) + TaintedSVal = *PointedToSVal; else if (State->isTainted(E, C.getLocationContext())) TaintedSVal = C.getSVal(E); else diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 31556c792fc..fc26de1a1f8 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -644,15 +644,33 @@ ProgramStateRef ProgramState::addTaint(const Stmt *S, if (const Expr *E = dyn_cast_or_null<Expr>(S)) S = E->IgnoreParens(); - SymbolRef Sym = getSVal(S, LCtx).getAsSymbol(); + return addTaint(getSVal(S, LCtx), Kind); +} + +ProgramStateRef ProgramState::addTaint(SVal V, + TaintTagType Kind) const { + SymbolRef Sym = V.getAsSymbol(); if (Sym) return addTaint(Sym, Kind); - const MemRegion *R = getSVal(S, LCtx).getAsRegion(); - addTaint(R, Kind); + // If the SVal represents a structure, try to mass-taint all values within the + // structure. For now it only works efficiently on lazy compound values that + // were conjured during a conservative evaluation of a function - either as + // return values of functions that return structures or arrays by value, or as + // values of structures or arrays passed into the function by reference, + // directly or through pointer aliasing. Such lazy compound values are + // characterized by having exactly one binding in their captured store within + // their parent region, which is a conjured symbol default-bound to the base + // region of the parent region. + if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { + if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return addPartialTaint(Sym, LCV->getRegion(), Kind); + } + } - // Cannot add taint, so just return the state. - return this; + const MemRegion *R = V.getAsRegion(); + return addTaint(R, Kind); } ProgramStateRef ProgramState::addTaint(const MemRegion *R, @@ -674,6 +692,28 @@ ProgramStateRef ProgramState::addTaint(SymbolRef Sym, return NewState; } +ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym, + const SubRegion *SubRegion, + TaintTagType Kind) const { + // Ignore partial taint if the entire parent symbol is already tainted. + if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind) + return this; + + // Partial taint applies if only a portion of the symbol is tainted. + if (SubRegion == SubRegion->getBaseRegion()) + return addTaint(ParentSym, Kind); + + TaintedSubRegionsRef TaintedSubRegions(0, TSRFactory.getTreeFactory()); + if (const TaintedSubRegionsRef *SavedTaintedRegions = + get<DerivedSymTaint>(ParentSym)) + TaintedSubRegions = *SavedTaintedRegions; + + TaintedSubRegions = TaintedSubRegions.add(SubRegion, Kind); + ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, TaintedSubRegions); + assert(NewState); + return NewState; +} + bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx, TaintTagType Kind) const { if (const Expr *E = dyn_cast_or_null<Expr>(S)) @@ -714,31 +754,54 @@ bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const { return false; // Traverse all the symbols this symbol depends on to see if any are tainted. - bool Tainted = false; for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end(); SI != SE; ++SI) { if (!isa<SymbolData>(*SI)) continue; - const TaintTagType *Tag = get<TaintMap>(*SI); - Tainted = (Tag && *Tag == Kind); + if (const TaintTagType *Tag = get<TaintMap>(*SI)) { + if (*Tag == Kind) + return true; + } - // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) - Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind); + if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) { + // If this is a SymbolDerived with a tainted parent, it's also tainted. + if (isTainted(SD->getParentSymbol(), Kind)) + return true; + + // If this is a SymbolDerived with the same parent symbol as another + // tainted SymbolDerived and a region that's a sub-region of that tainted + // symbol, it's also tainted. + if (const TaintedSubRegionsRef *SymRegions = + get<DerivedSymTaint>(SD->getParentSymbol())) { + const TypedValueRegion *R = SD->getRegion(); + for (TaintedSubRegionsRef::iterator I = SymRegions->begin(), + E = SymRegions->end(); + I != E; ++I) { + // FIXME: The logic to identify tainted regions could be more + // complete. For example, this would not currently identify + // overlapping fields in a union as tainted. To identify this we can + // check for overlapping/nested byte offsets. + if (Kind == I->second && + (R == I->first || R->isSubRegionOf(I->first))) + return true; + } + } + } // If memory region is tainted, data is also tainted. - if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) - Tainted = Tainted || isTainted(SRV->getRegion(), Kind); - - // If If this is a SymbolCast from a tainted value, it's also tainted. - if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) - Tainted = Tainted || isTainted(SC->getOperand(), Kind); + if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) { + if (isTainted(SRV->getRegion(), Kind)) + return true; + } - if (Tainted) - return true; + // If this is a SymbolCast from a tainted value, it's also tainted. + if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) { + if (isTainted(SC->getOperand(), Kind)) + return true; + } } - return Tainted; + return false; } diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 3000e13d32c..28f78fa3ff5 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -496,7 +496,10 @@ public: // Part of public interface to class. Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override { RegionBindingsRef B = getRegionBindings(S); - return B.getDefaultBinding(R); + // Default bindings are always applied over a base region so look up the + // base region's default binding, otherwise the lookup will fail when R + // is at an offset from R->getBaseRegion(). + return B.getDefaultBinding(R->getBaseRegion()); } SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType()); |