diff options
Diffstat (limited to 'clang/lib/StaticAnalyzer/Core/ProgramState.cpp')
-rw-r--r-- | clang/lib/StaticAnalyzer/Core/ProgramState.cpp | 103 |
1 files changed, 83 insertions, 20 deletions
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 31556c792fc..fc26de1a1f8 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -644,15 +644,33 @@ ProgramStateRef ProgramState::addTaint(const Stmt *S, if (const Expr *E = dyn_cast_or_null<Expr>(S)) S = E->IgnoreParens(); - SymbolRef Sym = getSVal(S, LCtx).getAsSymbol(); + return addTaint(getSVal(S, LCtx), Kind); +} + +ProgramStateRef ProgramState::addTaint(SVal V, + TaintTagType Kind) const { + SymbolRef Sym = V.getAsSymbol(); if (Sym) return addTaint(Sym, Kind); - const MemRegion *R = getSVal(S, LCtx).getAsRegion(); - addTaint(R, Kind); + // If the SVal represents a structure, try to mass-taint all values within the + // structure. For now it only works efficiently on lazy compound values that + // were conjured during a conservative evaluation of a function - either as + // return values of functions that return structures or arrays by value, or as + // values of structures or arrays passed into the function by reference, + // directly or through pointer aliasing. Such lazy compound values are + // characterized by having exactly one binding in their captured store within + // their parent region, which is a conjured symbol default-bound to the base + // region of the parent region. + if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { + if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return addPartialTaint(Sym, LCV->getRegion(), Kind); + } + } - // Cannot add taint, so just return the state. - return this; + const MemRegion *R = V.getAsRegion(); + return addTaint(R, Kind); } ProgramStateRef ProgramState::addTaint(const MemRegion *R, @@ -674,6 +692,28 @@ ProgramStateRef ProgramState::addTaint(SymbolRef Sym, return NewState; } +ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym, + const SubRegion *SubRegion, + TaintTagType Kind) const { + // Ignore partial taint if the entire parent symbol is already tainted. + if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind) + return this; + + // Partial taint applies if only a portion of the symbol is tainted. + if (SubRegion == SubRegion->getBaseRegion()) + return addTaint(ParentSym, Kind); + + TaintedSubRegionsRef TaintedSubRegions(0, TSRFactory.getTreeFactory()); + if (const TaintedSubRegionsRef *SavedTaintedRegions = + get<DerivedSymTaint>(ParentSym)) + TaintedSubRegions = *SavedTaintedRegions; + + TaintedSubRegions = TaintedSubRegions.add(SubRegion, Kind); + ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, TaintedSubRegions); + assert(NewState); + return NewState; +} + bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx, TaintTagType Kind) const { if (const Expr *E = dyn_cast_or_null<Expr>(S)) @@ -714,31 +754,54 @@ bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const { return false; // Traverse all the symbols this symbol depends on to see if any are tainted. - bool Tainted = false; for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end(); SI != SE; ++SI) { if (!isa<SymbolData>(*SI)) continue; - const TaintTagType *Tag = get<TaintMap>(*SI); - Tainted = (Tag && *Tag == Kind); + if (const TaintTagType *Tag = get<TaintMap>(*SI)) { + if (*Tag == Kind) + return true; + } - // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) - Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind); + if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) { + // If this is a SymbolDerived with a tainted parent, it's also tainted. + if (isTainted(SD->getParentSymbol(), Kind)) + return true; + + // If this is a SymbolDerived with the same parent symbol as another + // tainted SymbolDerived and a region that's a sub-region of that tainted + // symbol, it's also tainted. + if (const TaintedSubRegionsRef *SymRegions = + get<DerivedSymTaint>(SD->getParentSymbol())) { + const TypedValueRegion *R = SD->getRegion(); + for (TaintedSubRegionsRef::iterator I = SymRegions->begin(), + E = SymRegions->end(); + I != E; ++I) { + // FIXME: The logic to identify tainted regions could be more + // complete. For example, this would not currently identify + // overlapping fields in a union as tainted. To identify this we can + // check for overlapping/nested byte offsets. + if (Kind == I->second && + (R == I->first || R->isSubRegionOf(I->first))) + return true; + } + } + } // If memory region is tainted, data is also tainted. - if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) - Tainted = Tainted || isTainted(SRV->getRegion(), Kind); - - // If If this is a SymbolCast from a tainted value, it's also tainted. - if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) - Tainted = Tainted || isTainted(SC->getOperand(), Kind); + if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) { + if (isTainted(SRV->getRegion(), Kind)) + return true; + } - if (Tainted) - return true; + // If this is a SymbolCast from a tainted value, it's also tainted. + if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) { + if (isTainted(SC->getOperand(), Kind)) + return true; + } } - return Tainted; + return false; } |