diff options
| author | Andy Gibbs <andyg1001@hotmail.co.uk> | 2016-02-26 15:35:16 +0000 | 
|---|---|---|
| committer | Andy Gibbs <andyg1001@hotmail.co.uk> | 2016-02-26 15:35:16 +0000 | 
| commit | 9a31b3b07a95e000a7b6689223dce8322d563265 (patch) | |
| tree | 47d1bed7acf7e02d09cd1304fe57c3e55cc0c5c5 | |
| parent | 0d5e9d3135f3f7784ca8c708699b324a644b97c7 (diff) | |
| download | bcm5719-llvm-9a31b3b07a95e000a7b6689223dce8322d563265.tar.gz bcm5719-llvm-9a31b3b07a95e000a7b6689223dce8322d563265.zip | |
Reduce false positives in printf/scanf format checker
Summary:
The printf/scanf format checker is a little over-zealous in handling the conditional operator.  This patch reduces work by not checking code-paths that are never used and reduces false positives regarding uncovered arguments, for example in the code fragment:
printf(minimal ? "%i\n" : "%i: %s\n", code, msg);
Reviewers: rtrieu
Subscribers: cfe-commits
Differential Revision: http://reviews.llvm.org/D15636
llvm-svn: 262025
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 179 | ||||
| -rw-r--r-- | clang/test/Sema/format-strings-scanf.c | 14 | ||||
| -rw-r--r-- | clang/test/Sema/format-strings.c | 19 | 
3 files changed, 176 insertions, 36 deletions
| diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 134248dbe22..c38dda6464c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3256,6 +3256,51 @@ bool Sema::SemaBuiltinSetjmp(CallExpr *TheCall) {  }  namespace { +class UncoveredArgHandler { +  enum { Unknown = -1, AllCovered = -2 }; +  signed FirstUncoveredArg; +  SmallVector<const Expr *, 4> DiagnosticExprs; + +public: +  UncoveredArgHandler() : FirstUncoveredArg(Unknown) { } + +  bool hasUncoveredArg() const { +    return (FirstUncoveredArg >= 0); +  } + +  unsigned getUncoveredArg() const { +    assert(hasUncoveredArg() && "no uncovered argument"); +    return FirstUncoveredArg; +  } + +  void setAllCovered() { +    // A string has been found with all arguments covered, so clear out +    // the diagnostics. +    DiagnosticExprs.clear(); +    FirstUncoveredArg = AllCovered; +  } + +  void Update(signed NewFirstUncoveredArg, const Expr *StrExpr) { +    assert(NewFirstUncoveredArg >= 0 && "Outside range"); + +    // Don't update if a previous string covers all arguments. +    if (FirstUncoveredArg == AllCovered) +      return; + +    // UncoveredArgHandler tracks the highest uncovered argument index +    // and with it all the strings that match this index. +    if (NewFirstUncoveredArg == FirstUncoveredArg) +      DiagnosticExprs.push_back(StrExpr); +    else if (NewFirstUncoveredArg > FirstUncoveredArg) { +      DiagnosticExprs.clear(); +      DiagnosticExprs.push_back(StrExpr); +      FirstUncoveredArg = NewFirstUncoveredArg; +    } +  } + +  void Diagnose(Sema &S, bool IsFunctionCall, const Expr *ArgExpr); +}; +  enum StringLiteralCheckType {    SLCT_NotALiteral,    SLCT_UncheckedLiteral, @@ -3271,7 +3316,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr,                                Sema::FormatStringType Type,                                bool inFunctionCall,                                Sema::VariadicCallType CallType, -                              llvm::SmallBitVector &CheckedVarArgs); +                              llvm::SmallBitVector &CheckedVarArgs, +                              UncoveredArgHandler &UncoveredArg);  // Determine if an expression is a string literal or constant string.  // If this function returns false on the arguments to a function expecting a @@ -3282,7 +3328,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,                        bool HasVAListArg, unsigned format_idx,                        unsigned firstDataArg, Sema::FormatStringType Type,                        Sema::VariadicCallType CallType, bool InFunctionCall, -                      llvm::SmallBitVector &CheckedVarArgs) { +                      llvm::SmallBitVector &CheckedVarArgs, +                      UncoveredArgHandler &UncoveredArg) {   tryAgain:    if (E->isTypeDependent() || E->isValueDependent())      return SLCT_NotALiteral; @@ -3303,17 +3350,39 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,      // completely checked only if both sub-expressions were checked.      const AbstractConditionalOperator *C =          cast<AbstractConditionalOperator>(E); -    StringLiteralCheckType Left = -        checkFormatStringExpr(S, C->getTrueExpr(), Args, -                              HasVAListArg, format_idx, firstDataArg, -                              Type, CallType, InFunctionCall, CheckedVarArgs); -    if (Left == SLCT_NotALiteral) -      return SLCT_NotALiteral; + +    // Determine whether it is necessary to check both sub-expressions, for +    // example, because the condition expression is a constant that can be +    // evaluated at compile time. +    bool CheckLeft = true, CheckRight = true; + +    bool Cond; +    if (C->getCond()->EvaluateAsBooleanCondition(Cond, S.getASTContext())) { +      if (Cond) +        CheckRight = false; +      else +        CheckLeft = false; +    } + +    StringLiteralCheckType Left; +    if (!CheckLeft) +      Left = SLCT_UncheckedLiteral; +    else { +      Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, +                                   HasVAListArg, format_idx, firstDataArg, +                                   Type, CallType, InFunctionCall, +                                   CheckedVarArgs, UncoveredArg); +      if (Left == SLCT_NotALiteral || !CheckRight) +        return Left; +    } +      StringLiteralCheckType Right =          checkFormatStringExpr(S, C->getFalseExpr(), Args,                                HasVAListArg, format_idx, firstDataArg, -                              Type, CallType, InFunctionCall, CheckedVarArgs); -    return Left < Right ? Left : Right; +                              Type, CallType, InFunctionCall, CheckedVarArgs, +                              UncoveredArg); + +    return (CheckLeft && Left < Right) ? Left : Right;    }    case Stmt::ImplicitCastExprClass: { @@ -3364,7 +3433,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,            return checkFormatStringExpr(S, Init, Args,                                         HasVAListArg, format_idx,                                         firstDataArg, Type, CallType, -                                       /*InFunctionCall*/false, CheckedVarArgs); +                                       /*InFunctionCall*/false, CheckedVarArgs, +                                       UncoveredArg);          }        } @@ -3419,7 +3489,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,          return checkFormatStringExpr(S, Arg, Args,                                       HasVAListArg, format_idx, firstDataArg,                                       Type, CallType, InFunctionCall, -                                     CheckedVarArgs); +                                     CheckedVarArgs, UncoveredArg);        } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {          unsigned BuiltinID = FD->getBuiltinID();          if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -3428,7 +3498,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,            return checkFormatStringExpr(S, Arg, Args,                                         HasVAListArg, format_idx,                                         firstDataArg, Type, CallType, -                                       InFunctionCall, CheckedVarArgs); +                                       InFunctionCall, CheckedVarArgs, +                                       UncoveredArg);          }        }      } @@ -3447,7 +3518,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,      if (StrE) {        CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,                          firstDataArg, Type, InFunctionCall, CallType, -                        CheckedVarArgs); +                        CheckedVarArgs, UncoveredArg);        return SLCT_CheckedLiteral;      } @@ -3515,10 +3586,20 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,    // C string (e.g. "%d")    // ObjC string uses the same format specifiers as C string, so we can use    // the same format string checking logic for both ObjC and C strings. +  UncoveredArgHandler UncoveredArg;    StringLiteralCheckType CT =        checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg,                              format_idx, firstDataArg, Type, CallType, -                            /*IsFunctionCall*/true, CheckedVarArgs); +                            /*IsFunctionCall*/true, CheckedVarArgs, +                            UncoveredArg); + +  // Generate a diagnostic where an uncovered argument is detected. +  if (UncoveredArg.hasUncoveredArg()) { +    unsigned ArgIdx = UncoveredArg.getUncoveredArg() + firstDataArg; +    assert(ArgIdx < Args.size() && "ArgIdx outside bounds"); +    UncoveredArg.Diagnose(*this, /*IsFunctionCall*/true, Args[ArgIdx]); +  } +    if (CT != SLCT_NotALiteral)      // Literal format string found, check done!      return CT == SLCT_CheckedLiteral; @@ -3567,6 +3648,7 @@ protected:    bool inFunctionCall;    Sema::VariadicCallType CallType;    llvm::SmallBitVector &CheckedVarArgs; +  UncoveredArgHandler &UncoveredArg;  public:    CheckFormatHandler(Sema &s, const StringLiteral *fexpr, @@ -3575,14 +3657,15 @@ public:                       ArrayRef<const Expr *> Args,                       unsigned formatIdx, bool inFunctionCall,                       Sema::VariadicCallType callType, -                     llvm::SmallBitVector &CheckedVarArgs) +                     llvm::SmallBitVector &CheckedVarArgs, +                     UncoveredArgHandler &UncoveredArg)      : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),        FirstDataArg(firstDataArg), NumDataArgs(numDataArgs),        Beg(beg), HasVAListArg(hasVAListArg),        Args(Args), FormatIdx(formatIdx),        usesPositionalArgs(false), atFirstArg(true),        inFunctionCall(inFunctionCall), CallType(callType), -      CheckedVarArgs(CheckedVarArgs) { +      CheckedVarArgs(CheckedVarArgs), UncoveredArg(UncoveredArg) {      CoveredArgs.resize(numDataArgs);      CoveredArgs.reset();    } @@ -3813,26 +3896,44 @@ const Expr *CheckFormatHandler::getDataArg(unsigned i) const {  }  void CheckFormatHandler::DoneProcessing() { -    // Does the number of data arguments exceed the number of -    // format conversions in the format string? +  // Does the number of data arguments exceed the number of +  // format conversions in the format string?    if (!HasVAListArg) {        // Find any arguments that weren't covered.      CoveredArgs.flip();      signed notCoveredArg = CoveredArgs.find_first();      if (notCoveredArg >= 0) {        assert((unsigned)notCoveredArg < NumDataArgs); -      if (const Expr *E = getDataArg((unsigned) notCoveredArg)) { -        SourceLocation Loc = E->getLocStart(); -        if (!S.getSourceManager().isInSystemMacro(Loc)) { -          EmitFormatDiagnostic(S.PDiag(diag::warn_printf_data_arg_not_used), -                               Loc, /*IsStringLocation*/false, -                               getFormatStringRange()); -        } -      } +      UncoveredArg.Update(notCoveredArg, OrigFormatExpr); +    } else { +      UncoveredArg.setAllCovered();      }    }  } +void UncoveredArgHandler::Diagnose(Sema &S, bool IsFunctionCall, +                                   const Expr *ArgExpr) { +  assert(hasUncoveredArg() && DiagnosticExprs.size() > 0 && +         "Invalid state"); + +  if (!ArgExpr) +    return; + +  SourceLocation Loc = ArgExpr->getLocStart(); + +  if (S.getSourceManager().isInSystemMacro(Loc)) +    return; + +  PartialDiagnostic PDiag = S.PDiag(diag::warn_printf_data_arg_not_used); +  for (auto E : DiagnosticExprs) +    PDiag << E->getSourceRange(); + +  CheckFormatHandler::EmitFormatDiagnostic( +                                  S, IsFunctionCall, DiagnosticExprs[0], +                                  PDiag, Loc, /*IsStringLocation*/false, +                                  DiagnosticExprs[0]->getSourceRange()); +} +  bool  CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,                                                       SourceLocation Loc, @@ -3886,6 +3987,10 @@ CheckFormatHandler::CheckNumArgs(      EmitFormatDiagnostic(        PDiag, getLocationOfByte(CS.getStart()), /*IsStringLocation*/true,        getSpecifierRange(startSpecifier, specifierLen)); + +    // Since more arguments than conversion tokens are given, by extension +    // all arguments are covered, so mark this as so. +    UncoveredArg.setAllCovered();      return false;    }    return true; @@ -3967,10 +4072,12 @@ public:                       ArrayRef<const Expr *> Args,                       unsigned formatIdx, bool inFunctionCall,                       Sema::VariadicCallType CallType, -                     llvm::SmallBitVector &CheckedVarArgs) +                     llvm::SmallBitVector &CheckedVarArgs, +                     UncoveredArgHandler &UncoveredArg)      : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,                           numDataArgs, beg, hasVAListArg, Args, -                         formatIdx, inFunctionCall, CallType, CheckedVarArgs), +                         formatIdx, inFunctionCall, CallType, CheckedVarArgs, +                         UncoveredArg),        ObjCContext(isObjC)    {} @@ -4751,11 +4858,12 @@ public:                      ArrayRef<const Expr *> Args,                      unsigned formatIdx, bool inFunctionCall,                      Sema::VariadicCallType CallType, -                    llvm::SmallBitVector &CheckedVarArgs) +                    llvm::SmallBitVector &CheckedVarArgs, +                    UncoveredArgHandler &UncoveredArg)      : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,                           numDataArgs, beg, hasVAListArg,                           Args, formatIdx, inFunctionCall, CallType, -                         CheckedVarArgs) +                         CheckedVarArgs, UncoveredArg)    {}    bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, @@ -4923,7 +5031,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr,                                Sema::FormatStringType Type,                                bool inFunctionCall,                                Sema::VariadicCallType CallType, -                              llvm::SmallBitVector &CheckedVarArgs) { +                              llvm::SmallBitVector &CheckedVarArgs, +                              UncoveredArgHandler &UncoveredArg) {    // CHECK: is the format string a wide literal?    if (!FExpr->isAscii() && !FExpr->isUTF8()) {      CheckFormatHandler::EmitFormatDiagnostic( @@ -4971,7 +5080,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr,                           numDataArgs, (Type == Sema::FST_NSString ||                                         Type == Sema::FST_OSTrace),                           Str, HasVAListArg, Args, format_idx, -                         inFunctionCall, CallType, CheckedVarArgs); +                         inFunctionCall, CallType, CheckedVarArgs, +                         UncoveredArg);      if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,                                                    S.getLangOpts(), @@ -4981,7 +5091,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr,    } else if (Type == Sema::FST_Scanf) {      CheckScanfHandler H(S, FExpr, OrigFormatExpr, firstDataArg, numDataArgs,                          Str, HasVAListArg, Args, format_idx, -                        inFunctionCall, CallType, CheckedVarArgs); +                        inFunctionCall, CallType, CheckedVarArgs, +                        UncoveredArg);      if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen,                                                   S.getLangOpts(), diff --git a/clang/test/Sema/format-strings-scanf.c b/clang/test/Sema/format-strings-scanf.c index d3a03adf619..7a92842b245 100644 --- a/clang/test/Sema/format-strings-scanf.c +++ b/clang/test/Sema/format-strings-scanf.c @@ -18,7 +18,7 @@ int vfscanf(FILE * restrict, const char * restrict, va_list);  int vsscanf(const char * restrict, const char * restrict, va_list);  void test(const char *s, int *i) { -  scanf(s, i); // expected-warning{{ormat string is not a string literal}} +  scanf(s, i); // expected-warning{{format string is not a string literal}}    scanf("%0d", i); // expected-warning{{zero field width in scanf format string is unused}}    scanf("%00d", i); // expected-warning{{zero field width in scanf format string is unused}}    scanf("%d%[asdfasdfd", i, s); // expected-warning{{no closing ']' for '%[' in scanf format string}} @@ -171,3 +171,15 @@ void test_qualifiers(const int *cip, volatile int* vip,    scanf("%d", (ip_t)0); // No warning.    scanf("%d", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}}  } + +void check_conditional_literal(char *s, int *i) { +  scanf(0 ? "%s" : "%d", i); // no warning +  scanf(1 ? "%s" : "%d", i); // expected-warning{{format specifies type 'char *'}} +  scanf(0 ? "%d %d" : "%d", i); // no warning +  scanf(1 ? "%d %d" : "%d", i); // expected-warning{{more '%' conversions than data arguments}} +  scanf(0 ? "%d %d" : "%d", i, s); // expected-warning{{data argument not used}} +  scanf(1 ? "%d %s" : "%d", i, s); // no warning +  scanf(i ? "%d %s" : "%d", i, s); // no warning +  scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}} +  scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}} +} diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c index 0d827e400d7..69723f6e1ac 100644 --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -46,6 +46,9 @@ void check_string_literal( FILE* fp, const char* s, char *buf, ... ) {    vscanf(s, ap); // expected-warning {{format string is not a string literal}} +  const char *const fmt = "%d"; // FIXME -- defined here +  printf(fmt, 1, 2); // expected-warning{{data argument not used}} +    // rdar://6079877    printf("abc"           "%*d", 1, 1); // no-warning @@ -86,6 +89,20 @@ void check_conditional_literal(const char* s, int i) {    printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning    printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}}    printf("yes" ?: "no %d", 1); // expected-warning{{data argument not used by format string}} +  printf(0 ? "yes %s" : "no %d", 1); // no-warning +  printf(0 ? "yes %d" : "no %s", 1); // expected-warning{{format specifies type 'char *'}} + +  printf(0 ? "yes" : "no %d", 1); // no-warning +  printf(0 ? "yes %d" : "no", 1); // expected-warning{{data argument not used by format string}} +  printf(1 ? "yes" : "no %d", 1); // expected-warning{{data argument not used by format string}} +  printf(1 ? "yes %d" : "no", 1); // no-warning +  printf(i ? "yes" : "no %d", 1); // no-warning +  printf(i ? "yes %s" : "no %d", 1); // expected-warning{{format specifies type 'char *'}} +  printf(i ? "yes" : "no %d", 1, 2); // expected-warning{{data argument not used by format string}} + +  printf(i ? "%*s" : "-", i, s); // no-warning +  printf(i ? "yes" : 0 ? "no %*d" : "dont know %d", 1, 2); // expected-warning{{data argument not used by format string}} +  printf(i ? "%i\n" : "%i %s %s\n", i, s); // expected-warning{{more '%' conversions than data arguments}}  }  void check_writeback_specifier() @@ -519,7 +536,7 @@ void pr9751() {    // Make sure that the "format string is defined here" note is not emitted    // when the original string is within the argument expression. -  printf(1 ? "yes %d" : "no %d"); // expected-warning 2{{more '%' conversions than data arguments}} +  printf(1 ? "yes %d" : "no %d"); // expected-warning{{more '%' conversions than data arguments}}    const char kFormat17[] = "%hu"; // expected-note{{format string is defined here}}}    printf(kFormat17, (int[]){0}); // expected-warning{{format specifies type 'unsigned short' but the argument}} | 

