Merge pull request #2232 from geoffw0/formatsymbols

CPP: Fully support n$ in format strings
2019-11-12 09:43:20 +01:00 · 2019-11-12 09:43:20 +01:00 · c36b73f09c
--- a/change-notes/1.23/analysis-cpp.md
+++ b/change-notes/1.23/analysis-cpp.md
@ -25,6 +25,7 @@ The following changes in version 1.23 affect C/C++ analysis in all applications.
 | Unclear comparison precedence (`cpp/comparison-precedence`) | Fewer false positive results | False positives involving template classes and functions have been fixed. |
 | Comparison of narrow type with wide type in loop condition (`cpp/comparison-with-wider-type`) | Higher precision | The precision of this query has been increased to "high" as the alerts from this query have proved to be valuable on real-world projects. With this precision, results are now displayed by default in LGTM. |
 | Non-constant format string (`cpp/non-constant-format`) | Fewer false positive results | Fixed false positives resulting from mistmatching declarations of a formatting function. |
+| Wrong type of arguments to formatting function (`cpp/wrong-type-format-argument`) | More correct results and fewer false positive results | This query now understands explicitly specified argument numbers in format strings, such as the `1$` in `%1$s`. |

 ## Changes to libraries

--- a/cpp/ql/src/semmle/code/cpp/commons/Printf.qll
+++ b/cpp/ql/src/semmle/code/cpp/commons/Printf.qll
@ -131,15 +131,17 @@ class FormattingFunctionCall extends Expr {
  }

  /**
-   * Gets the argument corresponding to the nth conversion specifier
+   * Gets the argument corresponding to the nth conversion specifier.
   */
  Expr getConversionArgument(int n) {
-    exists(FormatLiteral fl, int b, int o |
+    exists(FormatLiteral fl |
      fl = this.getFormat() and
-      b = sum(int i, int toSum | i < n and toSum = fl.getNumArgNeeded(i) | toSum) and
-      o = fl.getNumArgNeeded(n) and
-      o > 0 and
-      result = this.getFormatArgument(b + o - 1)
+      (
+        result = this.getFormatArgument(fl.getParameterFieldValue(n))
+        or
+        result = this.getFormatArgument(fl.getFormatArgumentIndexFor(n, 2)) and
+        not exists(fl.getParameterFieldValue(n))
+      )
    )
  }

@ -149,11 +151,14 @@ class FormattingFunctionCall extends Expr {
   * an explicit minimum field width).
   */
  Expr getMinFieldWidthArgument(int n) {
-    exists(FormatLiteral fl, int b |
+    exists(FormatLiteral fl |
      fl = this.getFormat() and
-      b = sum(int i, int toSum | i < n and toSum = fl.getNumArgNeeded(i) | toSum) and
-      fl.hasImplicitMinFieldWidth(n) and
-      result = this.getFormatArgument(b)
+      (
+        result = this.getFormatArgument(fl.getMinFieldWidthParameterFieldValue(n))
+        or
+        result = this.getFormatArgument(fl.getFormatArgumentIndexFor(n, 0)) and
+        not exists(fl.getMinFieldWidthParameterFieldValue(n))
+      )
    )
  }

@ -163,12 +168,14 @@ class FormattingFunctionCall extends Expr {
   * precision).
   */
  Expr getPrecisionArgument(int n) {
-    exists(FormatLiteral fl, int b, int o |
+    exists(FormatLiteral fl |
      fl = this.getFormat() and
-      b = sum(int i, int toSum | i < n and toSum = fl.getNumArgNeeded(i) | toSum) and
-      (if fl.hasImplicitMinFieldWidth(n) then o = 1 else o = 0) and
-      fl.hasImplicitPrecision(n) and
-      result = this.getFormatArgument(b + o)
+      (
+        result = this.getFormatArgument(fl.getPrecisionParameterFieldValue(n))
+        or
+        result = this.getFormatArgument(fl.getFormatArgumentIndexFor(n, 1)) and
+        not exists(fl.getPrecisionParameterFieldValue(n))
+      )
    )
  }

@ -368,6 +375,14 @@ class FormatLiteral extends Literal {
   */
  string getParameterField(int n) { this.parseConvSpec(n, _, result, _, _, _, _, _) }

+  /**
+   * Gets the parameter field of the nth conversion specifier (if it has one) as a
+   * zero-based number.
+   */
+  int getParameterFieldValue(int n) {
+    result = this.getParameterField(n).regexpCapture("([0-9]*)\\$", 1).toInt() - 1
+  }
+
  /**
   * Gets the flags of the nth conversion specifier.
   */
@ -437,6 +452,14 @@ class FormatLiteral extends Literal {
   */
  int getMinFieldWidth(int n) { result = this.getMinFieldWidthOpt(n).toInt() }

+  /**
+   * Gets the zero-based parameter number of the minimum field width of the nth
+   * conversion specifier, if it is implicit and uses a parameter field (such as `*1$`).
+   */
+  int getMinFieldWidthParameterFieldValue(int n) {
+    result = this.getMinFieldWidthOpt(n).regexpCapture("\\*([0-9]*)\\$", 1).toInt() - 1
+  }
+
  /**
   * Gets the precision of the nth conversion specifier (empty string if none is given).
   */
@ -467,6 +490,14 @@ class FormatLiteral extends Literal {
    else result = this.getPrecisionOpt(n).regexpCapture("\\.([0-9]*)", 1).toInt()
  }

+  /**
+   * Gets the zero-based parameter number of the precision of the nth conversion
+   * specifier, if it is implicit and uses a parameter field (such as `.*1$`).
+   */
+  int getPrecisionParameterFieldValue(int n) {
+    result = this.getPrecisionOpt(n).regexpCapture("\\.\\*([0-9]*)\\$", 1).toInt() - 1
+  }
+
  /**
   * Gets the length flag of the nth conversion specifier.
   */
@ -784,19 +815,49 @@ class FormatLiteral extends Literal {
    )
  }

+  /**
+   * Holds if the nth conversion specifier of this format string (if `mode = 2`), its
+   * minimum field width (if `mode = 0`) or its precision (if `mode = 1`) requires a
+   * format argument.
+   *
+   * Most conversion specifiers require a format argument, whereas minimum field width
+   * and precision only require a format argument if they are present and a `*` was
+   * used for its value in the format string.
+   */
+  private predicate hasFormatArgumentIndexFor(int n, int mode) {
+    mode = 0 and
+    this.hasImplicitMinFieldWidth(n)
+    or
+    mode = 1 and
+    this.hasImplicitPrecision(n)
+    or
+    mode = 2 and
+    exists(this.getConvSpecOffset(n)) and
+    not this.getConversionChar(n) = "m"
+  }
+
+  /**
+   * Gets the computed format argument index for the nth conversion specifier of this
+   * format string (if `mode = 2`), its minimum field width (if `mode = 0`) or its
+   * precision (if `mode = 1`).  Has no result if that element is not present.  Does
+   * not account for positional arguments (`$`).
+   */
+  int getFormatArgumentIndexFor(int n, int mode) {
+    hasFormatArgumentIndexFor(n, mode) and
+    (3 * n) + mode = rank[result + 1](int n2, int mode2 |
+        hasFormatArgumentIndexFor(n2, mode2)
+      |
+        (3 * n2) + mode2
+      )
+  }
+
  /**
   * Gets the number of arguments required by the nth conversion specifier
   * of this format string.
   */
  int getNumArgNeeded(int n) {
    exists(this.getConvSpecOffset(n)) and
-    not this.getConversionChar(n) = "%" and
-    exists(int n1, int n2, int n3 |
-      (if this.hasImplicitMinFieldWidth(n) then n1 = 1 else n1 = 0) and
-      (if this.hasImplicitPrecision(n) then n2 = 1 else n2 = 0) and
-      (if this.getConversionChar(n) = "m" then n3 = 0 else n3 = 1) and
-      result = n1 + n2 + n3
-    )
+    result = count(int mode | hasFormatArgumentIndexFor(n, mode))
  }

  /**
@ -808,7 +869,7 @@ class FormatLiteral extends Literal {
      // At least one conversion specifier has a parameter field, in which case,
      // they all should have.
      result = max(string s | this.getParameterField(_) = s + "$" | s.toInt())
-    else result = sum(int n, int toSum | toSum = this.getNumArgNeeded(n) | toSum)
+    else result = count(int n, int mode | hasFormatArgumentIndexFor(n, mode))
  }

  /**
--- a/Bugs/Format/WrongTypeFormatArguments/Linux_signed_chars/WrongTypeFormatArguments.expected
+++ b/Bugs/Format/WrongTypeFormatArguments/Linux_signed_chars/WrongTypeFormatArguments.expected
@ -16,6 +16,43 @@
 | printf1.h:114:18:114:18 | d | This argument should be of type 'long double' but is of type 'double' |
 | printf1.h:147:19:147:19 | i | This argument should be of type 'long long' but is of type 'int' |
 | printf1.h:148:19:148:20 | ui | This argument should be of type 'unsigned long long' but is of type 'unsigned int' |
+| printf1.h:160:18:160:18 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:161:21:161:21 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:167:17:167:17 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:168:18:168:18 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:169:19:169:19 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:174:17:174:17 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:175:18:175:18 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:176:19:176:19 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:180:17:180:17 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:181:20:181:20 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:183:18:183:18 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:184:21:184:21 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:186:19:186:19 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:187:22:187:22 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:189:19:189:19 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:190:22:190:22 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:192:19:192:19 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:193:22:193:22 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:194:25:194:25 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:198:24:198:24 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:199:21:199:21 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:202:26:202:26 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:203:23:203:23 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:206:25:206:25 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:207:22:207:22 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:210:26:210:26 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:211:23:211:23 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:214:28:214:28 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:215:28:215:28 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:216:25:216:25 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:221:18:221:18 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:222:20:222:20 | s | This argument should be of type 'int' but is of type 'char *' |
+| printf1.h:225:23:225:23 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:228:24:228:24 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:231:25:231:25 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:234:25:234:25 | i | This argument should be of type 'char *' but is of type 'int' |
+| printf1.h:235:22:235:22 | s | This argument should be of type 'int' but is of type 'char *' |
 | real_world.h:61:21:61:22 | & ... | This argument should be of type 'int *' but is of type 'short *' |
 | real_world.h:62:22:62:23 | & ... | This argument should be of type 'short *' but is of type 'int *' |
 | real_world.h:63:22:63:24 | & ... | This argument should be of type 'short *' but is of type 'unsigned int *' |
--- a/Bugs/Format/WrongTypeFormatArguments/Linux_signed_chars/printf1.h
+++ b/Bugs/Format/WrongTypeFormatArguments/Linux_signed_chars/printf1.h
@ -151,3 +151,86 @@ void fun4()
  printf("%qi\n", ll); // GOOD
  printf("%qu\n", ull); // GOOD
 }
+
+void complexFormatSymbols(int i, const char *s)
+{
+  // positional arguments
+  printf("%1$i", i, s); // GOOD
+  printf("%2$s", i, s); // GOOD
+  printf("%1$s", i, s); // BAD
+  printf("%2$i", i, s); // BAD
+
+  // width / precision
+  printf("%4i", i); // GOOD
+  printf("%.4i", i); // GOOD
+  printf("%4.4i", i); // GOOD
+  printf("%4s", i); // BAD
+  printf("%.4s", i); // BAD
+  printf("%4.4s", i); // BAD
+
+  printf("%4s", s); // GOOD
+  printf("%.4s", s); // GOOD
+  printf("%4.4s", s); // GOOD
+  printf("%4i", s); // BAD
+  printf("%.4i", s); // BAD
+  printf("%4.4i", s); // BAD
+
+  // variable width / precision
+  printf("%*s", i, s); // GOOD
+  printf("%*s", s, s); // BAD
+  printf("%*s", i, i); // BAD
+  printf("%.*s", i, s); // GOOD
+  printf("%.*s", s, s); // BAD
+  printf("%.*s", i, i); // BAD
+  printf("%*.4s", i, s); // GOOD
+  printf("%*.4s", s, s); // BAD
+  printf("%*.4s", i, i); // BAD
+  printf("%4.*s", i, s); // GOOD
+  printf("%4.*s", s, s); // BAD
+  printf("%4.*s", i, i); // BAD
+  printf("%*.*s", i, i, s); // GOOD
+  printf("%*.*s", s, i, s); // BAD
+  printf("%*.*s", i, s, s); // BAD
+  printf("%*.*s", i, i, i); // BAD
+
+  // positional arguments mixed with variable width / precision
+  printf("%2$*1$s", i, s); // GOOD
+  printf("%2$*2$s", i, s); // BAD
+  printf("%1$*1$s", i, s); // BAD
+
+  printf("%2$*1$.4s", i, s); // GOOD
+  printf("%2$*2$.4s", i, s); // BAD
+  printf("%1$*1$.4s", i, s); // BAD
+
+  printf("%2$.*1$s", i, s); // GOOD
+  printf("%2$.*2$s", i, s); // BAD
+  printf("%1$.*1$s", i, s); // BAD
+
+  printf("%2$4.*1$s", i, s); // GOOD
+  printf("%2$4.*2$s", i, s); // BAD
+  printf("%1$4.*1$s", i, s); // BAD
+
+  printf("%2$*1$.*1$s", i, s); // GOOD
+  printf("%2$*2$.*1$s", i, s); // BAD
+  printf("%2$*1$.*2$s", i, s); // BAD
+  printf("%1$*1$.*1$s", i, s); // BAD
+
+  // left justify flag
+  printf("%-4s", s); // GOOD
+  printf("%1$-4s", s); // GOOD
+  printf("%-4i", s); // BAD
+  printf("%1$-4i", s); // BAD
+
+  printf("%1$-4s", s, i); // GOOD
+  printf("%2$-4s", s, i); // BAD
+
+  printf("%1$-.4s", s, i); // GOOD
+  printf("%2$-.4s", s, i); // BAD
+
+  printf("%1$-4.4s", s, i); // GOOD
+  printf("%2$-4.4s", s, i); // BAD
+
+  printf("%1$-*2$s", s, i); // GOOD
+  printf("%2$-*2$s", s, i); // BAD
+  printf("%1$-*1$s", s, i); // BAD
+}