Merge pull request #696 from asgerf/asgerf/dot-separated-access-paths

Go: Switch to dot-separated access paths in summary specs
This commit is contained in:
Chris Smowton 2022-02-22 15:34:27 +00:00 коммит произвёл GitHub
Родитель 980c27423a cb38df5980
Коммит 106ee5b8a2
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 186 добавлений и 118 удалений

Просмотреть файл

@ -136,7 +136,7 @@ build/testdb/go.dbscheme: ql/lib/upgrades/initial/go.dbscheme
.PHONY: sync-dataflow-libraries
sync-dataflow-libraries:
for f in DataFlowImpl.qll DataFlowImpl2.qll DataFlowImplCommon.qll DataFlowImplConsistency.qll tainttracking1/TaintTrackingImpl.qll tainttracking2/TaintTrackingImpl.qll FlowSummaryImpl.qll;\
for f in DataFlowImpl.qll DataFlowImpl2.qll DataFlowImplCommon.qll DataFlowImplConsistency.qll tainttracking1/TaintTrackingImpl.qll tainttracking2/TaintTrackingImpl.qll FlowSummaryImpl.qll AccessPathSyntax.qll;\
do\
curl -s -o ./ql/lib/semmle/go/dataflow/internal/$$f https://raw.githubusercontent.com/github/codeql/$(DATAFLOW_BRANCH)/java/ql/lib/semmle/code/java/dataflow/internal/$$f;\
done

Просмотреть файл

@ -64,6 +64,7 @@ private import go
private import internal.DataFlowPrivate
private import internal.FlowSummaryImpl::Private::External
private import internal.FlowSummaryImplSpecific
private import internal.AccessPathSyntax
private import FlowSummary
/**
@ -78,8 +79,8 @@ private class BuiltinModel extends SummaryModelCsv {
override predicate row(string row) {
row =
[
";;false;append;;;ArrayElement of Argument[0];ArrayElement of ReturnValue;value",
";;false;append;;;Argument[1];ArrayElement of ReturnValue;value"
";;false;append;;;Argument[0].ArrayElement;ReturnValue.ArrayElement;value",
";;false;append;;;Argument[1];ReturnValue.ArrayElement;value"
]
}
}
@ -295,7 +296,7 @@ module CsvValidation {
msg = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
)
or
exists(string pred, string input, string part |
exists(string pred, AccessPath input, string part |
sinkModel(_, _, _, _, _, _, input, _) and pred = "sink"
or
summaryModel(_, _, _, _, _, _, input, _, _) and pred = "summary"
@ -305,7 +306,7 @@ module CsvValidation {
not part = "" and
not parseArg(part, _)
or
specSplit(input, part, _) and
part = input.getToken(_) and
parseParam(part, _)
) and
msg = "Unrecognized input specification \"" + part + "\" in " + pred + " model."
@ -403,8 +404,7 @@ predicate hasExternalSpecification(Function f) {
exists(SourceOrSinkElement e | f = e.asEntity() | sourceElement(e, _, _) or sinkElement(e, _, _))
}
private predicate parseField(string c, DataFlow::FieldContent f) {
specSplit(_, c, _) and
private predicate parseField(AccessPathToken c, DataFlow::FieldContent f) {
exists(string fieldRegex, string package, string className, string fieldName |
fieldRegex = "^Field\\[(.*)\\.([^.]+)\\.([^.]+)\\]$" and
package = c.regexpCapture(fieldRegex, 1) and
@ -425,8 +425,7 @@ class SyntheticField extends string {
Type getType() { result instanceof EmptyInterfaceType }
}
private predicate parseSynthField(string c, string f) {
specSplit(_, c, _) and
private predicate parseSynthField(AccessPathToken c, string f) {
c.regexpCapture("SyntheticField\\[([.a-zA-Z0-9]+)\\]", 1) = f
}

Просмотреть файл

@ -0,0 +1,79 @@
/**
* Module for parsing access paths from CSV models, both the identifying access path used
* by dynamic languages, and the input/output specifications for summary steps.
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which do not use the shared data flow libraries).
*/
/**
 * Companion module to the `AccessPath` class.
 *
 * It contains `Range`, the class through which strings are registered as access paths:
 * the `AccessPath` class is declared as `string instanceof AccessPath::Range`.
 */
module AccessPath {
/**
 * A string that should be parsed as an access path.
 *
 * The class is abstract and its characteristic predicate places no restriction on `this`,
 * so the strings it contains are determined by the classes that extend it.
 */
abstract class Range extends string {
// `any()` does not bind `this`, hence the binding set: `this` has to be bound by the context.
bindingset[this]
Range() { any() }
}
}
/**
 * Gets the `n`th token on the access path `path`, as a plain string.
 *
 * Tokens are located by regular-expression matching rather than by splitting on `.`,
 * because a token may itself contain dots, e.g. `Field[foo.Bar.x]`. Matching alone does not
 * guarantee that every character of `path` ended up in a token; that is verified
 * separately by the length check in `AccessPath.hasSyntaxError`.
 */
private string getRawToken(AccessPath path, int n) {
  exists(string tokenPattern |
    // A word, optionally followed by a bracketed argument list, that is directly
    // followed by either a `.` or the end of the string (lookahead, not consumed).
    tokenPattern = "\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)" and
    result = path.regexpFind(tokenPattern, n, _)
  )
}
/**
 * A string that occurs as an access path (either identifying or input/output spec)
 * which might be relevant for this database.
 */
class AccessPath extends string instanceof AccessPath::Range {
  /**
   * Holds if this string is not a syntactically valid access path.
   *
   * The empty string is never reported as a syntax error.
   */
  predicate hasSyntaxError() {
    not this = "" and
    exists(int coveredLength |
      // Each token accounts for its own characters plus one separating `.`;
      // the final token has no trailing `.`, hence the `- 1`.
      coveredLength = sum(int n | | getRawToken(this, n).length() + 1) - 1
    |
      // If the lengths match, all characters must have been included in a token
      // or seen by the `.` lookahead pattern; any mismatch is a syntax error.
      this.length() != coveredLength
    )
  }

  /** Gets the `n`th token on the access path (if there are no syntax errors). */
  AccessPathToken getToken(int n) {
    not this.hasSyntaxError() and
    result = getRawToken(this, n)
  }

  /** Gets the number of tokens on the path (if there are no syntax errors). */
  int getNumToken() {
    not this.hasSyntaxError() and
    result = count(int n | exists(getRawToken(this, n)) | n)
  }
}
/**
 * An access path token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
 */
class AccessPathToken extends string {
  AccessPathToken() { exists(AccessPath path | this = getRawToken(path, _)) }

  /**
   * Gets capture group `part` of this token: group 1 is the text before any `[`,
   * group 2 is the text between the brackets (if a bracketed suffix is present).
   */
  private string getPart(int part) {
    exists(string tokenShape |
      tokenShape = "([^\\[]+)(?:\\[([^\\]]*)\\])?" and
      result = this.regexpCapture(tokenShape, part)
    )
  }

  /** Gets the name of the token, such as `Member` from `Member[x]` */
  string getName() { result = this.getPart(1) }

  /**
   * Gets the argument list, such as `1,2` from `Member[1,2]`,
   * or has no result if there are no arguments.
   */
  string getArgumentList() { result = this.getPart(2) }

  /**
   * Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`.
   *
   * Arguments are the comma-separated pieces of the argument list, with surrounding
   * whitespace trimmed.
   */
  string getArgument(int n) {
    exists(string rawArgument |
      rawArgument = this.getArgumentList().splitAt(",", n) and
      result = rawArgument.trim()
    )
  }

  /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
  string getAnArgument() { exists(int n | result = this.getArgument(n)) }

  /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
  int getNumArgument() { result = count(int n | exists(this.getArgument(n)) | n) }
}

Просмотреть файл

@ -99,7 +99,7 @@ module Public {
exists(SummaryComponent head, SummaryComponentStack tail |
head = this.head() and
tail = this.tail() and
result = head + " of " + tail
result = tail + "." + head
)
or
exists(SummaryComponent c |
@ -164,7 +164,7 @@ module Public {
exists(SummaryComponent head, SummaryComponentStack tail |
head = stack.head() and
tail = stack.tail() and
result = getComponentCsv(head) + " of " + getComponentStackCsv(tail)
result = getComponentStackCsv(tail) + "." + getComponentCsv(head)
)
or
exists(SummaryComponent c |
@ -228,6 +228,7 @@ module Public {
*/
module Private {
private import Public
import AccessPathSyntax
newtype TSummaryComponent =
TContentSummaryComponent(Content c) or
@ -811,84 +812,60 @@ module Private {
sinkElement(_, spec, _)
}
/** Holds if the `n`th component of specification `s` is `c`. */
predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
/** Holds if specification `s` has length `len`. */
predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
/** Gets the last component of specification `s`. */
string specLast(string s) {
exists(int len |
specLength(s, len) and
specSplit(s, result, len - 1)
)
private class AccessPathRange extends AccessPath::Range {
AccessPathRange() { relevantSpec(this) }
}
/** Holds if specification component `c` parses as parameter `n`. */
predicate parseParam(string c, ArgumentPosition pos) {
specSplit(_, c, _) and
exists(string body |
body = c.regexpCapture("Parameter\\[([^\\]]*)\\]", 1) and
pos = parseParamBody(body)
)
predicate parseParam(AccessPathToken token, ArgumentPosition pos) {
token.getName() = "Parameter" and
pos = parseParamBody(token.getAnArgument())
}
/** Holds if specification component `c` parses as argument `n`. */
predicate parseArg(string c, ParameterPosition pos) {
specSplit(_, c, _) and
exists(string body |
body = c.regexpCapture("Argument\\[([^\\]]*)\\]", 1) and
pos = parseArgBody(body)
)
predicate parseArg(AccessPathToken token, ParameterPosition pos) {
token.getName() = "Argument" and
pos = parseArgBody(token.getAnArgument())
}
private SummaryComponent interpretComponent(string c) {
specSplit(_, c, _) and
(
private SummaryComponent interpretComponent(AccessPathToken token) {
exists(ParameterPosition pos |
parseArg(c, pos) and result = SummaryComponent::argument(pos)
parseArg(token, pos) and result = SummaryComponent::argument(pos)
)
or
exists(ArgumentPosition pos |
parseParam(c, pos) and result = SummaryComponent::parameter(pos)
parseParam(token, pos) and result = SummaryComponent::parameter(pos)
)
or
c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
or
result = interpretComponentSpecific(c)
)
result = interpretComponentSpecific(token)
}
/**
* Holds if `spec` specifies summary component stack `stack`.
*/
predicate interpretSpec(string spec, SummaryComponentStack stack) {
interpretSpec(spec, 0, stack)
predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) {
interpretSpec(spec, spec.getNumToken(), stack)
}
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
exists(string c |
relevantSpec(spec) and
specLength(spec, idx + 1) and
specSplit(spec, c, idx) and
stack = SummaryComponentStack::singleton(interpretComponent(c))
)
/** Holds if the first `n` tokens of `spec` resolves to `stack`. */
private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) {
n = 1 and
stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0)))
or
exists(SummaryComponent head, SummaryComponentStack tail |
interpretSpec(spec, idx, head, tail) and
interpretSpec(spec, n, head, tail) and
stack = SummaryComponentStack::push(head, tail)
)
}
/** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */
private predicate interpretSpec(
string output, int idx, SummaryComponent head, SummaryComponentStack tail
AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail
) {
exists(string c |
interpretSpec(output, idx + 1, tail) and
specSplit(output, c, idx) and
head = interpretComponent(c)
)
interpretSpec(spec, n - 1, tail) and
head = interpretComponent(spec.getToken(n - 1))
}
private class MkStack extends RequiredSummaryComponentStack {
@ -903,7 +880,7 @@ module Private {
override predicate propagatesFlow(
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
) {
exists(string inSpec, string outSpec, string kind |
exists(AccessPath inSpec, AccessPath outSpec, string kind |
summaryElement(this, inSpec, outSpec, kind) and
interpretSpec(inSpec, input) and
interpretSpec(outSpec, output)
@ -916,50 +893,56 @@ module Private {
}
/** Holds if component `c` of specification `spec` cannot be parsed. */
predicate invalidSpecComponent(string spec, string c) {
specSplit(spec, c, _) and
predicate invalidSpecComponent(AccessPath spec, string c) {
c = spec.getToken(_) and
not exists(interpretComponent(c))
}
private predicate inputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _) or
private predicate inputNeedsReference(AccessPathToken c) {
c.getName() = "Argument" or
inputNeedsReferenceSpecific(c)
}
private predicate outputNeedsReference(string c) {
c = "Argument" or
parseArg(c, _) or
c = "ReturnValue" or
private predicate outputNeedsReference(AccessPathToken c) {
c.getName() = ["Argument", "ReturnValue"] or
outputNeedsReferenceSpecific(c)
}
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) {
exists(SourceOrSinkElement e |
sourceElement(e, output, kind) and
if outputNeedsReference(specLast(output))
if outputNeedsReference(output.getToken(0))
then e = ref.getCallTarget()
else e = ref.asElement()
)
}
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) {
exists(SourceOrSinkElement e |
sinkElement(e, input, kind) and
if inputNeedsReference(specLast(input))
if inputNeedsReference(input.getToken(0))
then e = ref.getCallTarget()
else e = ref.asElement()
)
}
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
/** Holds if the first `n` tokens of `output` resolve to the given interpretation. */
private predicate interpretOutput(
AccessPath output, int n, InterpretNode ref, InterpretNode node
) {
sourceElementRef(ref, output, _) and
specLength(output, idx) and
node = ref
n = 0 and
(
if output = ""
then
// Allow language-specific interpretation of the empty access path
interpretOutputSpecific("", ref, node)
else node = ref
)
or
exists(InterpretNode mid, string c |
interpretOutput(output, idx + 1, ref, mid) and
specSplit(output, c, idx)
exists(InterpretNode mid, AccessPathToken c |
interpretOutput(output, n - 1, ref, mid) and
c = output.getToken(n - 1)
|
exists(ArgumentPosition apos, ParameterPosition ppos |
node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and
@ -982,14 +965,21 @@ module Private {
)
}
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
/** Holds if the first `n` tokens of `input` resolve to the given interpretation. */
private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) {
sinkElementRef(ref, input, _) and
specLength(input, idx) and
node = ref
n = 0 and
(
if input = ""
then
// Allow language-specific interpretation of the empty access path
interpretInputSpecific("", ref, node)
else node = ref
)
or
exists(InterpretNode mid, string c |
interpretInput(input, idx + 1, ref, mid) and
specSplit(input, c, idx)
exists(InterpretNode mid, AccessPathToken c |
interpretInput(input, n - 1, ref, mid) and
c = input.getToken(n - 1)
|
exists(ArgumentPosition apos, ParameterPosition ppos |
node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and
@ -1014,9 +1004,9 @@ module Private {
* model.
*/
predicate isSourceNode(InterpretNode node, string kind) {
exists(InterpretNode ref, string output |
exists(InterpretNode ref, AccessPath output |
sourceElementRef(ref, output, kind) and
interpretOutput(output, 0, ref, node)
interpretOutput(output, output.getNumToken(), ref, node)
)
}
@ -1025,9 +1015,9 @@ module Private {
* model.
*/
predicate isSinkNode(InterpretNode node, string kind) {
exists(InterpretNode ref, string input |
exists(InterpretNode ref, AccessPath input |
sinkElementRef(ref, input, kind) and
interpretInput(input, 0, ref, node)
interpretInput(input, input.getNumToken(), ref, node)
)
}
}

Просмотреть файл

@ -272,12 +272,12 @@ predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) {
}
/** Holds if specification component `c` parses as return value `n`. */
predicate parseReturn(string c, int n) {
External::specSplit(_, c, _) and
predicate parseReturn(AccessPathToken c, int n) {
(
c = "ReturnValue" and n = 0
or
n = parseConstantOrRange(c.regexpCapture("ReturnValue\\[([^\\]]+)\\]", 1))
c.getName() = "ReturnValue" and
n = parseConstantOrRange(c.getAnArgument())
)
}

Просмотреть файл

@ -10,7 +10,7 @@ private class FlowSources extends SourceModelCsv {
row =
[
"net/http;Request;true;Cookie;;;ReturnValue[0];remote",
"net/http;Request;true;Cookies;;;ArrayElement of ReturnValue;remote",
"net/http;Request;true;Cookies;;;ReturnValue.ArrayElement;remote",
"net/http;Request;true;FormFile;;;ReturnValue[0..1];remote",
"net/http;Request;true;FormValue;;;ReturnValue;remote",
"net/http;Request;true;MultipartReader;;;ReturnValue[0];remote",

Просмотреть файл

@ -21,19 +21,19 @@ class SummaryModelTest extends SummaryModelCsv {
"github.com/nonexistent/test;T;false;StepQualRes;;;Argument[-1];ReturnValue;taint",
"github.com/nonexistent/test;T;false;StepQualArg;;;Argument[-1];Argument[0];taint",
"github.com/nonexistent/test;;false;StepArgResNoQual;;;Argument[0];ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResArrayContent;;;Argument[0];ArrayElement of ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgArrayContentRes;;;ArrayElement of Argument[0];ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResCollectionContent;;;Argument[0];Element of ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgCollectionContentRes;;;Element of Argument[0];ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResMapKeyContent;;;Argument[0];MapKey of ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgMapKeyContentRes;;;MapKey of Argument[0];ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResMapValueContent;;;Argument[0];MapValue of ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgMapValueContentRes;;;MapValue of Argument[0];ReturnValue;taint",
"github.com/nonexistent/test;;false;GetElement;;;Element of Argument[0];ReturnValue;value",
"github.com/nonexistent/test;;false;GetMapKey;;;MapKey of Argument[0];ReturnValue;value",
"github.com/nonexistent/test;;false;SetElement;;;Argument[0];Element of ReturnValue;value",
"github.com/nonexistent/test;C;false;Get;;;Field[github.com/nonexistent/test.C.F] of Argument[-1];ReturnValue;value",
"github.com/nonexistent/test;C;false;Set;;;Argument[0];Field[github.com/nonexistent/test.C.F] of Argument[-1];value",
"github.com/nonexistent/test;;false;StepArgResArrayContent;;;Argument[0];ReturnValue.ArrayElement;taint",
"github.com/nonexistent/test;;false;StepArgArrayContentRes;;;Argument[0].ArrayElement;ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResCollectionContent;;;Argument[0];ReturnValue.Element;taint",
"github.com/nonexistent/test;;false;StepArgCollectionContentRes;;;Argument[0].Element;ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResMapKeyContent;;;Argument[0];ReturnValue.MapKey;taint",
"github.com/nonexistent/test;;false;StepArgMapKeyContentRes;;;Argument[0].MapKey;ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResMapValueContent;;;Argument[0];ReturnValue.MapValue;taint",
"github.com/nonexistent/test;;false;StepArgMapValueContentRes;;;Argument[0].MapValue;ReturnValue;taint",
"github.com/nonexistent/test;;false;GetElement;;;Argument[0].Element;ReturnValue;value",
"github.com/nonexistent/test;;false;GetMapKey;;;Argument[0].MapKey;ReturnValue;value",
"github.com/nonexistent/test;;false;SetElement;;;Argument[0];ReturnValue.Element;value",
"github.com/nonexistent/test;C;false;Get;;;Argument[-1].Field[github.com/nonexistent/test.C.F];ReturnValue;value",
"github.com/nonexistent/test;C;false;Set;;;Argument[0];Argument[-1].Field[github.com/nonexistent/test.C.F];value",
]
}
}

Просмотреть файл

@ -16,8 +16,8 @@ class SummaryModelTest extends SummaryModelCsv {
"github.com/nonexistent/test;T;false;StepQualRes;;;Argument[-1];ReturnValue;taint",
"github.com/nonexistent/test;T;false;StepQualArg;;;Argument[-1];Argument[0];taint",
"github.com/nonexistent/test;;false;StepArgResNoQual;;;Argument[0];ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgResContent;;;Argument[0];ArrayElement of ReturnValue;taint",
"github.com/nonexistent/test;;false;StepArgContentRes;;;ArrayElement of Argument[0];ReturnValue;taint"
"github.com/nonexistent/test;;false;StepArgResContent;;;Argument[0];ReturnValue.ArrayElement;taint",
"github.com/nonexistent/test;;false;StepArgContentRes;;;Argument[0].ArrayElement;ReturnValue;taint"
]
}
}

Просмотреть файл

@ -9,10 +9,10 @@ class SummaryModelTest extends SummaryModelCsv {
[
//`namespace; type; subtypes; name; signature; ext; input; output; kind`
"github.com/nonexistent/test;;false;FunctionWithParameter;;;Argument[0];ReturnValue;value",
"github.com/nonexistent/test;;false;FunctionWithSliceParameter;;;ArrayElement of Argument[0];ReturnValue;value",
"github.com/nonexistent/test;;false;FunctionWithVarArgsParameter;;;ArrayElement of Argument[0];ReturnValue;value",
"github.com/nonexistent/test;;false;FunctionWithSliceOfStructsParameter;;;Field[github.com/nonexistent/test.A.Field] of ArrayElement of Argument[0];ReturnValue;value",
"github.com/nonexistent/test;;false;FunctionWithVarArgsOfStructsParameter;;;Field[github.com/nonexistent/test.A.Field] of ArrayElement of Argument[0];ReturnValue;value"
"github.com/nonexistent/test;;false;FunctionWithSliceParameter;;;Argument[0].ArrayElement;ReturnValue;value",
"github.com/nonexistent/test;;false;FunctionWithVarArgsParameter;;;Argument[0].ArrayElement;ReturnValue;value",
"github.com/nonexistent/test;;false;FunctionWithSliceOfStructsParameter;;;Argument[0].ArrayElement.Field[github.com/nonexistent/test.A.Field];ReturnValue;value",
"github.com/nonexistent/test;;false;FunctionWithVarArgsOfStructsParameter;;;Argument[0].ArrayElement.Field[github.com/nonexistent/test.A.Field];ReturnValue;value"
]
}
}