diff --git a/config/identical-files.json b/config/identical-files.json index 53b286fb8a1..77bee6b5097 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -53,14 +53,6 @@ "ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll", "swift/ql/lib/codeql/swift/dataflow/internal/tainttracking1/TaintTrackingImpl.qll" ], - "DataFlow Java/C#/Go/Ruby/Python/Swift Flow Summaries": [ - "java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll", - "csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll", - "go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll", - "ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll", - "python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll", - "swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll" - ], "SsaReadPosition Java/C#": [ "java/ql/lib/semmle/code/java/dataflow/internal/rangeanalysis/SsaReadPositionCommon.qll", "csharp/ql/lib/semmle/code/csharp/dataflow/internal/rangeanalysis/SsaReadPositionCommon.qll" @@ -466,15 +458,6 @@ "python/ql/lib/semmle/python/dataflow/new/internal/SummaryTypeTracker.qll", "ruby/ql/lib/codeql/ruby/typetracking/internal/SummaryTypeTracker.qll" ], - "AccessPathSyntax": [ - "csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll", - "go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll", - "java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll", - "javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll", - "ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll", - "python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll", - "swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll" - ], "IncompleteUrlSubstringSanitization": [ "javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll", "ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll" diff --git 
a/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll index 70efa618c9a..18e4025f9e6 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll @@ -1,199 +1,23 @@ /** Provides classes and predicates for defining flow summaries. */ import csharp -private import dotnet private import internal.FlowSummaryImpl as Impl private import internal.DataFlowDispatch as DataFlowDispatch -private import Impl::Public::SummaryComponent as SummaryComponentInternal -class ParameterPosition = DataFlowDispatch::ParameterPosition; +deprecated class ParameterPosition = DataFlowDispatch::ParameterPosition; -class ArgumentPosition = DataFlowDispatch::ArgumentPosition; +deprecated class ArgumentPosition = DataFlowDispatch::ArgumentPosition; -// import all instances below -private module Summaries { - private import semmle.code.csharp.frameworks.EntityFramework -} +deprecated class SummaryComponent = Impl::Private::SummaryComponent; -class SummaryComponent = Impl::Public::SummaryComponent; +deprecated module SummaryComponent = Impl::Private::SummaryComponent; -/** Provides predicates for constructing summary components. */ -module SummaryComponent { - predicate content = SummaryComponentInternal::content/1; +deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack; - /** Gets a summary component for parameter `i`. */ - SummaryComponent parameter(int i) { - exists(ArgumentPosition pos | - result = SummaryComponentInternal::parameter(pos) and - i = pos.getPosition() - ) - } +deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; - /** Gets a summary component for argument `i`. 
*/ - SummaryComponent argument(int i) { - exists(ParameterPosition pos | - result = SummaryComponentInternal::argument(pos) and - i = pos.getPosition() - ) - } - - predicate return = SummaryComponentInternal::return/1; - - /** Gets a summary component that represents a qualifier. */ - SummaryComponent qualifier() { - exists(ParameterPosition pos | - result = SummaryComponentInternal::argument(pos) and - pos.isThisParameter() - ) - } - - /** Gets a summary component that represents an element in a collection. */ - SummaryComponent element() { result = content(any(DataFlow::ElementContent c)) } - - /** Gets a summary component for property `p`. */ - SummaryComponent property(Property p) { - result = content(any(DataFlow::PropertyContent c | c.getProperty() = p.getUnboundDeclaration())) - } - - /** Gets a summary component for field `f`. */ - SummaryComponent field(Field f) { - result = content(any(DataFlow::FieldContent c | c.getField() = f.getUnboundDeclaration())) - } - - /** Gets a summary component that represents the return value of a call. */ - SummaryComponent return() { result = return(any(DataFlowDispatch::NormalReturnKind rk)) } - - predicate syntheticGlobal = SummaryComponentInternal::syntheticGlobal/1; - - class SyntheticGlobal = SummaryComponentInternal::SyntheticGlobal; -} - -class SummaryComponentStack = Impl::Public::SummaryComponentStack; - -/** Provides predicates for constructing stacks of summary components. */ -module SummaryComponentStack { - private import Impl::Public::SummaryComponentStack as SummaryComponentStackInternal - - predicate singleton = SummaryComponentStackInternal::singleton/1; - - predicate push = SummaryComponentStackInternal::push/2; - - /** Gets a singleton stack for argument `i`. */ - SummaryComponentStack argument(int i) { result = singleton(SummaryComponent::argument(i)) } - - predicate return = SummaryComponentStackInternal::return/1; - - /** Gets a singleton stack representing a qualifier. 
*/ - SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) } - - /** Gets a stack representing an element of `container`. */ - SummaryComponentStack elementOf(SummaryComponentStack container) { - result = push(SummaryComponent::element(), container) - } - - /** Gets a stack representing a property `p` of `object`. */ - SummaryComponentStack propertyOf(Property p, SummaryComponentStack object) { - result = push(SummaryComponent::property(p), object) - } - - /** Gets a stack representing a field `f` of `object`. */ - SummaryComponentStack fieldOf(Field f, SummaryComponentStack object) { - result = push(SummaryComponent::field(f), object) - } - - /** Gets a singleton stack representing the return value of a call. */ - SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } - - /** Gets a singleton stack representing a synthetic global with name `name`. */ - SummaryComponentStack syntheticGlobal(string synthetic) { - result = singleton(SummaryComponent::syntheticGlobal(synthetic)) - } - - /** - * DEPRECATED: Use the member predicate `getMadRepresentation` instead. - * - * Gets a textual representation of this stack used for flow summaries. 
- */ - deprecated string getComponentStack(SummaryComponentStack s) { result = s.getMadRepresentation() } -} +deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; class SummarizedCallable = Impl::Public::SummarizedCallable; -private predicate recordConstructorFlow(Constructor c, int i, Property p) { - c = any(RecordType r).getAMember() and - exists(string name | - c.getParameter(i).getName() = name and - c.getDeclaringType().getAMember(name) = p - ) -} - -private class RecordConstructorFlow extends SummarizedCallable { - RecordConstructorFlow() { recordConstructorFlow(this, _, _) } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(int i, Property p | - recordConstructorFlow(this, i, p) and - input = SummaryComponentStack::argument(i) and - output = SummaryComponentStack::propertyOf(p, SummaryComponentStack::return()) and - preservesValue = true - ) - } -} - -class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; - -private class RecordConstructorFlowRequiredSummaryComponentStack extends RequiredSummaryComponentStack -{ - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - exists(Property p | - recordConstructorFlow(_, _, p) and - head = SummaryComponent::property(p) and - tail = SummaryComponentStack::return() - ) - } -} - class Provenance = Impl::Public::Provenance; - -private import semmle.code.csharp.frameworks.system.linq.Expressions - -private SummaryComponent delegateSelf() { - exists(ArgumentPosition pos | - result = SummaryComponentInternal::parameter(pos) and - pos.isDelegateSelf() - ) -} - -private predicate mayInvokeCallback(Callable c, int n) { - c.getParameter(n).getType() instanceof SystemLinqExpressions::DelegateExtType and - not c.hasBody() and - (if c instanceof Accessor then not c.fromSource() else any()) -} - -private class SummarizedCallableWithCallback 
extends SummarizedCallable { - private int pos; - - SummarizedCallableWithCallback() { mayInvokeCallback(this, pos) } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - input = SummaryComponentStack::argument(pos) and - output = SummaryComponentStack::push(delegateSelf(), input) and - preservesValue = true - } - - override predicate hasProvenance(Provenance provenance) { provenance = "hq-generated" } -} - -private class RequiredComponentStackForCallback extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - exists(int pos | - mayInvokeCallback(_, pos) and - head = delegateSelf() and - tail = SummaryComponentStack::argument(pos) - ) - } -} diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. - * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. 
*/ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. - lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). 
- * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. - result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). 
*/ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. 
*/ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll index a06f9a183c2..1795cdcb432 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll @@ -116,9 +116,7 @@ private module Cached { // No need to include calls that are compiled from source not call.getImplementation().getMethod().compiledFromSource() } or - TSummaryCall( - FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver - ) { + TSummaryCall(FlowSummary::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver) { FlowSummaryImpl::Private::summaryCallbackRange(c, receiver) } @@ -446,7 +444,7 @@ class CilDataFlowCall extends DataFlowCall, TCilCall { * the method `Select`. */ class SummaryCall extends DelegateDataFlowCall, TSummaryCall { - private FlowSummaryImpl::Public::SummarizedCallable c; + private FlowSummary::SummarizedCallable c; private FlowSummaryImpl::Private::SummaryNode receiver; SummaryCall() { this = TSummaryCall(c, receiver) } diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll index 37b493e001f..60cc685bcea 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll @@ -676,11 +676,11 @@ private predicate fieldOrPropertyStore(Expr e, Content c, Expr src, Expr q, bool f instanceof InstanceFieldOrProperty or exists( - FlowSummaryImpl::Public::SummarizedCallable sc, - FlowSummaryImpl::Public::SummaryComponentStack input + FlowSummaryImpl::Private::SummarizedCallableImpl sc, + FlowSummaryImpl::Private::SummaryComponentStack input | 
sc.propagatesFlow(input, _, _) and - input.contains(FlowSummary::SummaryComponent::content(f.getContent())) + input.contains(FlowSummaryImpl::Private::SummaryComponent::content(f.getContent())) ) ) | @@ -1393,11 +1393,11 @@ private module ArgumentNodes { } private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNodeImpl { - private DataFlowCall call_; + private SummaryCall call_; private ArgumentPosition pos_; SummaryArgumentNode() { - FlowSummaryImpl::Private::summaryArgumentNode(call_, this.getSummaryNode(), pos_) + FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), pos_) } override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { @@ -1683,11 +1683,11 @@ private module OutNodes { } private class SummaryOutNode extends FlowSummaryNode, OutNode { - private DataFlowCall call; + private SummaryCall call; private ReturnKind kind_; SummaryOutNode() { - FlowSummaryImpl::Private::summaryOutNode(call, this.getSummaryNode(), kind_) + FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_) } override DataFlowCall getCall(ReturnKind kind) { result = call and kind = kind_ } @@ -1700,7 +1700,7 @@ import OutNodes class FlowSummaryNode extends NodeImpl, TFlowSummaryNode { FlowSummaryImpl::Private::SummaryNode getSummaryNode() { this = TFlowSummaryNode(result) } - FlowSummaryImpl::Public::SummarizedCallable getSummarizedCallable() { + FlowSummary::SummarizedCallable getSummarizedCallable() { result = this.getSummaryNode().getSummarizedCallable() } @@ -2424,7 +2424,10 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves * by default as a heuristic. 
*/ predicate allowParameterReturnInSelf(ParameterNode p) { - FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p) + exists(DataFlowCallable c, ParameterPosition pos | + parameterNode(p, c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asSummarizedCallable(), pos) + ) } /** An approximated `Content`. */ diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll index 122567111cd..90d8b6b0ecd 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/ExternalFlow.qll @@ -88,13 +88,13 @@ import csharp import ExternalFlowExtensions -private import AccessPathSyntax private import DataFlowDispatch private import DataFlowPrivate private import DataFlowPublic +private import FlowSummaryImpl private import FlowSummaryImpl::Public +private import FlowSummaryImpl::Private private import FlowSummaryImpl::Private::External -private import FlowSummaryImplSpecific private import semmle.code.csharp.commons.QualifiedName private import codeql.mad.ModelValidation as SharedModelVal @@ -154,6 +154,21 @@ predicate modelCoverage(string namespace, int namespaces, string kind, string pa /** Provides a query predicate to check the MaD models for validation errors. 
*/ module ModelValidation { + private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax + + private predicate getRelevantAccessPath(string path) { + summaryModel(_, _, _, _, _, _, path, _, _, _) or + summaryModel(_, _, _, _, _, _, _, path, _, _) or + sinkModel(_, _, _, _, _, _, path, _, _) or + sourceModel(_, _, _, _, _, _, path, _, _) + } + + private module MkAccessPath = AccessPathSyntax::AccessPath; + + class AccessPath = MkAccessPath::AccessPath; + + class AccessPathToken = MkAccessPath::AccessPathToken; + private string getInvalidModelInput() { exists(string pred, AccessPath input, AccessPathToken part | sinkModel(_, _, _, _, _, _, input, _, _) and pred = "sink" @@ -380,14 +395,14 @@ Declaration interpretElement( * A callable where there exists a MaD sink model that applies to it. */ class SinkCallable extends Callable { - SinkCallable() { sinkElement(this, _, _, _) } + SinkCallable() { SourceSinkInterpretationInput::sinkElement(this, _, _) } } /** * A callable where there exists a MaD source model that applies to it. 
*/ class SourceCallable extends Callable { - SourceCallable() { sourceElement(this, _, _, _) } + SourceCallable() { SourceSinkInterpretationInput::sourceElement(this, _, _) } } cached @@ -398,7 +413,9 @@ private module Cached { */ cached predicate sourceNode(Node node, string kind) { - exists(InterpretNode n | isSourceNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSourceNode(n, kind) and n.asNode() = node + ) } /** @@ -407,7 +424,9 @@ private module Cached { */ cached predicate sinkNode(Node node, string kind) { - exists(InterpretNode n | isSinkNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSinkNode(n, kind) and n.asNode() = node + ) } } @@ -485,3 +504,65 @@ string asPartialNeutralModel(UnboundCallable c) { + parameters + ";" // ) } + +private predicate interpretSummary( + UnboundCallable c, string input, string output, string kind, string provenance +) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext + | + summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and + c = interpretElement(namespace, type, subtypes, name, signature, ext) + ) +} + +// adapter class for converting Mad summaries to `SummarizedCallable`s +private class SummarizedCallableAdapter extends SummarizedCallable { + SummarizedCallableAdapter() { interpretSummary(this, _, _, _, _) } + + private predicate relevantSummaryElementManual(string input, string output, string kind) { + exists(Provenance provenance | + interpretSummary(this, input, output, kind, provenance) and + provenance.isManual() + ) + } + + private predicate relevantSummaryElementGenerated(string input, string output, string kind) { + exists(Provenance provenance | + interpretSummary(this, input, output, kind, provenance) and + provenance.isGenerated() + ) + } + + override predicate propagatesFlow(string input, string output, boolean 
preservesValue) { + exists(string kind | + this.relevantSummaryElementManual(input, output, kind) + or + not this.relevantSummaryElementManual(_, _, _) and + this.relevantSummaryElementGenerated(input, output, kind) + | + if kind = "value" then preservesValue = true else preservesValue = false + ) + } + + override predicate hasProvenance(Provenance provenance) { + interpretSummary(this, _, _, _, provenance) + } +} + +// adapter class for converting Mad neutrals to `NeutralCallable`s +private class NeutralCallableAdapter extends NeutralCallable { + string kind; + string provenance_; + + NeutralCallableAdapter() { + exists(string namespace, string type, string name, string signature | + neutralModel(namespace, type, name, signature, kind, provenance_) and + this = interpretElement(namespace, type, false, name, signature, "") + ) + } + + override string getKind() { result = kind } + + override predicate hasProvenance(Provenance provenance) { provenance = provenance_ } +} diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll index 0aa17c521b4..1804c976e52 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll @@ -1,1491 +1,438 @@ /** * Provides classes and predicates for defining flow summaries. - * - * The definitions in this file are language-independent, and language-specific - * definitions are passed in via the `DataFlowImplSpecific` and - * `FlowSummaryImplSpecific` modules. 
*/ -private import FlowSummaryImplSpecific +private import csharp +private import semmle.code.csharp.frameworks.system.linq.Expressions +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import DataFlowImplSpecific as DataFlowImplSpecific private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public -private import DataFlowImplCommon -private import codeql.util.Unit +private import semmle.code.csharp.Unification +private import semmle.code.csharp.dataflow.internal.ExternalFlow -/** Provides classes and predicates for defining flow summaries. */ -module Public { - private import Private +module Input implements InputSig { + class SummarizedCallableBase = UnboundCallable; - /** - * A component used in a flow summary. - * - * Either a parameter or an argument at a given position, a specific - * content type, or a return kind. - */ - class SummaryComponent extends TSummaryComponent { - /** Gets a textual representation of this component used for MaD models. 
*/ - string getMadRepresentation() { - result = getMadRepresentationSpecific(this) + ArgumentPosition callbackSelfParameterPosition() { result.isDelegateSelf() } + + ReturnKind getStandardReturnValueKind() { result instanceof NormalReturnKind } + + string encodeParameterPosition(ParameterPosition pos) { + result = pos.getPosition().toString() + or + pos.isThisParameter() and + result = "this" + or + pos.isDelegateSelf() and + result = "delegate-self" + } + + string encodeArgumentPosition(ArgumentPosition pos) { + result = pos.getPosition().toString() + or + pos.isQualifier() and + result = "this" + or + pos.isDelegateSelf() and + result = "delegate-self" + } + + string encodeContent(ContentSet c, string arg) { + c = TElementContent() and result = "Element" and arg = "" + or + exists(Field f | c = TFieldContent(f) and result = "Field" and arg = f.getFullyQualifiedName()) + or + exists(Property p | + c = TPropertyContent(p) and result = "Property" and arg = p.getFullyQualifiedName() + ) + or + exists(SyntheticField f | + c = TSyntheticFieldContent(f) and result = "SyntheticField" and arg = f + ) + } + + string encodeWithoutContent(ContentSet c, string arg) { + result = "WithoutElement" and + c = TElementContent() and + arg = "" + } + + string encodeWithContent(ContentSet c, string arg) { + result = "WithElement" and + c = TElementContent() and + arg = "" + } + + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges + token.getName() = "Argument" and + result.getPosition() = AccessPath::parseInt(token.getAnArgument()) + } + + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges + token.getName() = "Parameter" and + result.getPosition() = AccessPath::parseInt(token.getAnArgument()) + } +} + +private import Make as Impl + +private module TypesInput implements 
Impl::Private::TypesInputSig { + DataFlowType getSyntheticGlobalType(Impl::Private::SyntheticGlobal sg) { + exists(sg) and + result.asGvnType() = Gvn::getGlobalValueNumber(any(ObjectType t)) + } + + DataFlowType getContentType(ContentSet c) { + exists(Type t | result.asGvnType() = Gvn::getGlobalValueNumber(t) | + t = c.(FieldContent).getField().getType() or - exists(ArgumentPosition pos | - this = TParameterSummaryComponent(pos) and - result = "Parameter[" + getArgumentPosition(pos) + "]" + t = c.(PropertyContent).getProperty().getType() + or + t = c.(SyntheticFieldContent).getField().getType() + or + c instanceof ElementContent and + t instanceof ObjectType // we don't know what the actual element type is + ) + } + + DataFlowType getParameterType(Impl::Public::SummarizedCallable c, ParameterPosition pos) { + exists(Type t | result.asGvnType() = Gvn::getGlobalValueNumber(t) | + exists(int i | + pos.getPosition() = i and + t = c.getParameter(i).getType() ) or - exists(ParameterPosition pos | - this = TArgumentSummaryComponent(pos) and - result = "Argument[" + getParameterPosition(pos) + "]" + pos.isThisParameter() and + t = c.getDeclaringType() + ) + } + + DataFlowType getReturnType(Impl::Public::SummarizedCallable c, ReturnKind rk) { + exists(Type t | result.asGvnType() = Gvn::getGlobalValueNumber(t) | + rk instanceof NormalReturnKind and + ( + t = c.(Constructor).getDeclaringType() + or + not c instanceof Constructor and + t = c.getReturnType() ) or - exists(string synthetic | - this = TSyntheticGlobalSummaryComponent(synthetic) and - result = "SyntheticGlobal[" + synthetic + "]" - ) + t = c.getParameter(rk.(OutRefReturnKind).getPosition()).getType() + ) + } + + DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { + exists(SystemLinqExpressions::DelegateExtType dt | + t.asGvnType() = Gvn::getGlobalValueNumber(dt) and + result.asGvnType() = + Gvn::getGlobalValueNumber(dt.getDelegateType().getParameter(pos.getPosition()).getType()) + ) + or 
+ pos.isDelegateSelf() and + result = t + } + + DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { + rk instanceof NormalReturnKind and + exists(SystemLinqExpressions::DelegateExtType dt | + t.asGvnType() = Gvn::getGlobalValueNumber(dt) and + result.asGvnType() = Gvn::getGlobalValueNumber(dt.getDelegateType().getReturnType()) + ) + } +} + +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { + sc = viableCallable(result).asSummarizedCallable() + } +} + +module SourceSinkInterpretationInput implements + Impl::Private::External::SourceSinkInterpretationInputSig +{ + private import csharp as Cs + + class Element = Cs::Element; + + predicate sourceElement(Element e, string output, string kind) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext + | + sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, _) and + e = interpretElement(namespace, type, subtypes, name, signature, ext) + ) + } + + predicate sinkElement(Element e, string input, string kind) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext + | + sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, _) and + e = interpretElement(namespace, type, subtypes, name, signature, ext) + ) + } + + class SourceOrSinkElement = Element; + + private newtype TInterpretNode = + TElement_(Element n) or + TNode_(Node n) or + TDataFlowCall_(DataFlowCall c) + + /** An entity used to interpret a source/sink specification. */ + class InterpretNode extends TInterpretNode { + /** Gets the element that this node corresponds to, if any. */ + SourceOrSinkElement asElement() { this = TElement_(result) } + + /** Gets the data-flow node that this node corresponds to, if any. */ + Node asNode() { this = TNode_(result) } + + /** Gets the call that this node corresponds to, if any. 
*/ + DataFlowCall asCall() { this = TDataFlowCall_(result) } + + /** Gets the callable that this node corresponds to, if any. */ + DataFlowCallable asCallable() { result.getUnderlyingCallable() = this.asElement() } + + /** Gets the target of this call, if any. */ + Element getCallTarget() { result = this.asCall().(NonDelegateDataFlowCall).getATarget(_) } + + /** Gets a textual representation of this node. */ + string toString() { + result = this.asElement().toString() or - this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" + result = this.asNode().toString() + or + result = this.asCall().toString() } - /** Gets a textual representation of this summary component. */ - string toString() { result = this.getMadRepresentation() } + /** Gets the location of this node. */ + Location getLocation() { + result = this.asElement().getLocation() + or + result = this.asNode().getLocation() + or + result = this.asCall().getLocation() + } } + /** Provides additional sink specification logic. */ + bindingset[c] + predicate interpretOutput(string c, InterpretNode mid, InterpretNode node) { + exists(Node n | n = node.asNode() | + (c = "Parameter" or c = "") and + n.asParameter() = mid.asElement() + or + c = "" and + n.asExpr().(AssignableRead).getTarget().getUnboundDeclaration() = mid.asElement() + ) + } + + /** Provides additional source specification logic. 
*/ + bindingset[c] + predicate interpretInput(string c, InterpretNode mid, InterpretNode node) { + c = "" and + exists(Assignable a | + node.asNode().asExpr() = a.getAnAssignedValue() and + a.getUnboundDeclaration() = mid.asElement() + ) + } +} + +module Private { + import Impl::Private + import Impl::Private::Types + + module Steps = Impl::Private::Steps; + + module External { + import Impl::Private::External + import Impl::Private::External::SourceSinkInterpretation + } + + private module SummaryComponentInternal = Impl::Private::SummaryComponent; + /** Provides predicates for constructing summary components. */ module SummaryComponent { - /** Gets a summary component for content `c`. */ - SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } + predicate content = SummaryComponentInternal::content/1; - /** Gets a summary component where data is not allowed to be stored in `c`. */ - SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } - - /** Gets a summary component where data must be stored in `c`. */ - SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } - - /** Gets a summary component for a parameter at position `pos`. */ - SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } - - /** Gets a summary component for an argument at position `pos`. */ - SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } - - /** Gets a summary component for a return of kind `rk`. */ - SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } - - /** Gets a summary component for synthetic global `sg`. */ - SummaryComponent syntheticGlobal(SyntheticGlobal sg) { - result = TSyntheticGlobalSummaryComponent(sg) - } - - /** - * A synthetic global. This represents some form of global state, which - * summaries can read and write individually. 
- */ - abstract class SyntheticGlobal extends string { - bindingset[this] - SyntheticGlobal() { any() } - } - } - - /** - * A (non-empty) stack of summary components. - * - * A stack is used to represent where data is read from (input) or where it - * is written to (output). For example, an input stack `[Field f, Argument 0]` - * means that data is read from field `f` from the `0`th argument, while an - * output stack `[Field g, Return]` means that data is written to the field - * `g` of the returned object. - */ - class SummaryComponentStack extends TSummaryComponentStack { - /** Gets the head of this stack. */ - SummaryComponent head() { - this = TSingletonSummaryComponentStack(result) or - this = TConsSummaryComponentStack(result, _) - } - - /** Gets the tail of this stack, if any. */ - SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } - - /** Gets the length of this stack. */ - int length() { - this = TSingletonSummaryComponentStack(_) and result = 1 - or - result = 1 + this.tail().length() - } - - /** Gets the stack obtained by dropping the first `i` elements, if any. */ - SummaryComponentStack drop(int i) { - i = 0 and result = this - or - result = this.tail().drop(i - 1) - } - - /** Holds if this stack contains summary component `c`. */ - predicate contains(SummaryComponent c) { c = this.drop(_).head() } - - /** Gets the bottom element of this stack. */ - SummaryComponent bottom() { - this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() - } - - /** Gets a textual representation of this stack used for MaD models. */ - string getMadRepresentation() { - exists(SummaryComponent head, SummaryComponentStack tail | - head = this.head() and - tail = this.tail() and - result = tail.getMadRepresentation() + "." + head.getMadRepresentation() - ) - or - exists(SummaryComponent c | - this = TSingletonSummaryComponentStack(c) and - result = c.getMadRepresentation() + /** Gets a summary component for parameter `i`. 
*/ + SummaryComponent parameter(int i) { + exists(ArgumentPosition pos | + result = SummaryComponentInternal::parameter(pos) and + i = pos.getPosition() ) } - /** Gets a textual representation of this stack. */ - string toString() { result = this.getMadRepresentation() } + /** Gets a summary component for argument `i`. */ + SummaryComponent argument(int i) { + exists(ParameterPosition pos | + result = SummaryComponentInternal::argument(pos) and + i = pos.getPosition() + ) + } + + predicate return = SummaryComponentInternal::return/1; + + /** Gets a summary component that represents a qualifier. */ + SummaryComponent qualifier() { + exists(ParameterPosition pos | + result = SummaryComponentInternal::argument(pos) and + pos.isThisParameter() + ) + } + + /** Gets a summary component that represents an element in a collection. */ + SummaryComponent element() { result = content(any(DataFlow::ElementContent c)) } + + /** Gets a summary component for property `p`. */ + SummaryComponent property(Property p) { + result = + content(any(DataFlow::PropertyContent c | c.getProperty() = p.getUnboundDeclaration())) + } + + /** Gets a summary component for field `f`. */ + SummaryComponent field(Field f) { + result = content(any(DataFlow::FieldContent c | c.getField() = f.getUnboundDeclaration())) + } + + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = return(any(NormalReturnKind rk)) } + + predicate syntheticGlobal = SummaryComponentInternal::syntheticGlobal/1; + + class SyntheticGlobal = Impl::Private::SyntheticGlobal; } + private module SummaryComponentStackInternal = Impl::Private::SummaryComponentStack; + /** Provides predicates for constructing stacks of summary components. */ module SummaryComponentStack { - /** Gets a singleton stack containing `c`. 
*/ - SummaryComponentStack singleton(SummaryComponent c) { - result = TSingletonSummaryComponentStack(c) + predicate singleton = SummaryComponentStackInternal::singleton/1; + + predicate push = SummaryComponentStackInternal::push/2; + + /** Gets a singleton stack for argument `i`. */ + SummaryComponentStack argument(int i) { result = singleton(SummaryComponent::argument(i)) } + + predicate return = SummaryComponentStackInternal::return/1; + + /** Gets a singleton stack representing a qualifier. */ + SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) } + + /** Gets a stack representing an element of `container`. */ + SummaryComponentStack elementOf(SummaryComponentStack container) { + result = push(SummaryComponent::element(), container) } - /** - * Gets the stack obtained by pushing `head` onto `tail`. - * - * Make sure to override `RequiredSummaryComponentStack::required()` in order - * to ensure that the constructed stack exists. - */ - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { - result = TConsSummaryComponentStack(head, tail) + /** Gets a stack representing a property `p` of `object`. */ + SummaryComponentStack propertyOf(Property p, SummaryComponentStack object) { + result = push(SummaryComponent::property(p), object) } - /** Gets a singleton stack for an argument at position `pos`. */ - SummaryComponentStack argument(ParameterPosition pos) { - result = singleton(SummaryComponent::argument(pos)) + /** Gets a stack representing a field `f` of `object`. */ + SummaryComponentStack fieldOf(Field f, SummaryComponentStack object) { + result = push(SummaryComponent::field(f), object) } - /** Gets a singleton stack representing a return of kind `rk`. */ - SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } - } + /** Gets a singleton stack representing the return value of a call. 
*/ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } - /** - * A class that exists for QL technical reasons only (the IPA type used - * to represent component stacks needs to be bounded). - */ - class RequiredSummaryComponentStack extends Unit { - /** - * Holds if the stack obtained by pushing `head` onto `tail` is required. - */ - abstract predicate required(SummaryComponent head, SummaryComponentStack tail); - } - - /** - * Gets the valid model origin values. - */ - private string getValidModelOrigin() { - result = - [ - "ai", // AI (machine learning) - "df", // Dataflow (model generator) - "tb", // Type based (model generator) - "hq", // Heuristic query - ] - } - - /** - * A class used to represent provenance values for MaD models. - * - * The provenance value is a string of the form `origin-verification` - * (or just `manual`), where `origin` is a value indicating the - * origin of the model, and `verification` is a value indicating, how - * the model was verified. - * - * Examples could be: - * - `df-generated`: A model produced by the model generator, but not verified by a human. - * - `ai-manual`: A model produced by AI, but verified by a human. - */ - class Provenance extends string { - private string verification; - - Provenance() { - exists(string origin | origin = getValidModelOrigin() | - this = origin + "-" + verification and - verification = ["manual", "generated"] - ) - or - this = verification and verification = "manual" + /** Gets a singleton stack representing a synthetic global with name `name`. */ + SummaryComponentStack syntheticGlobal(string synthetic) { + result = singleton(SummaryComponent::syntheticGlobal(synthetic)) } - - /** - * Holds if this is a valid generated provenance value. - */ - predicate isGenerated() { verification = "generated" } - - /** - * Holds if this is a valid manual provenance value. - */ - predicate isManual() { verification = "manual" } - } - - /** A callable with a flow summary. 
*/ - abstract class SummarizedCallable extends SummarizedCallableBase { - bindingset[this] - SummarizedCallable() { any() } - - /** - * Holds if data may flow from `input` to `output` through this callable. - * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. - * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. - * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. - */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if there exists a generated summary that applies to this callable. - */ - final predicate hasGeneratedModel() { - exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) - } - - /** - * Holds if all the summaries that apply to this callable are auto generated and not manually created. - * That is, only apply generated models, when there are no manual models. - */ - final predicate applyGeneratedModel() { - this.hasGeneratedModel() and - not this.hasManualModel() - } - - /** - * Holds if there exists a manual summary that applies to this callable. - */ - final predicate hasManualModel() { - exists(Provenance p | p.isManual() and this.hasProvenance(p)) - } - - /** - * Holds if there exists a manual summary that applies to this callable. 
- * Always apply manual models if they exist. - */ - final predicate applyManualModel() { this.hasManualModel() } - - /** - * Holds if there exists a summary that applies to this callable - * that has provenance `provenance`. - */ - predicate hasProvenance(Provenance provenance) { provenance = "manual" } - } - - /** - * A callable where there is no flow via the callable. - */ - class NeutralSummaryCallable extends NeutralCallable { - NeutralSummaryCallable() { this.getKind() = "summary" } - } - - /** - * A callable that has a neutral model. - */ - class NeutralCallable extends NeutralCallableBase { - private string kind; - private Provenance provenance; - - NeutralCallable() { neutralElement(this, kind, provenance) } - - /** - * Holds if the neutral is auto generated. - */ - final predicate hasGeneratedModel() { provenance.isGenerated() } - - /** - * Holds if there exists a manual neutral that applies to this callable. - */ - final predicate hasManualModel() { provenance.isManual() } - - /** - * Holds if the neutral has provenance `p`. - */ - predicate hasProvenance(Provenance p) { p = provenance } - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = kind } } } -/** - * Provides predicates for compiling flow summaries down to atomic local steps, - * read steps, and store steps. 
- */ -module Private { - private import Public - import AccessPathSyntax +module Public = Impl::Public; - newtype TSummaryComponent = - TContentSummaryComponent(ContentSet c) or - TParameterSummaryComponent(ArgumentPosition pos) or - TArgumentSummaryComponent(ParameterPosition pos) or - TReturnSummaryComponent(ReturnKind rk) or - TSyntheticGlobalSummaryComponent(SummaryComponent::SyntheticGlobal sg) or - TWithoutContentSummaryComponent(ContentSet c) or - TWithContentSummaryComponent(ContentSet c) +// import all instances below +private module BidirectionalImports { + private import semmle.code.csharp.dataflow.internal.ExternalFlow + private import semmle.code.csharp.frameworks.EntityFramework +} - private TParameterSummaryComponent callbackSelfParam() { - result = TParameterSummaryComponent(callbackSelfParameterPosition()) - } +private predicate recordConstructorFlow(Constructor c, int i, Property p) { + c = any(RecordType r).getAMember() and + exists(string name | + c.getParameter(i).getName() = name and + c.getDeclaringType().getAMember(name) = p + ) +} - newtype TSummaryComponentStack = - TSingletonSummaryComponentStack(SummaryComponent c) or - TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { - any(RequiredSummaryComponentStack x).required(head, tail) - or - any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and - head = callbackSelfParam() - or - derivedFluentFlowPush(_, _, _, head, tail, _) - } +private class RecordConstructorFlow extends Impl::Private::SummarizedCallableImpl { + RecordConstructorFlow() { recordConstructorFlow(this, _, _) } - pragma[nomagic] - private predicate summary( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, + predicate propagatesFlowImpl( + Impl::Private::SummaryComponentStack input, Impl::Private::SummaryComponentStack output, boolean preservesValue ) { - c.propagatesFlow(input, output, preservesValue) - or - // observe side effects of 
callbacks on input arguments - c.propagatesFlow(output, input, preservesValue) and - preservesValue = true and - isCallbackParameter(input) and - isContentOfArgument(output, _) - or - // flow from the receiver of a callback into the instance-parameter - exists(SummaryComponentStack s, SummaryComponentStack callbackRef | - c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _) - | - callbackRef = s.drop(_) and - (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and - input = callbackRef.tail() and - output = TConsSummaryComponentStack(callbackSelfParam(), input) and + exists(int i, Property p | + recordConstructorFlow(this, i, p) and + input = Private::SummaryComponentStack::argument(i) and + output = + Private::SummaryComponentStack::propertyOf(p, Private::SummaryComponentStack::return()) and preservesValue = true ) - or - exists(SummaryComponentStack arg, SummaryComponentStack return | - derivedFluentFlow(c, input, arg, return, preservesValue) - | - arg.length() = 1 and - output = return - or - exists(SummaryComponent head, SummaryComponentStack tail | - derivedFluentFlowPush(c, input, arg, head, tail, 0) and - output = SummaryComponentStack::push(head, tail) - ) - ) - or - // Chain together summaries where values get passed into callbacks along the way - exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | - c.propagatesFlow(input, mid, preservesValue1) and - c.propagatesFlow(mid, output, preservesValue2) and - mid.drop(mid.length() - 2) = - SummaryComponentStack::push(TParameterSummaryComponent(_), - SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and - preservesValue = preservesValue1.booleanAnd(preservesValue2) - ) } - /** - * Holds if `c` has a flow summary from `input` to `arg`, where `arg` - * writes to (contents of) arguments at position `pos`, and `c` has a - * value-preserving flow summary from the arguments at position `pos` - * to a return value (`return`). 
- * - * In such a case, we derive flow from `input` to (contents of) the return - * value. - * - * As an example, this simplifies modeling of fluent methods: - * for `StringBuilder.append(x)` with a specified value flow from qualifier to - * return value and taint flow from argument 0 to the qualifier, then this - * allows us to infer taint flow from argument 0 to the return value. - */ - pragma[nomagic] - private predicate derivedFluentFlow( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponentStack return, boolean preservesValue + override predicate propagatesFlow( + Impl::Private::SummaryComponentStack input, Impl::Private::SummaryComponentStack output, + boolean preservesValue ) { - exists(ParameterPosition pos | - summary(c, input, arg, preservesValue) and - isContentOfArgument(arg, pos) and - summary(c, SummaryComponentStack::argument(pos), return, true) and - return.bottom() = TReturnSummaryComponent(_) - ) + this.propagatesFlowImpl(input, output, preservesValue) } - pragma[nomagic] - private predicate derivedFluentFlowPush( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponent head, SummaryComponentStack tail, int i - ) { - derivedFluentFlow(c, input, arg, tail, _) and - head = arg.drop(i).head() and - i = arg.length() - 2 - or - exists(SummaryComponent head0, SummaryComponentStack tail0 | - derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and - head = arg.drop(i).head() and - tail = SummaryComponentStack::push(head0, tail0) - ) - } + override predicate hasProvenance(Public::Provenance provenance) { provenance = "manual" } +} - private predicate isCallbackParameter(SummaryComponentStack s) { - s.head() = TParameterSummaryComponent(_) and exists(s.tail()) - } +// see `SummarizedCallableImpl` qldoc +private class RecordConstructorFlowAdapter extends Impl::Public::SummarizedCallable instanceof RecordConstructorFlow +{ + override predicate propagatesFlow(string 
input, string output, boolean preservesValue) { none() } - private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { - s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) - or - s = SummaryComponentStack::argument(pos) - } - - private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, _, s, _) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TContentSummaryComponent(_) and - s = out.tail() - ) - or - // Add the argument node corresponding to the requested post-update node - inputState(c, s) and isCallbackParameter(s) - } - - private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, s, _, _) - or - exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TParameterSummaryComponent(_) and - s = out.tail() - ) - or - // Add the post-update node corresponding to the requested argument node - outputState(c, s) and isCallbackParameter(s) - or - // Add the parameter node for parameter side-effects - outputState(c, s) and s = SummaryComponentStack::argument(_) - } - - private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or - TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } - - /** - * A state used to break up (complex) flow summaries into atomic flow steps. - * For a flow summary - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * the following states are used: - * - * - `TSummaryNodeInputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _have been read_ from the - * input. - * - `TSummaryNodeOutputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _remain to be written_ to - * the output. 
- */ - private class SummaryNodeState extends TSummaryNodeState { - /** Holds if this state is a valid input state for `c`. */ - pragma[nomagic] - predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeInputState(s) and - inputState(c, s) - } - - /** Holds if this state is a valid output state for `c`. */ - pragma[nomagic] - predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeOutputState(s) and - outputState(c, s) - } - - /** Gets a textual representation of this state. */ - string toString() { - exists(SummaryComponentStack s | - this = TSummaryNodeInputState(s) and - result = "read: " + s - ) - or - exists(SummaryComponentStack s | - this = TSummaryNodeOutputState(s) and - result = "to write: " + s - ) - } - } - - private newtype TSummaryNode = - TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { - summaryNodeRange(c, state) - } or - TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { - summaryParameterNodeRange(c, pos) - } - - abstract class SummaryNode extends TSummaryNode { - abstract string toString(); - - abstract SummarizedCallable getSummarizedCallable(); - } - - private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { - private SummarizedCallable c; - private SummaryNodeState state; - - SummaryInternalNode() { this = TSummaryInternalNode(c, state) } - - override string toString() { result = "[summary] " + state + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { - private SummarizedCallable c; - private ParameterPosition pos; - - SummaryParamNode() { this = TSummaryParameterNode(c, pos) } - - override string toString() { result = "[summary param] " + pos + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - /** - * Holds if `state` represents having read from a 
parameter at position - * `pos` in `c`. In this case we are not synthesizing a data-flow node, - * but instead assume that a relevant parameter node already exists. - */ - private predicate parameterReadState( - SummarizedCallable c, SummaryNodeState state, ParameterPosition pos - ) { - state.isInputState(c, SummaryComponentStack::argument(pos)) - } - - /** - * Holds if a synthesized summary node is needed for the state `state` in summarized - * callable `c`. - */ - private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { - state.isInputState(c, _) and - not parameterReadState(c, state, _) - or - state.isOutputState(c, _) - } - - pragma[noinline] - private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | state.isInputState(c, s) | - result = TSummaryInternalNode(c, state) - or - exists(ParameterPosition pos | - parameterReadState(c, state, pos) and - result = TSummaryParameterNode(c, pos) - ) - ) - } - - pragma[noinline] - private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | - state.isOutputState(c, s) and - result = TSummaryInternalNode(c, state) - ) - } - - /** - * Holds if a write targets `post`, which is a post-update node for a - * parameter at position `pos` in `c`. - */ - private predicate isParameterPostUpdate( - SummaryNode post, SummarizedCallable c, ParameterPosition pos - ) { - post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) - } - - /** Holds if a parameter node at position `pos` is required for `c`. 
*/ - private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { - parameterReadState(c, _, pos) - or - // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context - any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) - } - - private predicate callbackOutput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk - ) { - any(SummaryNodeState state).isInputState(c, s) and - s.head() = TReturnSummaryComponent(rk) and - receiver = summaryNodeInputState(c, s.tail()) - } - - private predicate callbackInput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos - ) { - any(SummaryNodeState state).isOutputState(c, s) and - s.head() = TParameterSummaryComponent(pos) and - receiver = summaryNodeInputState(c, s.tail()) - } - - /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ - predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { - callbackOutput(c, _, receiver, _) - or - callbackInput(c, _, receiver, _) - } - - /** - * Gets the type of synthesized summary node `n`. - * - * The type is computed based on the language-specific predicates - * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and - * `getCallbackReturnType()`. 
- */ - DataFlowType summaryNodeType(SummaryNode n) { - exists(SummaryNode pre | - summaryPostUpdateNode(n, pre) and - result = summaryNodeType(pre) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | - n = summaryNodeInputState(c, s) and - ( - exists(ContentSet cont | result = getContentType(cont) | - head = TContentSummaryComponent(cont) or - head = TWithContentSummaryComponent(cont) - ) - or - head = TWithoutContentSummaryComponent(_) and - result = summaryNodeType(summaryNodeInputState(c, s.tail())) - or - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = - getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), rk) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - or - exists(ParameterPosition pos | - head = TArgumentSummaryComponent(pos) and - result = getParameterType(c, pos) - ) - ) - or - n = summaryNodeOutputState(c, s) and - ( - exists(ContentSet cont | - head = TContentSummaryComponent(cont) and result = getContentType(cont) - ) - or - s.length() = 1 and - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = getReturnType(c, rk) - ) - or - exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | - result = - getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), pos) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - ) - ) - } - - /** Holds if summary node `p` is a parameter with position `pos`. */ - predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { - p = TSummaryParameterNode(_, pos) - } - - /** Holds if summary node `out` contains output of kind `rk` from call `c`. 
*/ - predicate summaryOutNode(DataFlowCall c, SummaryNode out, ReturnKind rk) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackOutput(callable, s, receiver, rk) and - out = summaryNodeInputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `arg` is at position `pos` in the call `c`. */ - predicate summaryArgumentNode(DataFlowCall c, SummaryNode arg, ArgumentPosition pos) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackInput(callable, s, receiver, pos) and - arg = summaryNodeOutputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { - exists(SummarizedCallable c, ParameterPosition pos | - isParameterPostUpdate(post, c, pos) and - pre = TSummaryParameterNode(c, pos) - ) - or - exists(SummarizedCallable callable, SummaryComponentStack s | - callbackInput(callable, s, _, _) and - pre = summaryNodeOutputState(callable, s) and - post = summaryNodeInputState(callable, s) - ) - } - - /** Holds if summary node `ret` is a return node of kind `rk`. */ - predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { - exists(SummaryComponentStack s | - ret = summaryNodeOutputState(_, s) and - s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) - ) - } - - /** - * Holds if flow is allowed to pass from parameter `p`, to a return - * node, and back out to `p`. 
- */ - predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | - p.isParameterOf(inject(c), pragma[only_bind_into](ppos)) - | - exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | - summary(c, inputContents, outputContents, _) and - inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and - outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) - ) - ) - } - - /** Provides a compilation of flow summaries to atomic data-flow steps. */ - module Steps { - /** - * Holds if there is a local step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { - exists( - SummarizedCallable c, SummaryComponentStack inputContents, - SummaryComponentStack outputContents - | - summary(c, inputContents, outputContents, preservesValue) and - pred = summaryNodeInputState(c, inputContents) and - succ = summaryNodeOutputState(c, outputContents) - | - preservesValue = true - or - preservesValue = false and not summary(c, inputContents, outputContents, true) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s | - pred = summaryNodeInputState(c, s.tail()) and - succ = summaryNodeInputState(c, s) and - s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and - preservesValue = true - ) - } - - /** - * Holds if there is a read step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. 
- */ - predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeInputState(sc, s.tail()) and - succ = summaryNodeInputState(sc, s) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a store step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeOutputState(sc, s) and - succ = summaryNodeOutputState(sc, s.tail()) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a jump step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { - exists(SummaryComponentStack s | - s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and - pred = summaryNodeOutputState(_, s) and - succ = summaryNodeInputState(_, s) - ) - } - - /** - * Holds if values stored inside content `c` are cleared at `n`. `n` is a - * synthesized summary node, so in order for values to be cleared at calls - * to the relevant method, it is important that flow does not pass over - * the argument, either via use-use flow or def-use flow. - * - * Example: - * - * ``` - * a.b = taint; - * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier - * sink(a.b); - * ``` - * - * In the above, flow should not pass from `a` on the first line (or the second - * line) to `a` on the third line. Instead, there will be synthesized flow from - * `a` on line 2 to the post-update node for `a` on that line (via an intermediate - * node where field `b` is cleared). 
- */ - predicate summaryClearsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withoutContent(c) - ) - } - - /** - * Holds if the value that is being tracked is expected to be stored inside - * content `c` at `n`. - */ - predicate summaryExpectsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withContent(c) - ) - } - - pragma[noinline] - private predicate viableParam( - DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p - ) { - exists(DataFlowCallable c | - c = inject(sc) and - p = TSummaryParameterNode(sc, ppos) and - c = viableCallable(call) - ) - } - - pragma[nomagic] - private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { - exists(ParameterPosition ppos | - argumentPositionMatch(call, arg, ppos) and - viableParam(call, sc, ppos, result) - ) - } - - /** - * Holds if `p` can reach `n` in a summarized callable, using only value-preserving - * local steps. `clearsOrExpects` records whether any node on the path from `p` to - * `n` either clears or expects contents. - */ - private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { - viableParam(_, _, _, p) and - n = p and - clearsOrExpects = false - or - exists(SummaryNode mid, boolean clearsOrExpectsMid | - paramReachesLocal(p, mid, clearsOrExpectsMid) and - summaryLocalStep(mid, n, true) and - if - summaryClearsContent(n, _) or - summaryExpectsContent(n, _) - then clearsOrExpects = true - else clearsOrExpects = clearsOrExpectsMid - ) - } - - /** - * Holds if use-use flow starting from `arg` should be prohibited. 
- * - * This is the case when `arg` is the argument of a call that targets a - * flow summary where the corresponding parameter either clears contents - * or expects contents. - */ - pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { - exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | - paramReachesLocal(p, ret, true) and - p = summaryArgParam(_, arg, sc) and - p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and - isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) - ) - } - - pragma[nomagic] - private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { - summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) - or - exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | - paramReachesLocal(p, pre, _) and - summaryPostUpdateNode(ret, pre) and - p = TSummaryParameterNode(_, pos) and - rk.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - bindingset[ret] - private SummaryParamNode summaryArgParamRetOut( - ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc - ) { - exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam(call, arg, sc) and - summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and - out = pragma[only_bind_into](rk).getAnOutNode(call) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple value-preserving flow - * summary, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { - exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | - summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and - summaryReturnNode(ret, pragma[only_bind_into](rk)) and - out = getAnOutNode(call, pragma[only_bind_into](rk)) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple flow summary involving taint - * step, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { - exists(SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) - ) - } - - /** - * Holds if there is a read(+taint) of `c` from `arg` to `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and - summaryLocalStep(mid, ret, _) - ) - } - - /** - * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and - summaryStoreStep(mid, c, ret) - ) - } - } - - /** - * Provides a means of translating externally (e.g., MaD) defined flow - * summaries into a `SummarizedCallable`s. - */ - module External { - /** Holds if `spec` is a relevant external specification. */ - private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _, _) or - summaryElement(_, _, spec, _, _) or - sourceElement(_, spec, _, _) or - sinkElement(_, spec, _, _) - } - - private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { relevantSpec(this) } - } - - /** Holds if specification component `token` parses as parameter `pos`. */ - predicate parseParam(AccessPathToken token, ArgumentPosition pos) { - token.getName() = "Parameter" and - pos = parseParamBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as argument `pos`. */ - predicate parseArg(AccessPathToken token, ParameterPosition pos) { - token.getName() = "Argument" and - pos = parseArgBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as synthetic global `sg`. 
*/ - predicate parseSynthGlobal(AccessPathToken token, string sg) { - token.getName() = "SyntheticGlobal" and - sg = token.getAnArgument() - } - - private class SyntheticGlobalFromAccessPath extends SummaryComponent::SyntheticGlobal { - SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } - } - - private SummaryComponent interpretComponent(AccessPathToken token) { - exists(ParameterPosition pos | - parseArg(token, pos) and result = SummaryComponent::argument(pos) - ) - or - exists(ArgumentPosition pos | - parseParam(token, pos) and result = SummaryComponent::parameter(pos) - ) - or - token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) - or - exists(string sg | - parseSynthGlobal(token, sg) and result = SummaryComponent::syntheticGlobal(sg) - ) - or - result = interpretComponentSpecific(token) - } - - /** - * Holds if `spec` specifies summary component stack `stack`. - */ - predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { - interpretSpec(spec, spec.getNumToken(), stack) - } - - /** Holds if the first `n` tokens of `spec` resolves to `stack`. 
*/ - private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { - n = 1 and - stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) - or - exists(SummaryComponent head, SummaryComponentStack tail | - interpretSpec(spec, n, head, tail) and - stack = SummaryComponentStack::push(head, tail) - ) - } - - /** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */ - private predicate interpretSpec( - AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail - ) { - interpretSpec(spec, n - 1, tail) and - head = interpretComponent(spec.getToken(n - 1)) - } - - private class MkStack extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - interpretSpec(_, _, head, tail) - } - } - - private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } - - private predicate relevantSummaryElementGenerated( - AccessPath inSpec, AccessPath outSpec, string kind - ) { - exists(Provenance provenance | - provenance.isGenerated() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) and - not this.applyManualModel() - } - - private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { - exists(Provenance provenance | - provenance.isManual() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) - or - this.relevantSummaryElementGenerated(inSpec, outSpec, kind) - } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(AccessPath inSpec, AccessPath outSpec, string kind | - this.relevantSummaryElement(inSpec, outSpec, kind) and - interpretSpec(inSpec, input) and - interpretSpec(outSpec, output) - | - kind = "value" and preservesValue = true - or - kind = "taint" and preservesValue = false - ) - } - - override predicate 
hasProvenance(Provenance provenance) { - summaryElement(this, _, _, _, provenance) - } - } - - /** Holds if component `c` of specification `spec` cannot be parsed. */ - predicate invalidSpecComponent(AccessPath spec, string c) { - c = spec.getToken(_) and - not exists(interpretComponent(c)) - } - - /** Holds if `provenance` is not a valid provenance value. */ - bindingset[provenance] - predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } - - /** - * Holds if token `part` of specification `spec` has an invalid index. - * E.g., `Argument[-1]`. - */ - predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { - part = spec.getToken(_) and - part.getName() = ["Parameter", "Argument"] and - AccessPath::parseInt(part.getArgumentList()) < 0 - } - - private predicate inputNeedsReference(AccessPathToken c) { - c.getName() = "Argument" or - inputNeedsReferenceSpecific(c) - } - - private predicate outputNeedsReference(AccessPathToken c) { - c.getName() = ["Argument", "ReturnValue"] or - outputNeedsReferenceSpecific(c) - } - - private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { - exists(SourceOrSinkElement e | - sourceElement(e, output, kind, _) and - if outputNeedsReference(output.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { - exists(SourceOrSinkElement e | - sinkElement(e, input, kind, _) and - if inputNeedsReference(input.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - /** Holds if the first `n` tokens of `output` resolve to the given interpretation. 
*/ - private predicate interpretOutput( - AccessPath output, int n, InterpretNode ref, InterpretNode node - ) { - sourceElementRef(ref, output, _) and - n = 0 and - ( - if output = "" - then - // Allow language-specific interpretation of the empty access path - interpretOutputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretOutput(output, n - 1, ref, mid) and - c = output.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) and - parameterMatch(ppos, apos) - | - c = "Parameter" or parseParam(c, apos) - ) - or - c = "ReturnValue" and - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) - or - interpretOutputSpecific(c, mid, node) - ) - } - - /** Holds if the first `n` tokens of `input` resolve to the given interpretation. 
*/ - private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) { - sinkElementRef(ref, input, _) and - n = 0 and - ( - if input = "" - then - // Allow language-specific interpretation of the empty access path - interpretInputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretInput(input, n - 1, ref, mid) and - c = input.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ReturnNodeExt ret | - c = "ReturnValue" and - ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - interpretInputSpecific(c, mid, node) - ) - } - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate isSourceNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath output | - sourceElementRef(ref, output, kind) and - interpretOutput(output, output.getNumToken(), ref, node) - ) - } - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate isSinkNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath input | - sinkElementRef(ref, input, kind) and - interpretInput(input, input.getNumToken(), ref, node) - ) - } - } - - /** Provides a query predicate for outputting a set of relevant flow summaries. */ - module TestOutput { - /** A flow summary to include in the `summary/1` query predicate. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - /** Gets the string representation of this callable used by `summary/1`. */ - abstract string getCallableCsv(); - - /** Holds if flow is propagated between `input` and `output`. 
*/ - predicate relevantSummary( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - super.propagatesFlow(input, output, preservesValue) - } - - string toString() { result = super.toString() } - } - - /** A model to include in the `neutral/1` query predicate. */ - abstract class RelevantNeutralCallable instanceof NeutralCallable { - /** Gets the string representation of this callable used by `neutral/1`. */ - abstract string getCallableCsv(); - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = super.getKind() } - - string toString() { result = super.toString() } - } - - /** Render the kind in the format used in flow summaries. */ - private string renderKind(boolean preservesValue) { - preservesValue = true and result = "value" - or - preservesValue = false and result = "taint" - } - - private string renderProvenance(SummarizedCallable c) { - if c.applyManualModel() then result = "manual" else c.hasProvenance(result) - } - - private string renderProvenanceNeutral(NeutralCallable c) { - if c.hasManualModel() then result = "manual" else c.hasProvenance(result) - } - - /** - * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. - * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", - * ext is hardcoded to empty. - */ - query predicate summary(string csv) { - exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - | - c.relevantSummary(input, output, preservesValue) and - csv = - c.getCallableCsv() // Callable information - + input.getMadRepresentation() + ";" // input - + output.getMadRepresentation() + ";" // output - + renderKind(preservesValue) + ";" // kind - + renderProvenance(c) // provenance - ) - } - - /** - * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. 
- * The syntax is: "namespace;type;name;signature;kind;provenance"", - */ - query predicate neutral(string csv) { - exists(RelevantNeutralCallable c | - csv = - c.getCallableCsv() // Callable information - + c.getKind() + ";" // kind - + renderProvenanceNeutral(c) // provenance - ) - } - } - - /** - * Provides query predicates for rendering the generated data flow graph for - * a summarized callable. - * - * Import this module into a `.ql` file of `@kind graph` to render the graph. - * The graph is restricted to callables from `RelevantSummarizedCallable`. - */ - module RenderSummarizedCallable { - /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - string toString() { result = super.toString() } - } - - private newtype TNodeOrCall = - MkNode(SummaryNode n) { - exists(RelevantSummarizedCallable c | - n = TSummaryInternalNode(c, _) - or - n = TSummaryParameterNode(c, _) - ) - } or - MkCall(DataFlowCall call) { - call = summaryDataFlowCall(_) and - call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) - } - - private class NodeOrCall extends TNodeOrCall { - SummaryNode asNode() { this = MkNode(result) } - - DataFlowCall asCall() { this = MkCall(result) } - - string toString() { - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
- */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - } - } - - query predicate nodes(NodeOrCall n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { - exists(boolean preservesValue | - Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and - if preservesValue = true then value = "value" else value = "taint" - ) - or - exists(ContentSet c | - Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and - value = "read (" + c + ")" - or - Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and - value = "store (" + c + ")" - or - Private::Steps::summaryClearsContent(a.asNode(), c) and - b = a and - value = "clear (" + c + ")" - or - Private::Steps::summaryExpectsContent(a.asNode(), c) and - b = a and - value = "expect (" + c + ")" - ) - or - summaryPostUpdateNode(b.asNode(), a.asNode()) and - value = "post-update" - or - b.asCall() = summaryDataFlowCall(a.asNode()) and - value = "receiver" - or - exists(ArgumentPosition pos | - summaryArgumentNode(b.asCall(), a.asNode(), pos) and - value = "argument (" + pos + ")" - ) - } - - query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { - key = "semmle.label" and - value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") - } + override predicate hasProvenance(Public::Provenance provenance) { + RecordConstructorFlow.super.hasProvenance(provenance) } } + +private class RecordConstructorFlowRequiredSummaryComponentStack extends Impl::Private::RequiredSummaryComponentStack +{ + override predicate required( + Impl::Private::SummaryComponent head, Impl::Private::SummaryComponentStack tail + ) { + exists(Property p | + recordConstructorFlow(_, _, p) and + head = Private::SummaryComponent::property(p) 
and + tail = Private::SummaryComponentStack::return() + ) + } +} + +private import semmle.code.csharp.frameworks.system.linq.Expressions + +private predicate mayInvokeCallback(Callable c, int n) { + c.getParameter(n).getType() instanceof SystemLinqExpressions::DelegateExtType and + not c.hasBody() and + (if c instanceof Accessor then not c.fromSource() else any()) +} + +private class SummarizedCallableWithCallback extends Public::SummarizedCallable { + private int pos; + + SummarizedCallableWithCallback() { mayInvokeCallback(this, pos) } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[" + pos + "]" and + output = "Argument[" + pos + "].Parameter[delegate-self]" and + preservesValue = true + } + + override predicate hasProvenance(Public::Provenance provenance) { provenance = "hq-generated" } +} diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll deleted file mode 100644 index eadad66d755..00000000000 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImplSpecific.qll +++ /dev/null @@ -1,340 +0,0 @@ -/** - * Provides C# specific classes and predicates for defining flow summaries. - */ - -private import csharp -private import dotnet -private import semmle.code.csharp.frameworks.system.linq.Expressions -private import DataFlowDispatch -private import DataFlowPrivate -private import DataFlowPublic -private import DataFlowImplCommon -private import FlowSummaryImpl::Private -private import FlowSummaryImpl::Public -private import semmle.code.csharp.Unification -private import ExternalFlow -private import semmle.code.csharp.dataflow.FlowSummary as FlowSummary - -/** - * A class of callables that are candidates for flow summary modeling. - */ -class SummarizedCallableBase = UnboundCallable; - -/** - * A class of callables that are candidates for neutral modeling. 
- */ -class NeutralCallableBase = UnboundCallable; - -/** - * A module for importing frameworks that define synthetic globals. - */ -private module SyntheticGlobals { - private import semmle.code.csharp.frameworks.EntityFramework -} - -DataFlowCallable inject(SummarizedCallable c) { result.asSummarizedCallable() = c } - -/** Gets the parameter position of the instance parameter. */ -ArgumentPosition callbackSelfParameterPosition() { result.isDelegateSelf() } - -/** Gets the synthesized data-flow call for `receiver`. */ -SummaryCall summaryDataFlowCall(SummaryNode receiver) { receiver = result.getReceiver() } - -/** Gets the type of content `c`. */ -DataFlowType getContentType(Content c) { - exists(Type t | result.asGvnType() = Gvn::getGlobalValueNumber(t) | - t = c.(FieldContent).getField().getType() - or - t = c.(PropertyContent).getProperty().getType() - or - t = c.(SyntheticFieldContent).getField().getType() - or - c instanceof ElementContent and - t instanceof ObjectType // we don't know what the actual element type is - ) -} - -/** Gets the type of the parameter at the given position. */ -DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { - exists(Type t | result.asGvnType() = Gvn::getGlobalValueNumber(t) | - exists(int i | - pos.getPosition() = i and - t = c.getParameter(i).getType() - ) - or - pos.isThisParameter() and - t = c.getDeclaringType() - ) -} - -/** Gets the return type of kind `rk` for callable `c`. */ -DataFlowType getReturnType(DotNet::Callable c, ReturnKind rk) { - exists(Type t | result.asGvnType() = Gvn::getGlobalValueNumber(t) | - rk instanceof NormalReturnKind and - ( - t = c.(Constructor).getDeclaringType() - or - not c instanceof Constructor and - t = c.getReturnType() - ) - or - t = c.getParameter(rk.(OutRefReturnKind).getPosition()).getType() - ) -} - -/** - * Gets the type of the parameter matching arguments at position `pos` in a - * synthesized call that targets a callback of type `t`. 
- */ -DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { - exists(SystemLinqExpressions::DelegateExtType dt | - t.asGvnType() = Gvn::getGlobalValueNumber(dt) and - result.asGvnType() = - Gvn::getGlobalValueNumber(dt.getDelegateType().getParameter(pos.getPosition()).getType()) - ) - or - pos.isDelegateSelf() and - result = t -} - -/** - * Gets the return type of kind `rk` in a synthesized call that targets a - * callback of type `t`. - */ -DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { - rk instanceof NormalReturnKind and - exists(SystemLinqExpressions::DelegateExtType dt | - t.asGvnType() = Gvn::getGlobalValueNumber(dt) and - result.asGvnType() = Gvn::getGlobalValueNumber(dt.getDelegateType().getReturnType()) - ) -} - -/** Gets the type of synthetic global `sg`. */ -DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { - exists(sg) and - result.asGvnType() = Gvn::getGlobalValueNumber(any(ObjectType t)) -} - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and provenance `provenance`. - */ -predicate summaryElement(Callable c, string input, string output, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext - | - summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and - c = interpretElement(namespace, type, subtypes, name, signature, ext) - ) -} - -/** - * Holds if a neutral model exists for `c` of kind `kind` - * and with provenance `provenance`. 
- */ -predicate neutralElement(Callable c, string kind, string provenance) { - exists(string namespace, string type, string name, string signature | - neutralModel(namespace, type, name, signature, kind, provenance) and - c = interpretElement(namespace, type, false, name, signature, "") - ) -} - -/** - * Holds if an external source specification exists for `e` with output specification - * `output`, kind `kind`, and provenance `provenance`. - */ -predicate sourceElement(Element e, string output, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext - | - sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance) and - e = interpretElement(namespace, type, subtypes, name, signature, ext) - ) -} - -/** - * Holds if an external sink specification exists for `e` with input specification - * `input`, kind `kind` and provenance `provenance`. - */ -predicate sinkElement(Element e, string input, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext - | - sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance) and - e = interpretElement(namespace, type, subtypes, name, signature, ext) - ) -} - -/** Gets the summary component for specification component `c`, if any. */ -bindingset[c] -SummaryComponent interpretComponentSpecific(AccessPathToken c) { - c = "Element" and result = SummaryComponent::content(any(ElementContent ec)) - or - c = "WithoutElement" and result = SummaryComponent::withoutContent(any(ElementContent ec)) - or - c = "WithElement" and result = SummaryComponent::withContent(any(ElementContent ec)) - or - // Qualified names may contain commas,such as in `Tuple<,>`, so get the entire argument list - // rather than an individual argument. 
- exists(Field f | - c.getName() = "Field" and - c.getArgumentList() = f.getFullyQualifiedName() and - result = SummaryComponent::content(any(FieldContent fc | fc.getField() = f)) - ) - or - exists(Property p | - c.getName() = "Property" and - c.getArgumentList() = p.getFullyQualifiedName() and - result = SummaryComponent::content(any(PropertyContent pc | pc.getProperty() = p)) - ) - or - exists(SyntheticField f | - c.getAnArgument("SyntheticField") = f and - result = SummaryComponent::content(any(SyntheticFieldContent sfc | sfc.getField() = f)) - ) -} - -/** Gets the textual representation of the content in the format used for MaD models. */ -private string getContentSpecific(Content c) { - c = TElementContent() and result = "Element" - or - exists(Field f | c = TFieldContent(f) and result = "Field[" + f.getFullyQualifiedName() + "]") - or - exists(Property p | - c = TPropertyContent(p) and result = "Property[" + p.getFullyQualifiedName() + "]" - ) - or - exists(SyntheticField f | c = TSyntheticFieldContent(f) and result = "SyntheticField[" + f + "]") -} - -/** Gets the textual representation of a summary component in the format used for MaD models. */ -string getMadRepresentationSpecific(SummaryComponent sc) { - exists(Content c | sc = TContentSummaryComponent(c) and result = getContentSpecific(c)) - or - sc = TWithoutContentSummaryComponent(_) and result = "WithoutElement" - or - sc = TWithContentSummaryComponent(_) and result = "WithElement" - or - exists(OutRefReturnKind rk | - sc = TReturnSummaryComponent(rk) and - result = "Argument[" + rk.getPosition() + "]" - ) -} - -/** Gets the textual representation of a parameter position in the format used for flow summaries. 
*/ -string getParameterPosition(ParameterPosition pos) { - result = pos.getPosition().toString() - or - pos.isThisParameter() and - result = "this" - or - pos.isDelegateSelf() and - result = "delegate-self" -} - -/** Gets the textual representation of an argument position in the format used for flow summaries. */ -string getArgumentPosition(ArgumentPosition pos) { - result = pos.getPosition().toString() - or - pos.isQualifier() and - result = "this" - or - pos.isDelegateSelf() and - result = "delegate-self" -} - -/** Holds if input specification component `c` needs a reference. */ -predicate inputNeedsReferenceSpecific(string c) { none() } - -/** Holds if output specification component `c` needs a reference. */ -predicate outputNeedsReferenceSpecific(string c) { none() } - -class SourceOrSinkElement = Element; - -/** Gets the return kind corresponding to specification `"ReturnValue"`. */ -NormalReturnKind getReturnValueKind() { any() } - -private newtype TInterpretNode = - TElement_(Element n) or - TNode_(Node n) or - TDataFlowCall_(DataFlowCall c) - -/** An entity used to interpret a source/sink specification. */ -class InterpretNode extends TInterpretNode { - /** Gets the element that this node corresponds to, if any. */ - SourceOrSinkElement asElement() { this = TElement_(result) } - - /** Gets the data-flow node that this node corresponds to, if any. */ - Node asNode() { this = TNode_(result) } - - /** Gets the call that this node corresponds to, if any. */ - DataFlowCall asCall() { this = TDataFlowCall_(result) } - - /** Gets the callable that this node corresponds to, if any. */ - DataFlowCallable asCallable() { result.getUnderlyingCallable() = this.asElement() } - - /** Gets the target of this call, if any. */ - Callable getCallTarget() { result = this.asCall().(NonDelegateDataFlowCall).getATarget(_) } - - /** Gets a textual representation of this node. 
*/ - string toString() { - result = this.asElement().toString() - or - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** Gets the location of this node. */ - Location getLocation() { - result = this.asElement().getLocation() - or - result = this.asNode().getLocation() - or - result = this.asCall().getLocation() - } -} - -/** Provides additional sink specification logic required for attributes. */ -predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { - exists(Node n | n = node.asNode() | - (c = "Parameter" or c = "") and - n.asParameter() = mid.asElement() - or - c = "" and - n.asExpr().(AssignableRead).getTarget().getUnboundDeclaration() = mid.asElement() - ) -} - -/** Provides additional sink specification logic required for attributes. */ -predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) { - c = "" and - exists(Assignable a | - n.asNode().asExpr() = a.getAnAssignedValue() and - a.getUnboundDeclaration() = mid.asElement() - ) -} - -/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */ -bindingset[s] -ArgumentPosition parseParamBody(string s) { - result.getPosition() = AccessPath::parseInt(s) - or - s = "this" and - result.isQualifier() - or - s = "delegate-self" and - result.isDelegateSelf() -} - -/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. 
*/ -bindingset[s] -ParameterPosition parseArgBody(string s) { - result.getPosition() = AccessPath::parseInt(s) - or - s = "this" and - result.isThisParameter() - or - s = "delegate-self" and - result.isDelegateSelf() -} diff --git a/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll b/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll index 362f73b3612..28cab54928b 100644 --- a/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll +++ b/csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll @@ -9,7 +9,8 @@ private import semmle.code.csharp.frameworks.System private import semmle.code.csharp.frameworks.system.data.Entity private import semmle.code.csharp.frameworks.system.collections.Generic private import semmle.code.csharp.frameworks.Sql -private import semmle.code.csharp.dataflow.FlowSummary +private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl::Public +private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl::Private private import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate /** @@ -85,9 +86,23 @@ module EntityFramework { } /** A flow summary for EntityFramework. */ - abstract class EFSummarizedCallable extends SummarizedCallable { + abstract class EFSummarizedCallable extends SummarizedCallableImpl { bindingset[this] EFSummarizedCallable() { any() } + + override predicate hasProvenance(Provenance provenance) { provenance = "manual" } + } + + // see `SummarizedCallableImpl` qldoc + private class EFSummarizedCallableAdapter extends SummarizedCallable instanceof EFSummarizedCallable + { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + none() + } + + override predicate hasProvenance(Provenance provenance) { + EFSummarizedCallable.super.hasProvenance(provenance) + } } /** The class ``Microsoft.EntityFrameworkCore.DbQuery`1`` or ``System.Data.Entity.DbQuery`1``. 
*/ diff --git a/csharp/ql/test/library-tests/dataflow/external-models/validatemodels.ql b/csharp/ql/test/library-tests/dataflow/external-models/validatemodels.ql index c538d5c29e1..2ae4c43ac82 100644 --- a/csharp/ql/test/library-tests/dataflow/external-models/validatemodels.ql +++ b/csharp/ql/test/library-tests/dataflow/external-models/validatemodels.ql @@ -1,15 +1,2 @@ import csharp -import semmle.code.csharp.dataflow.internal.ExternalFlow -import semmle.code.csharp.dataflow.internal.AccessPathSyntax -import ModelValidation - -private predicate getRelevantAccessPath(string path) { - summaryModel(_, _, _, _, _, _, path, _, _, _) or - summaryModel(_, _, _, _, _, _, _, path, _, _) or - sinkModel(_, _, _, _, _, _, path, _, _) or - sourceModel(_, _, _, _, _, _, path, _, _) -} - -private class AccessPathsExternal extends AccessPath::Range { - AccessPathsExternal() { getRelevantAccessPath(this) } -} +import semmle.code.csharp.dataflow.internal.ExternalFlow::ModelValidation diff --git a/csharp/ql/test/library-tests/dataflow/library/FlowSummaries.ql b/csharp/ql/test/library-tests/dataflow/library/FlowSummaries.ql index 82889478a1f..ee3ac53742a 100644 --- a/csharp/ql/test/library-tests/dataflow/library/FlowSummaries.ql +++ b/csharp/ql/test/library-tests/dataflow/library/FlowSummaries.ql @@ -1,13 +1,11 @@ -private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import semmle.code.csharp.dataflow.internal.ExternalFlow import shared.FlowSummaries +import semmle.code.csharp.dataflow.internal.ExternalFlow private class IncludeAllSummarizedCallable extends IncludeSummarizedCallable { IncludeAllSummarizedCallable() { exists(this) } } -private class IncludeNeutralSummarizedCallable extends RelevantNeutralCallable instanceof FlowSummaryImpl::Public::NeutralSummaryCallable -{ +private class IncludeNeutralSummarizedCallable extends RelevantNeutralCallable { /** Gets a string representing the callable in semi-colon separated format for use in 
flow summaries. */ final override string getCallableCsv() { result = asPartialNeutralModel(this) } } diff --git a/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql b/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql index fcfe037f1ed..c3584afcbc3 100644 --- a/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql +++ b/csharp/ql/test/library-tests/dataflow/library/FlowSummariesFiltered.ql @@ -1,8 +1,9 @@ import shared.FlowSummaries private import semmle.code.csharp.dataflow.internal.ExternalFlow -class IncludeFilteredSummarizedCallable extends IncludeSummarizedCallable instanceof SummarizedCallable -{ +class IncludeFilteredSummarizedCallable extends IncludeSummarizedCallable { + IncludeFilteredSummarizedCallable() { exists(this) } + /** * Holds if flow is propagated between `input` and `output` and * if there is no summary for a callable in a `base` class or interface @@ -14,7 +15,7 @@ class IncludeFilteredSummarizedCallable extends IncludeSummarizedCallable instan super.propagatesFlow(input, output, preservesValue) and not exists(IncludeSummarizedCallable rsc | isBaseCallableOrPrototype(rsc) and - rsc.(SummarizedCallable).propagatesFlow(input, output, preservesValue) and + rsc.propagatesFlow(input, output, preservesValue) and this.(UnboundCallable).overridesOrImplementsUnbound(rsc) ) } diff --git a/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql b/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql index f2d0a1eadc5..bef72ca30a1 100644 --- a/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql +++ b/csharp/ql/test/library-tests/frameworks/EntityFramework/FlowSummaries.ql @@ -1,5 +1,6 @@ -import semmle.code.csharp.frameworks.EntityFramework::EntityFramework +import csharp import shared.FlowSummaries +import semmle.code.csharp.frameworks.EntityFramework::EntityFramework import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow 
private class IncludeEFSummarizedCallable extends IncludeSummarizedCallable instanceof EFSummarizedCallable diff --git a/csharp/ql/test/shared/FlowSummaries.qll b/csharp/ql/test/shared/FlowSummaries.qll index c650052f903..83a1530b028 100644 --- a/csharp/ql/test/shared/FlowSummaries.qll +++ b/csharp/ql/test/shared/FlowSummaries.qll @@ -1,4 +1,5 @@ -import semmle.code.csharp.dataflow.FlowSummary +import semmle.code.csharp.dataflow.internal.FlowSummaryImpl::Private +import semmle.code.csharp.dataflow.internal.FlowSummaryImpl::Public import semmle.code.csharp.dataflow.internal.FlowSummaryImpl::Private::TestOutput private import semmle.code.csharp.dataflow.internal.ExternalFlow diff --git a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll index 4b72ca8125a..cacad869509 100644 --- a/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll +++ b/go/ql/lib/semmle/go/dataflow/ExternalFlow.qll @@ -76,10 +76,10 @@ private import go import internal.ExternalFlowExtensions private import internal.DataFlowPrivate +private import internal.FlowSummaryImpl +private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External -private import internal.FlowSummaryImplSpecific -private import internal.AccessPathSyntax -private import FlowSummary +private import internal.FlowSummaryImpl::Public private import codeql.mad.ModelValidation as SharedModelVal /** Holds if `package` have MaD framework coverage. */ @@ -274,7 +274,7 @@ private string interpretPackage(string p) { } /** Gets the source/sink/summary element corresponding to the supplied parameters. 
*/ -SourceOrSinkElement interpretElement( +SourceSinkInterpretationInput::SourceOrSinkElement interpretElement( string pkg, string type, boolean subtypes, string name, string signature, string ext ) { elementSpec(pkg, type, subtypes, name, signature, ext) and @@ -298,8 +298,9 @@ SourceOrSinkElement interpretElement( predicate hasExternalSpecification(Function f) { f = any(SummarizedCallable sc).asFunction() or - exists(SourceOrSinkElement e | f = e.asEntity() | - sourceElement(e, _, _, _) or sinkElement(e, _, _, _) + exists(SourceSinkInterpretationInput::SourceOrSinkElement e | f = e.asEntity() | + SourceSinkInterpretationInput::sourceElement(e, _, _) or + SourceSinkInterpretationInput::sinkElement(e, _, _) ) } @@ -353,7 +354,9 @@ private module Cached { */ cached predicate sourceNode(DataFlow::Node node, string kind) { - exists(InterpretNode n | isSourceNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSourceNode(n, kind) and n.asNode() = node + ) } /** @@ -362,8 +365,73 @@ private module Cached { */ cached predicate sinkNode(DataFlow::Node node, string kind) { - exists(InterpretNode n | isSinkNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSinkNode(n, kind) and n.asNode() = node + ) } } import Cached + +private predicate interpretSummary( + Callable c, string input, string output, string kind, string provenance +) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext + | + summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and + c.asFunction() = interpretElement(namespace, type, subtypes, name, signature, ext).asEntity() + ) +} + +// adapter class for converting Mad summaries to `SummarizedCallable`s +private class SummarizedCallableAdapter extends SummarizedCallable { + SummarizedCallableAdapter() { interpretSummary(this, _, _, _, _) } + + private predicate 
relevantSummaryElementManual(string input, string output, string kind) { + exists(Provenance provenance | + interpretSummary(this, input, output, kind, provenance) and + provenance.isManual() + ) + } + + private predicate relevantSummaryElementGenerated(string input, string output, string kind) { + exists(Provenance provenance | + interpretSummary(this, input, output, kind, provenance) and + provenance.isGenerated() + ) + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + exists(string kind | + this.relevantSummaryElementManual(input, output, kind) + or + not this.relevantSummaryElementManual(_, _, _) and + this.relevantSummaryElementGenerated(input, output, kind) + | + if kind = "value" then preservesValue = true else preservesValue = false + ) + } + + override predicate hasProvenance(Provenance provenance) { + interpretSummary(this, _, _, _, provenance) + } +} + +// adapter class for converting Mad neutrals to `NeutralCallable`s +private class NeutralCallableAdapter extends NeutralCallable { + string kind; + string provenance_; + + NeutralCallableAdapter() { + // Neutral models have not been implemented for Go. + none() and + exists(this) and + exists(kind) and + exists(provenance_) + } + + override string getKind() { result = kind } + + override predicate hasProvenance(Provenance provenance) { provenance = provenance_ } +} diff --git a/go/ql/lib/semmle/go/dataflow/FlowSummary.qll b/go/ql/lib/semmle/go/dataflow/FlowSummary.qll index 271e185a7f6..f38cfafc056 100644 --- a/go/ql/lib/semmle/go/dataflow/FlowSummary.qll +++ b/go/ql/lib/semmle/go/dataflow/FlowSummary.qll @@ -10,40 +10,14 @@ private import internal.DataFlowUtil // import all instances below private module Summaries { } -class SummaryComponent = Impl::Public::SummaryComponent; +deprecated class SummaryComponent = Impl::Private::SummaryComponent; -/** Provides predicates for constructing summary components. 
*/ -module SummaryComponent { - import Impl::Public::SummaryComponent +deprecated module SummaryComponent = Impl::Private::SummaryComponent; - /** Gets a summary component that represents a qualifier. */ - SummaryComponent qualifier() { result = argument(-1) } +deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack; - /** Gets a summary component for field `f`. */ - SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) } - - /** Gets a summary component that represents the return value of a call. */ - SummaryComponent return() { result = return(_) } -} - -class SummaryComponentStack = Impl::Public::SummaryComponentStack; - -/** Provides predicates for constructing stacks of summary components. */ -module SummaryComponentStack { - import Impl::Public::SummaryComponentStack - - /** Gets a singleton stack representing a qualifier. */ - SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) } - - /** Gets a stack representing a field `f` of `object`. */ - SummaryComponentStack fieldOf(Field f, SummaryComponentStack object) { - result = push(SummaryComponent::field(f), object) - } - - /** Gets a singleton stack representing a (normal) return. 
*/ - SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } -} +deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; class SummarizedCallable = Impl::Public::SummarizedCallable; -class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; +deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; diff --git a/go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll b/go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. - * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. 
hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. - lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. 
- * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. - result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. 
- */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll index 0fcec57305e..9034e454278 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowNodes.qll @@ -86,7 +86,8 @@ module Private { /** Holds if this summary node is the `i`th argument of `call`. */ predicate isArgumentOf(DataFlowCall call, int i) { - FlowSummaryImpl::Private::summaryArgumentNode(call, this.getSummaryNode(), i) + // We do not currently have support for callback-based library models. 
+ none() } /** Holds if this summary node is a return node. */ @@ -96,7 +97,8 @@ module Private { /** Holds if this summary node is an out node for `call`. */ predicate isOut(DataFlowCall call) { - FlowSummaryImpl::Private::summaryOutNode(call, this.getSummaryNode(), _) + // We do not currently have support for callback-based library models. + none() } } } diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll index c75796594fb..e17f4cd9cd2 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -420,7 +420,10 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves * by default as a heuristic. */ predicate allowParameterReturnInSelf(ParameterNode p) { - FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p) + exists(DataFlowCallable c, int pos | + p.isParameterOf(c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asSummarizedCallable(), pos) + ) } /** An approximated `Content`. */ diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index 0aa17c521b4..cbf33afff25 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -1,1491 +1,321 @@ /** * Provides classes and predicates for defining flow summaries. - * - * The definitions in this file are language-independent, and language-specific - * definitions are passed in via the `DataFlowImplSpecific` and - * `FlowSummaryImplSpecific` modules. 
*/ -private import FlowSummaryImplSpecific +private import go +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import DataFlowImplSpecific as DataFlowImplSpecific private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public private import DataFlowImplCommon -private import codeql.util.Unit +private import semmle.go.dataflow.ExternalFlow -/** Provides classes and predicates for defining flow summaries. */ -module Public { - private import Private +private module FlowSummaries { + private import semmle.go.dataflow.FlowSummary as F +} - /** - * A component used in a flow summary. - * - * Either a parameter or an argument at a given position, a specific - * content type, or a return kind. - */ - class SummaryComponent extends TSummaryComponent { - /** Gets a textual representation of this component used for MaD models. */ - string getMadRepresentation() { - result = getMadRepresentationSpecific(this) - or - exists(ArgumentPosition pos | - this = TParameterSummaryComponent(pos) and - result = "Parameter[" + getArgumentPosition(pos) + "]" - ) - or - exists(ParameterPosition pos | - this = TArgumentSummaryComponent(pos) and - result = "Argument[" + getParameterPosition(pos) + "]" - ) - or - exists(string synthetic | - this = TSyntheticGlobalSummaryComponent(synthetic) and - result = "SyntheticGlobal[" + synthetic + "]" - ) - or - this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" - } +module Input implements InputSig { + class SummarizedCallableBase = Callable; - /** Gets a textual representation of this summary component. 
*/ - string toString() { result = this.getMadRepresentation() } + ArgumentPosition callbackSelfParameterPosition() { result = -1 } + + ReturnKind getStandardReturnValueKind() { result = getReturnKind(0) } + + string encodeParameterPosition(ParameterPosition pos) { result = pos.toString() } + + string encodeArgumentPosition(ArgumentPosition pos) { result = pos.toString() } + + string encodeReturn(ReturnKind rk, string arg) { + exists(int pos | + rk = getReturnKind(pos) and + result = "ReturnValue" + | + pos = 0 and arg = "" + or + pos != 0 and + arg = pos.toString() + ) } - /** Provides predicates for constructing summary components. */ - module SummaryComponent { - /** Gets a summary component for content `c`. */ - SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } + string encodeContent(ContentSet cs, string arg) { + exists(Field f, string package, string className, string fieldName | + f = cs.(FieldContent).getField() and + f.hasQualifiedName(package, className, fieldName) and + result = "Field" and + arg = package + "." + className + "." + fieldName + ) + or + exists(SyntheticField f | + f = cs.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + ) + or + cs instanceof ArrayContent and result = "ArrayElement" and arg = "" + or + cs instanceof CollectionContent and result = "Element" and arg = "" + or + cs instanceof MapKeyContent and result = "MapKey" and arg = "" + or + cs instanceof MapValueContent and result = "MapValue" and arg = "" + or + cs instanceof PointerContent and result = "Dereference" and arg = "" + } - /** Gets a summary component where data is not allowed to be stored in `c`. 
*/ - SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges + token.getName() = "Argument" and + result = AccessPath::parseInt(token.getAnArgument()) + } - /** Gets a summary component where data must be stored in `c`. */ - SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges + token.getName() = "Parameter" and + result = AccessPath::parseInt(token.getAnArgument()) + } - /** Gets a summary component for a parameter at position `pos`. */ - SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } + bindingset[token] + ReturnKind decodeUnknownReturn(AccessPath::AccessPathTokenBase token) { + // needed to support `ReturnValue[x..y]` ranges, and `ReturnValue[0]` in addition to `ReturnValue` + token.getName() = "ReturnValue" and + result.getIndex() = AccessPath::parseInt(token.getAnArgument()) + } +} - /** Gets a summary component for an argument at position `pos`. */ - SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } +private import Make as Impl - /** Gets a summary component for a return of kind `rk`. */ - SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { + exists(DataFlow::CallNode call | + call.asExpr() = result and + call.getACalleeIncludingExternals() = sc + ) + } +} - /** Gets a summary component for synthetic global `sg`. 
*/ - SummaryComponent syntheticGlobal(SyntheticGlobal sg) { - result = TSyntheticGlobalSummaryComponent(sg) - } +module SourceSinkInterpretationInput implements + Impl::Private::External::SourceSinkInterpretationInputSig +{ + class Element = SourceOrSinkElement; - /** - * A synthetic global. This represents some form of global state, which - * summaries can read and write individually. - */ - abstract class SyntheticGlobal extends string { - bindingset[this] - SyntheticGlobal() { any() } - } + /** + * Holds if an external source specification exists for `e` with output specification + * `output` and kind `kind`. + */ + predicate sourceElement(SourceOrSinkElement e, string output, string kind) { + exists( + string package, string type, boolean subtypes, string name, string signature, string ext + | + sourceModel(package, type, subtypes, name, signature, ext, output, kind, _) and + e = interpretElement(package, type, subtypes, name, signature, ext) + ) } /** - * A (non-empty) stack of summary components. - * - * A stack is used to represent where data is read from (input) or where it - * is written to (output). For example, an input stack `[Field f, Argument 0]` - * means that data is read from field `f` from the `0`th argument, while an - * output stack `[Field g, Return]` means that data is written to the field - * `g` of the returned object. + * Holds if an external sink specification exists for `e` with input specification + * `input` and kind `kind`. */ - class SummaryComponentStack extends TSummaryComponentStack { - /** Gets the head of this stack. 
*/ - SummaryComponent head() { - this = TSingletonSummaryComponentStack(result) or - this = TConsSummaryComponentStack(result, _) + predicate sinkElement(SourceOrSinkElement e, string input, string kind) { + exists( + string package, string type, boolean subtypes, string name, string signature, string ext + | + sinkModel(package, type, subtypes, name, signature, ext, input, kind, _) and + e = interpretElement(package, type, subtypes, name, signature, ext) + ) + } + + private newtype TSourceOrSinkElement = + TEntityElement(Entity e) or + TAstElement(AstNode n) + + /** An element representable by CSV modeling. */ + class SourceOrSinkElement extends TSourceOrSinkElement { + /** Gets this source or sink element as an entity, if it is one. */ + Entity asEntity() { this = TEntityElement(result) } + + /** Gets this source or sink element as an AST node, if it is one. */ + AstNode asAstNode() { this = TAstElement(result) } + + /** Gets a textual representation of this source or sink element. */ + string toString() { + result = "element representing " + [this.asEntity().toString(), this.asAstNode().toString()] } - /** Gets the tail of this stack, if any. */ - SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } - - /** Gets the length of this stack. */ - int length() { - this = TSingletonSummaryComponentStack(_) and result = 1 - or - result = 1 + this.tail().length() - } - - /** Gets the stack obtained by dropping the first `i` elements, if any. */ - SummaryComponentStack drop(int i) { - i = 0 and result = this - or - result = this.tail().drop(i - 1) - } - - /** Holds if this stack contains summary component `c`. */ - predicate contains(SummaryComponent c) { c = this.drop(_).head() } - - /** Gets the bottom element of this stack. */ - SummaryComponent bottom() { - this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() - } - - /** Gets a textual representation of this stack used for MaD models. 
*/ - string getMadRepresentation() { - exists(SummaryComponent head, SummaryComponentStack tail | - head = this.head() and - tail = this.tail() and - result = tail.getMadRepresentation() + "." + head.getMadRepresentation() - ) - or - exists(SummaryComponent c | - this = TSingletonSummaryComponentStack(c) and - result = c.getMadRepresentation() + /** Gets the location of this element. */ + Location getLocation() { + exists(string fp, int sl, int sc, int el, int ec | + this.hasLocationInfo(fp, sl, sc, el, ec) and + result.hasLocationInfo(fp, sl, sc, el, ec) ) } - /** Gets a textual representation of this stack. */ - string toString() { result = this.getMadRepresentation() } + /** Holds if this element is at the specified location. */ + predicate hasLocationInfo(string fp, int sl, int sc, int el, int ec) { + this.asEntity().hasLocationInfo(fp, sl, sc, el, ec) or + this.asAstNode().hasLocationInfo(fp, sl, sc, el, ec) + } } - /** Provides predicates for constructing stacks of summary components. */ - module SummaryComponentStack { - /** Gets a singleton stack containing `c`. */ - SummaryComponentStack singleton(SummaryComponent c) { - result = TSingletonSummaryComponentStack(c) + private newtype TInterpretNode = + TElement(SourceOrSinkElement n) or + TNode(Node n) + + /** An entity used to interpret a source/sink specification. */ + class InterpretNode extends TInterpretNode { + /** Gets the element that this node corresponds to, if any. */ + SourceOrSinkElement asElement() { this = TElement(result) } + + /** Gets the data-flow node that this node corresponds to, if any. */ + Node asNode() { this = TNode(result) } + + /** Gets the call that this node corresponds to, if any. */ + DataFlowCall asCall() { result = this.asElement().asAstNode() } + + /** Gets the callable that this node corresponds to, if any. 
*/ + DataFlowCallable asCallable() { + result.asSummarizedCallable().asFunction() = this.asElement().asEntity() } - /** - * Gets the stack obtained by pushing `head` onto `tail`. - * - * Make sure to override `RequiredSummaryComponentStack::required()` in order - * to ensure that the constructed stack exists. - */ - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { - result = TConsSummaryComponentStack(head, tail) + /** Gets the target of this call, if any. */ + SourceOrSinkElement getCallTarget() { + result.asEntity() = this.asCall().getNode().(DataFlow::CallNode).getTarget() } - /** Gets a singleton stack for an argument at position `pos`. */ - SummaryComponentStack argument(ParameterPosition pos) { - result = singleton(SummaryComponent::argument(pos)) - } - - /** Gets a singleton stack representing a return of kind `rk`. */ - SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } - } - - /** - * A class that exists for QL technical reasons only (the IPA type used - * to represent component stacks needs to be bounded). - */ - class RequiredSummaryComponentStack extends Unit { - /** - * Holds if the stack obtained by pushing `head` onto `tail` is required. - */ - abstract predicate required(SummaryComponent head, SummaryComponentStack tail); - } - - /** - * Gets the valid model origin values. - */ - private string getValidModelOrigin() { - result = - [ - "ai", // AI (machine learning) - "df", // Dataflow (model generator) - "tb", // Type based (model generator) - "hq", // Heuristic query - ] - } - - /** - * A class used to represent provenance values for MaD models. - * - * The provenance value is a string of the form `origin-verification` - * (or just `manual`), where `origin` is a value indicating the - * origin of the model, and `verification` is a value indicating, how - * the model was verified. 
- * - * Examples could be: - * - `df-generated`: A model produced by the model generator, but not verified by a human. - * - `ai-manual`: A model produced by AI, but verified by a human. - */ - class Provenance extends string { - private string verification; - - Provenance() { - exists(string origin | origin = getValidModelOrigin() | - this = origin + "-" + verification and - verification = ["manual", "generated"] - ) + /** Gets a textual representation of this node. */ + string toString() { + result = this.asElement().toString() or - this = verification and verification = "manual" + result = this.asNode().toString() } - /** - * Holds if this is a valid generated provenance value. - */ - predicate isGenerated() { verification = "generated" } + /** Holds if this node is at the specified location. */ + predicate hasLocationInfo(string fp, int sl, int sc, int el, int ec) { + this.asElement().hasLocationInfo(fp, sl, sc, el, ec) + or + this.asNode().hasLocationInfo(fp, sl, sc, el, ec) + } - /** - * Holds if this is a valid manual provenance value. - */ - predicate isManual() { verification = "manual" } + Location getLocation() { + exists(string fp, int sl, int sc, int el, int ec | + this.hasLocationInfo(fp, sl, sc, el, ec) and + result.hasLocationInfo(fp, sl, sc, el, ec) + ) + } } - /** A callable with a flow summary. */ - abstract class SummarizedCallable extends SummarizedCallableBase { - bindingset[this] - SummarizedCallable() { any() } - - /** - * Holds if data may flow from `input` to `output` through this callable. - * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. - * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. 
- * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. - */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if there exists a generated summary that applies to this callable. - */ - final predicate hasGeneratedModel() { - exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) - } - - /** - * Holds if all the summaries that apply to this callable are auto generated and not manually created. - * That is, only apply generated models, when there are no manual models. - */ - final predicate applyGeneratedModel() { - this.hasGeneratedModel() and - not this.hasManualModel() - } - - /** - * Holds if there exists a manual summary that applies to this callable. - */ - final predicate hasManualModel() { - exists(Provenance p | p.isManual() and this.hasProvenance(p)) - } - - /** - * Holds if there exists a manual summary that applies to this callable. - * Always apply manual models if they exist. - */ - final predicate applyManualModel() { this.hasManualModel() } - - /** - * Holds if there exists a summary that applies to this callable - * that has provenance `provenance`. - */ - predicate hasProvenance(Provenance provenance) { provenance = "manual" } + /** Provides additional sink specification logic. 
*/ + bindingset[c] + predicate interpretOutput(string c, InterpretNode mid, InterpretNode node) { + exists(int pos | + node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnKind(pos))) + | + parseReturn(c, pos) + ) + or + exists(Node n, SourceOrSinkElement e | + n = node.asNode() and + e = mid.asElement() + | + (c = "Parameter" or c = "") and + node.asNode().asParameter() = e.asEntity() + or + c = "" and + n.(DataFlow::FieldReadNode).getField() = e.asEntity() + ) } - /** - * A callable where there is no flow via the callable. - */ - class NeutralSummaryCallable extends NeutralCallable { - NeutralSummaryCallable() { this.getKind() = "summary" } - } - - /** - * A callable that has a neutral model. - */ - class NeutralCallable extends NeutralCallableBase { - private string kind; - private Provenance provenance; - - NeutralCallable() { neutralElement(this, kind, provenance) } - - /** - * Holds if the neutral is auto generated. - */ - final predicate hasGeneratedModel() { provenance.isGenerated() } - - /** - * Holds if there exists a manual neutral that applies to this callable. - */ - final predicate hasManualModel() { provenance.isManual() } - - /** - * Holds if the neutral has provenance `p`. - */ - predicate hasProvenance(Provenance p) { p = provenance } - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = kind } + /** Provides additional source specification logic. */ + bindingset[c] + predicate interpretInput(string c, InterpretNode mid, InterpretNode node) { + exists(int pos, ReturnNodeExt ret | + parseReturn(c, pos) and + ret = node.asNode() and + ret.getKind().(ValueReturnKind).getKind() = getReturnKind(pos) and + mid.asCallable() = getNodeEnclosingCallable(ret) + ) + or + exists(DataFlow::Write fw, Field f | + c = "" and + f = mid.asElement().asEntity() and + fw.writesField(_, f, node.asNode()) + ) } } /** - * Provides predicates for compiling flow summaries down to atomic local steps, - * read steps, and store steps. 
+ * Holds if specification component `c` parses as return value `n` or a range + * containing `n`. */ +bindingset[c] +private predicate parseReturn(AccessPath::AccessPathTokenBase c, int n) { + ( + c = "ReturnValue" and n = 0 + or + c.getName() = "ReturnValue" and + n = AccessPath::parseInt(c.getAnArgument()) + ) +} + module Private { - private import Public - import AccessPathSyntax + import Impl::Private - newtype TSummaryComponent = - TContentSummaryComponent(ContentSet c) or - TParameterSummaryComponent(ArgumentPosition pos) or - TArgumentSummaryComponent(ParameterPosition pos) or - TReturnSummaryComponent(ReturnKind rk) or - TSyntheticGlobalSummaryComponent(SummaryComponent::SyntheticGlobal sg) or - TWithoutContentSummaryComponent(ContentSet c) or - TWithContentSummaryComponent(ContentSet c) + module Steps = Impl::Private::Steps; - private TParameterSummaryComponent callbackSelfParam() { - result = TParameterSummaryComponent(callbackSelfParameterPosition()) - } - - newtype TSummaryComponentStack = - TSingletonSummaryComponentStack(SummaryComponent c) or - TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { - any(RequiredSummaryComponentStack x).required(head, tail) - or - any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and - head = callbackSelfParam() - or - derivedFluentFlowPush(_, _, _, head, tail, _) - } - - pragma[nomagic] - private predicate summary( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - ) { - c.propagatesFlow(input, output, preservesValue) - or - // observe side effects of callbacks on input arguments - c.propagatesFlow(output, input, preservesValue) and - preservesValue = true and - isCallbackParameter(input) and - isContentOfArgument(output, _) - or - // flow from the receiver of a callback into the instance-parameter - exists(SummaryComponentStack s, SummaryComponentStack callbackRef | - c.propagatesFlow(s, _, _) or 
c.propagatesFlow(_, s, _) - | - callbackRef = s.drop(_) and - (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and - input = callbackRef.tail() and - output = TConsSummaryComponentStack(callbackSelfParam(), input) and - preservesValue = true - ) - or - exists(SummaryComponentStack arg, SummaryComponentStack return | - derivedFluentFlow(c, input, arg, return, preservesValue) - | - arg.length() = 1 and - output = return - or - exists(SummaryComponent head, SummaryComponentStack tail | - derivedFluentFlowPush(c, input, arg, head, tail, 0) and - output = SummaryComponentStack::push(head, tail) - ) - ) - or - // Chain together summaries where values get passed into callbacks along the way - exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | - c.propagatesFlow(input, mid, preservesValue1) and - c.propagatesFlow(mid, output, preservesValue2) and - mid.drop(mid.length() - 2) = - SummaryComponentStack::push(TParameterSummaryComponent(_), - SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and - preservesValue = preservesValue1.booleanAnd(preservesValue2) - ) - } - - /** - * Holds if `c` has a flow summary from `input` to `arg`, where `arg` - * writes to (contents of) arguments at position `pos`, and `c` has a - * value-preserving flow summary from the arguments at position `pos` - * to a return value (`return`). - * - * In such a case, we derive flow from `input` to (contents of) the return - * value. - * - * As an example, this simplifies modeling of fluent methods: - * for `StringBuilder.append(x)` with a specified value flow from qualifier to - * return value and taint flow from argument 0 to the qualifier, then this - * allows us to infer taint flow from argument 0 to the return value. 
- */ - pragma[nomagic] - private predicate derivedFluentFlow( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponentStack return, boolean preservesValue - ) { - exists(ParameterPosition pos | - summary(c, input, arg, preservesValue) and - isContentOfArgument(arg, pos) and - summary(c, SummaryComponentStack::argument(pos), return, true) and - return.bottom() = TReturnSummaryComponent(_) - ) - } - - pragma[nomagic] - private predicate derivedFluentFlowPush( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponent head, SummaryComponentStack tail, int i - ) { - derivedFluentFlow(c, input, arg, tail, _) and - head = arg.drop(i).head() and - i = arg.length() - 2 - or - exists(SummaryComponent head0, SummaryComponentStack tail0 | - derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and - head = arg.drop(i).head() and - tail = SummaryComponentStack::push(head0, tail0) - ) - } - - private predicate isCallbackParameter(SummaryComponentStack s) { - s.head() = TParameterSummaryComponent(_) and exists(s.tail()) - } - - private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { - s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) - or - s = SummaryComponentStack::argument(pos) - } - - private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, _, s, _) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TContentSummaryComponent(_) and - s = out.tail() - ) - or - // Add the argument node corresponding to the requested post-update node - inputState(c, s) and isCallbackParameter(s) - } - - private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, s, _, _) - or - exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = 
TParameterSummaryComponent(_) and - s = out.tail() - ) - or - // Add the post-update node corresponding to the requested argument node - outputState(c, s) and isCallbackParameter(s) - or - // Add the parameter node for parameter side-effects - outputState(c, s) and s = SummaryComponentStack::argument(_) - } - - private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or - TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } - - /** - * A state used to break up (complex) flow summaries into atomic flow steps. - * For a flow summary - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * the following states are used: - * - * - `TSummaryNodeInputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _have been read_ from the - * input. - * - `TSummaryNodeOutputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _remain to be written_ to - * the output. - */ - private class SummaryNodeState extends TSummaryNodeState { - /** Holds if this state is a valid input state for `c`. */ - pragma[nomagic] - predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeInputState(s) and - inputState(c, s) - } - - /** Holds if this state is a valid output state for `c`. */ - pragma[nomagic] - predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeOutputState(s) and - outputState(c, s) - } - - /** Gets a textual representation of this state. 
*/ - string toString() { - exists(SummaryComponentStack s | - this = TSummaryNodeInputState(s) and - result = "read: " + s - ) - or - exists(SummaryComponentStack s | - this = TSummaryNodeOutputState(s) and - result = "to write: " + s - ) - } - } - - private newtype TSummaryNode = - TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { - summaryNodeRange(c, state) - } or - TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { - summaryParameterNodeRange(c, pos) - } - - abstract class SummaryNode extends TSummaryNode { - abstract string toString(); - - abstract SummarizedCallable getSummarizedCallable(); - } - - private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { - private SummarizedCallable c; - private SummaryNodeState state; - - SummaryInternalNode() { this = TSummaryInternalNode(c, state) } - - override string toString() { result = "[summary] " + state + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { - private SummarizedCallable c; - private ParameterPosition pos; - - SummaryParamNode() { this = TSummaryParameterNode(c, pos) } - - override string toString() { result = "[summary param] " + pos + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - /** - * Holds if `state` represents having read from a parameter at position - * `pos` in `c`. In this case we are not synthesizing a data-flow node, - * but instead assume that a relevant parameter node already exists. - */ - private predicate parameterReadState( - SummarizedCallable c, SummaryNodeState state, ParameterPosition pos - ) { - state.isInputState(c, SummaryComponentStack::argument(pos)) - } - - /** - * Holds if a synthesized summary node is needed for the state `state` in summarized - * callable `c`. 
- */ - private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { - state.isInputState(c, _) and - not parameterReadState(c, state, _) - or - state.isOutputState(c, _) - } - - pragma[noinline] - private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | state.isInputState(c, s) | - result = TSummaryInternalNode(c, state) - or - exists(ParameterPosition pos | - parameterReadState(c, state, pos) and - result = TSummaryParameterNode(c, pos) - ) - ) - } - - pragma[noinline] - private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | - state.isOutputState(c, s) and - result = TSummaryInternalNode(c, state) - ) - } - - /** - * Holds if a write targets `post`, which is a post-update node for a - * parameter at position `pos` in `c`. - */ - private predicate isParameterPostUpdate( - SummaryNode post, SummarizedCallable c, ParameterPosition pos - ) { - post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) - } - - /** Holds if a parameter node at position `pos` is required for `c`. 
*/ - private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { - parameterReadState(c, _, pos) - or - // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context - any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) - } - - private predicate callbackOutput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk - ) { - any(SummaryNodeState state).isInputState(c, s) and - s.head() = TReturnSummaryComponent(rk) and - receiver = summaryNodeInputState(c, s.tail()) - } - - private predicate callbackInput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos - ) { - any(SummaryNodeState state).isOutputState(c, s) and - s.head() = TParameterSummaryComponent(pos) and - receiver = summaryNodeInputState(c, s.tail()) - } - - /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ - predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { - callbackOutput(c, _, receiver, _) - or - callbackInput(c, _, receiver, _) - } - - /** - * Gets the type of synthesized summary node `n`. - * - * The type is computed based on the language-specific predicates - * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and - * `getCallbackReturnType()`. 
- */ - DataFlowType summaryNodeType(SummaryNode n) { - exists(SummaryNode pre | - summaryPostUpdateNode(n, pre) and - result = summaryNodeType(pre) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | - n = summaryNodeInputState(c, s) and - ( - exists(ContentSet cont | result = getContentType(cont) | - head = TContentSummaryComponent(cont) or - head = TWithContentSummaryComponent(cont) - ) - or - head = TWithoutContentSummaryComponent(_) and - result = summaryNodeType(summaryNodeInputState(c, s.tail())) - or - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = - getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), rk) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - or - exists(ParameterPosition pos | - head = TArgumentSummaryComponent(pos) and - result = getParameterType(c, pos) - ) - ) - or - n = summaryNodeOutputState(c, s) and - ( - exists(ContentSet cont | - head = TContentSummaryComponent(cont) and result = getContentType(cont) - ) - or - s.length() = 1 and - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = getReturnType(c, rk) - ) - or - exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | - result = - getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), pos) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - ) - ) - } - - /** Holds if summary node `p` is a parameter with position `pos`. */ - predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { - p = TSummaryParameterNode(_, pos) - } - - /** Holds if summary node `out` contains output of kind `rk` from call `c`. 
*/ - predicate summaryOutNode(DataFlowCall c, SummaryNode out, ReturnKind rk) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackOutput(callable, s, receiver, rk) and - out = summaryNodeInputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `arg` is at position `pos` in the call `c`. */ - predicate summaryArgumentNode(DataFlowCall c, SummaryNode arg, ArgumentPosition pos) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackInput(callable, s, receiver, pos) and - arg = summaryNodeOutputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { - exists(SummarizedCallable c, ParameterPosition pos | - isParameterPostUpdate(post, c, pos) and - pre = TSummaryParameterNode(c, pos) - ) - or - exists(SummarizedCallable callable, SummaryComponentStack s | - callbackInput(callable, s, _, _) and - pre = summaryNodeOutputState(callable, s) and - post = summaryNodeInputState(callable, s) - ) - } - - /** Holds if summary node `ret` is a return node of kind `rk`. */ - predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { - exists(SummaryComponentStack s | - ret = summaryNodeOutputState(_, s) and - s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) - ) - } - - /** - * Holds if flow is allowed to pass from parameter `p`, to a return - * node, and back out to `p`. 
- */ - predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | - p.isParameterOf(inject(c), pragma[only_bind_into](ppos)) - | - exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | - summary(c, inputContents, outputContents, _) and - inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and - outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) - ) - ) - } - - /** Provides a compilation of flow summaries to atomic data-flow steps. */ - module Steps { - /** - * Holds if there is a local step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { - exists( - SummarizedCallable c, SummaryComponentStack inputContents, - SummaryComponentStack outputContents - | - summary(c, inputContents, outputContents, preservesValue) and - pred = summaryNodeInputState(c, inputContents) and - succ = summaryNodeOutputState(c, outputContents) - | - preservesValue = true - or - preservesValue = false and not summary(c, inputContents, outputContents, true) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s | - pred = summaryNodeInputState(c, s.tail()) and - succ = summaryNodeInputState(c, s) and - s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and - preservesValue = true - ) - } - - /** - * Holds if there is a read step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. 
- */ - predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeInputState(sc, s.tail()) and - succ = summaryNodeInputState(sc, s) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a store step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeOutputState(sc, s) and - succ = summaryNodeOutputState(sc, s.tail()) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a jump step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { - exists(SummaryComponentStack s | - s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and - pred = summaryNodeOutputState(_, s) and - succ = summaryNodeInputState(_, s) - ) - } - - /** - * Holds if values stored inside content `c` are cleared at `n`. `n` is a - * synthesized summary node, so in order for values to be cleared at calls - * to the relevant method, it is important that flow does not pass over - * the argument, either via use-use flow or def-use flow. - * - * Example: - * - * ``` - * a.b = taint; - * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier - * sink(a.b); - * ``` - * - * In the above, flow should not pass from `a` on the first line (or the second - * line) to `a` on the third line. Instead, there will be synthesized flow from - * `a` on line 2 to the post-update node for `a` on that line (via an intermediate - * node where field `b` is cleared). 
- */ - predicate summaryClearsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withoutContent(c) - ) - } - - /** - * Holds if the value that is being tracked is expected to be stored inside - * content `c` at `n`. - */ - predicate summaryExpectsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withContent(c) - ) - } - - pragma[noinline] - private predicate viableParam( - DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p - ) { - exists(DataFlowCallable c | - c = inject(sc) and - p = TSummaryParameterNode(sc, ppos) and - c = viableCallable(call) - ) - } - - pragma[nomagic] - private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { - exists(ParameterPosition ppos | - argumentPositionMatch(call, arg, ppos) and - viableParam(call, sc, ppos, result) - ) - } - - /** - * Holds if `p` can reach `n` in a summarized callable, using only value-preserving - * local steps. `clearsOrExpects` records whether any node on the path from `p` to - * `n` either clears or expects contents. - */ - private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { - viableParam(_, _, _, p) and - n = p and - clearsOrExpects = false - or - exists(SummaryNode mid, boolean clearsOrExpectsMid | - paramReachesLocal(p, mid, clearsOrExpectsMid) and - summaryLocalStep(mid, n, true) and - if - summaryClearsContent(n, _) or - summaryExpectsContent(n, _) - then clearsOrExpects = true - else clearsOrExpects = clearsOrExpectsMid - ) - } - - /** - * Holds if use-use flow starting from `arg` should be prohibited. 
- * - * This is the case when `arg` is the argument of a call that targets a - * flow summary where the corresponding parameter either clears contents - * or expects contents. - */ - pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { - exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | - paramReachesLocal(p, ret, true) and - p = summaryArgParam(_, arg, sc) and - p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and - isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) - ) - } - - pragma[nomagic] - private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { - summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) - or - exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | - paramReachesLocal(p, pre, _) and - summaryPostUpdateNode(ret, pre) and - p = TSummaryParameterNode(_, pos) and - rk.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - bindingset[ret] - private SummaryParamNode summaryArgParamRetOut( - ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc - ) { - exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam(call, arg, sc) and - summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and - out = pragma[only_bind_into](rk).getAnOutNode(call) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple value-preserving flow - * summary, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { - exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | - summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and - summaryReturnNode(ret, pragma[only_bind_into](rk)) and - out = getAnOutNode(call, pragma[only_bind_into](rk)) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple flow summary involving taint - * step, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { - exists(SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) - ) - } - - /** - * Holds if there is a read(+taint) of `c` from `arg` to `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and - summaryLocalStep(mid, ret, _) - ) - } - - /** - * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and - summaryStoreStep(mid, c, ret) - ) - } - } - - /** - * Provides a means of translating externally (e.g., MaD) defined flow - * summaries into a `SummarizedCallable`s. - */ module External { - /** Holds if `spec` is a relevant external specification. */ - private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _, _) or - summaryElement(_, _, spec, _, _) or - sourceElement(_, spec, _, _) or - sinkElement(_, spec, _, _) - } - - private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { relevantSpec(this) } - } - - /** Holds if specification component `token` parses as parameter `pos`. */ - predicate parseParam(AccessPathToken token, ArgumentPosition pos) { - token.getName() = "Parameter" and - pos = parseParamBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as argument `pos`. */ - predicate parseArg(AccessPathToken token, ParameterPosition pos) { - token.getName() = "Argument" and - pos = parseArgBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as synthetic global `sg`. 
*/ - predicate parseSynthGlobal(AccessPathToken token, string sg) { - token.getName() = "SyntheticGlobal" and - sg = token.getAnArgument() - } - - private class SyntheticGlobalFromAccessPath extends SummaryComponent::SyntheticGlobal { - SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } - } - - private SummaryComponent interpretComponent(AccessPathToken token) { - exists(ParameterPosition pos | - parseArg(token, pos) and result = SummaryComponent::argument(pos) - ) - or - exists(ArgumentPosition pos | - parseParam(token, pos) and result = SummaryComponent::parameter(pos) - ) - or - token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) - or - exists(string sg | - parseSynthGlobal(token, sg) and result = SummaryComponent::syntheticGlobal(sg) - ) - or - result = interpretComponentSpecific(token) - } - - /** - * Holds if `spec` specifies summary component stack `stack`. - */ - predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { - interpretSpec(spec, spec.getNumToken(), stack) - } - - /** Holds if the first `n` tokens of `spec` resolves to `stack`. 
*/ - private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { - n = 1 and - stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) - or - exists(SummaryComponent head, SummaryComponentStack tail | - interpretSpec(spec, n, head, tail) and - stack = SummaryComponentStack::push(head, tail) - ) - } - - /** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */ - private predicate interpretSpec( - AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail - ) { - interpretSpec(spec, n - 1, tail) and - head = interpretComponent(spec.getToken(n - 1)) - } - - private class MkStack extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - interpretSpec(_, _, head, tail) - } - } - - private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } - - private predicate relevantSummaryElementGenerated( - AccessPath inSpec, AccessPath outSpec, string kind - ) { - exists(Provenance provenance | - provenance.isGenerated() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) and - not this.applyManualModel() - } - - private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { - exists(Provenance provenance | - provenance.isManual() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) - or - this.relevantSummaryElementGenerated(inSpec, outSpec, kind) - } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(AccessPath inSpec, AccessPath outSpec, string kind | - this.relevantSummaryElement(inSpec, outSpec, kind) and - interpretSpec(inSpec, input) and - interpretSpec(outSpec, output) - | - kind = "value" and preservesValue = true - or - kind = "taint" and preservesValue = false - ) - } - - override predicate 
hasProvenance(Provenance provenance) { - summaryElement(this, _, _, _, provenance) - } - } - - /** Holds if component `c` of specification `spec` cannot be parsed. */ - predicate invalidSpecComponent(AccessPath spec, string c) { - c = spec.getToken(_) and - not exists(interpretComponent(c)) - } - - /** Holds if `provenance` is not a valid provenance value. */ - bindingset[provenance] - predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } - - /** - * Holds if token `part` of specification `spec` has an invalid index. - * E.g., `Argument[-1]`. - */ - predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { - part = spec.getToken(_) and - part.getName() = ["Parameter", "Argument"] and - AccessPath::parseInt(part.getArgumentList()) < 0 - } - - private predicate inputNeedsReference(AccessPathToken c) { - c.getName() = "Argument" or - inputNeedsReferenceSpecific(c) - } - - private predicate outputNeedsReference(AccessPathToken c) { - c.getName() = ["Argument", "ReturnValue"] or - outputNeedsReferenceSpecific(c) - } - - private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { - exists(SourceOrSinkElement e | - sourceElement(e, output, kind, _) and - if outputNeedsReference(output.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { - exists(SourceOrSinkElement e | - sinkElement(e, input, kind, _) and - if inputNeedsReference(input.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - /** Holds if the first `n` tokens of `output` resolve to the given interpretation. 
*/ - private predicate interpretOutput( - AccessPath output, int n, InterpretNode ref, InterpretNode node - ) { - sourceElementRef(ref, output, _) and - n = 0 and - ( - if output = "" - then - // Allow language-specific interpretation of the empty access path - interpretOutputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretOutput(output, n - 1, ref, mid) and - c = output.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) and - parameterMatch(ppos, apos) - | - c = "Parameter" or parseParam(c, apos) - ) - or - c = "ReturnValue" and - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) - or - interpretOutputSpecific(c, mid, node) - ) - } - - /** Holds if the first `n` tokens of `input` resolve to the given interpretation. 
*/ - private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) { - sinkElementRef(ref, input, _) and - n = 0 and - ( - if input = "" - then - // Allow language-specific interpretation of the empty access path - interpretInputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretInput(input, n - 1, ref, mid) and - c = input.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ReturnNodeExt ret | - c = "ReturnValue" and - ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - interpretInputSpecific(c, mid, node) - ) - } - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate isSourceNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath output | - sourceElementRef(ref, output, kind) and - interpretOutput(output, output.getNumToken(), ref, node) - ) - } - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate isSinkNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath input | - sinkElementRef(ref, input, kind) and - interpretInput(input, input.getNumToken(), ref, node) - ) - } - } - - /** Provides a query predicate for outputting a set of relevant flow summaries. */ - module TestOutput { - /** A flow summary to include in the `summary/1` query predicate. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - /** Gets the string representation of this callable used by `summary/1`. */ - abstract string getCallableCsv(); - - /** Holds if flow is propagated between `input` and `output`. 
*/ - predicate relevantSummary( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - super.propagatesFlow(input, output, preservesValue) - } - - string toString() { result = super.toString() } - } - - /** A model to include in the `neutral/1` query predicate. */ - abstract class RelevantNeutralCallable instanceof NeutralCallable { - /** Gets the string representation of this callable used by `neutral/1`. */ - abstract string getCallableCsv(); - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = super.getKind() } - - string toString() { result = super.toString() } - } - - /** Render the kind in the format used in flow summaries. */ - private string renderKind(boolean preservesValue) { - preservesValue = true and result = "value" - or - preservesValue = false and result = "taint" - } - - private string renderProvenance(SummarizedCallable c) { - if c.applyManualModel() then result = "manual" else c.hasProvenance(result) - } - - private string renderProvenanceNeutral(NeutralCallable c) { - if c.hasManualModel() then result = "manual" else c.hasProvenance(result) - } - - /** - * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. - * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", - * ext is hardcoded to empty. - */ - query predicate summary(string csv) { - exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - | - c.relevantSummary(input, output, preservesValue) and - csv = - c.getCallableCsv() // Callable information - + input.getMadRepresentation() + ";" // input - + output.getMadRepresentation() + ";" // output - + renderKind(preservesValue) + ";" // kind - + renderProvenance(c) // provenance - ) - } - - /** - * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. 
- * The syntax is: "namespace;type;name;signature;kind;provenance"", - */ - query predicate neutral(string csv) { - exists(RelevantNeutralCallable c | - csv = - c.getCallableCsv() // Callable information - + c.getKind() + ";" // kind - + renderProvenanceNeutral(c) // provenance - ) - } + import Impl::Private::External + import Impl::Private::External::SourceSinkInterpretation } /** - * Provides query predicates for rendering the generated data flow graph for - * a summarized callable. - * - * Import this module into a `.ql` file of `@kind graph` to render the graph. - * The graph is restricted to callables from `RelevantSummarizedCallable`. + * Provides predicates for constructing summary components. */ - module RenderSummarizedCallable { - /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - string toString() { result = super.toString() } + module SummaryComponent { + private import Impl::Private::SummaryComponent as SC + + predicate parameter = SC::parameter/1; + + predicate argument = SC::argument/1; + + predicate content = SC::content/1; + + predicate withoutContent = SC::withoutContent/1; + + predicate withContent = SC::withContent/1; + + /** Gets a summary component that represents a qualifier. */ + SummaryComponent qualifier() { result = argument(-1) } + + /** Gets a summary component for field `f`. */ + SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) } + + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = SC::return(_) } + } + + /** + * Provides predicates for constructing stacks of summary components. + */ + module SummaryComponentStack { + private import Impl::Private::SummaryComponentStack as SCS + + predicate singleton = SCS::singleton/1; + + predicate push = SCS::push/2; + + predicate argument = SCS::argument/1; + + /** Gets a singleton stack representing a qualifier. 
*/ + SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) } + + /** Gets a stack representing a field `f` of `object`. */ + SummaryComponentStack fieldOf(Field f, SummaryComponentStack object) { + result = push(SummaryComponent::field(f), object) } - private newtype TNodeOrCall = - MkNode(SummaryNode n) { - exists(RelevantSummarizedCallable c | - n = TSummaryInternalNode(c, _) - or - n = TSummaryParameterNode(c, _) - ) - } or - MkCall(DataFlowCall call) { - call = summaryDataFlowCall(_) and - call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) - } - - private class NodeOrCall extends TNodeOrCall { - SummaryNode asNode() { this = MkNode(result) } - - DataFlowCall asCall() { this = MkCall(result) } - - string toString() { - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
- */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - } - } - - query predicate nodes(NodeOrCall n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { - exists(boolean preservesValue | - Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and - if preservesValue = true then value = "value" else value = "taint" - ) - or - exists(ContentSet c | - Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and - value = "read (" + c + ")" - or - Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and - value = "store (" + c + ")" - or - Private::Steps::summaryClearsContent(a.asNode(), c) and - b = a and - value = "clear (" + c + ")" - or - Private::Steps::summaryExpectsContent(a.asNode(), c) and - b = a and - value = "expect (" + c + ")" - ) - or - summaryPostUpdateNode(b.asNode(), a.asNode()) and - value = "post-update" - or - b.asCall() = summaryDataFlowCall(a.asNode()) and - value = "receiver" - or - exists(ArgumentPosition pos | - summaryArgumentNode(b.asCall(), a.asNode(), pos) and - value = "argument (" + pos + ")" - ) - } - - query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { - key = "semmle.label" and - value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") - } + /** Gets a singleton stack representing a (normal) return. 
*/ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } } } + +module Public = Impl::Public; diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImplSpecific.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImplSpecific.qll deleted file mode 100644 index f27b27892de..00000000000 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImplSpecific.qll +++ /dev/null @@ -1,292 +0,0 @@ -/** - * Provides Go-specific classes and predicates for defining flow summaries. - */ - -private import go -private import DataFlowDispatch -private import DataFlowPrivate -private import DataFlowUtil -private import FlowSummaryImpl::Private -private import FlowSummaryImpl::Public -private import semmle.go.dataflow.ExternalFlow -private import DataFlowImplCommon - -private module FlowSummaries { - private import semmle.go.dataflow.FlowSummary as F -} - -/** - * A class of callables that are candidates for flow summary modeling. - */ -class SummarizedCallableBase = Callable; - -/** - * A class of callables that are candidates for neutral modeling. - */ -class NeutralCallableBase = Callable; - -DataFlowCallable inject(SummarizedCallable c) { result.asSummarizedCallable() = c or none() } - -/** Gets the parameter position of the instance parameter. */ -ArgumentPosition callbackSelfParameterPosition() { result = -1 } - -/** Gets the textual representation of a parameter position in the format used for flow summaries. */ -string getParameterPosition(ParameterPosition pos) { result = pos.toString() } - -/** Gets the textual representation of an argument position in the format used for flow summaries. */ -string getArgumentPosition(ArgumentPosition pos) { result = pos.toString() } - -/** Gets the synthesized data-flow call for `receiver`. */ -DataFlowCall summaryDataFlowCall(SummaryNode receiver) { - // We do not currently have support for callback-based library models. - none() -} - -/** Gets the type of content `c`. 
*/ -DataFlowType getContentType(Content c) { result = c.getType() } - -/** Gets the type of the parameter at the given position. */ -DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { any() } - -/** Gets the return type of kind `rk` for callable `c`. */ -DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() } - -/** - * Gets the type of the `i`th parameter in a synthesized call that targets a - * callback of type `t`. - */ -bindingset[t, pos] -DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() } - -/** - * Gets the return type of kind `rk` in a synthesized call that targets a - * callback of type `t`. - */ -DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() } - -/** Gets the type of synthetic global `sg`. */ -DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() } - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and provenance `provenance`. - */ -predicate summaryElement( - SummarizedCallableBase c, string input, string output, string kind, string provenance -) { - exists(string package, string type, boolean subtypes, string name, string signature, string ext | - summaryModel(package, type, subtypes, name, signature, ext, input, output, kind, provenance) and - c.asFunction() = interpretElement(package, type, subtypes, name, signature, ext).asEntity() - ) -} - -/** - * Holds if a neutral model exists for `c` of kind `kind` - * and with provenance `provenance`. - * Note. Neutral models have not been implemented for Go. - */ -predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() } - -/** Gets the summary component for specification component `c`, if any. 
*/ -bindingset[c] -SummaryComponent interpretComponentSpecific(string c) { - exists(int pos | parseReturn(c, pos) and result = SummaryComponent::return(getReturnKind(pos))) - or - exists(Content content | parseContent(c, content) and result = SummaryComponent::content(content)) -} - -/** Gets the summary component for specification component `c`, if any. */ -private string getContentSpecific(Content c) { - exists(Field f, string package, string className, string fieldName | - f = c.(FieldContent).getField() and - f.hasQualifiedName(package, className, fieldName) and - result = "Field[" + package + "." + className + "." + fieldName + "]" - ) - or - exists(SyntheticField f | - f = c.(SyntheticFieldContent).getField() and result = "SyntheticField[" + f + "]" - ) - or - c instanceof ArrayContent and result = "ArrayElement" - or - c instanceof CollectionContent and result = "Element" - or - c instanceof MapKeyContent and result = "MapKey" - or - c instanceof MapValueContent and result = "MapValue" - or - c instanceof PointerContent and result = "Dereference" -} - -/** Gets the textual representation of the content in the format used for MaD models. */ -string getMadRepresentationSpecific(SummaryComponent sc) { - exists(Content c | sc = TContentSummaryComponent(c) and result = getContentSpecific(c)) - or - exists(ReturnKind rk | - sc = TReturnSummaryComponent(rk) and - not rk = getReturnValueKind() and - result = "ReturnValue[" + rk.getIndex() + "]" - ) -} - -/** Holds if input specification component `c` needs a reference. */ -predicate inputNeedsReferenceSpecific(string c) { none() } - -/** Holds if output specification component `c` needs a reference. */ -predicate outputNeedsReferenceSpecific(string c) { parseReturn(c, _) } - -private newtype TSourceOrSinkElement = - TEntityElement(Entity e) or - TAstElement(AstNode n) - -/** An element representable by CSV modeling. 
*/ -class SourceOrSinkElement extends TSourceOrSinkElement { - /** Gets this source or sink element as an entity, if it is one. */ - Entity asEntity() { this = TEntityElement(result) } - - /** Gets this source or sink element as an AST node, if it is one. */ - AstNode asAstNode() { this = TAstElement(result) } - - /** Gets a textual representation of this source or sink element. */ - string toString() { - result = "element representing " + [this.asEntity().toString(), this.asAstNode().toString()] - } - - predicate hasLocationInfo(string fp, int sl, int sc, int el, int ec) { - this.asEntity().hasLocationInfo(fp, sl, sc, el, ec) or - this.asAstNode().hasLocationInfo(fp, sl, sc, el, ec) - } -} - -/** - * Holds if an external source specification exists for `e` with output specification - * `output`, kind `kind`, and provenance `provenance`. - */ -predicate sourceElement(SourceOrSinkElement e, string output, string kind, string provenance) { - exists(string package, string type, boolean subtypes, string name, string signature, string ext | - sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance) and - e = interpretElement(package, type, subtypes, name, signature, ext) - ) -} - -/** - * Holds if an external sink specification exists for `e` with input specification - * `input`, kind `kind` and provenance `provenance`. - */ -predicate sinkElement(SourceOrSinkElement e, string input, string kind, string provenance) { - exists(string package, string type, boolean subtypes, string name, string signature, string ext | - sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance) and - e = interpretElement(package, type, subtypes, name, signature, ext) - ) -} - -/** Gets the return kind corresponding to specification `"ReturnValue"`. 
*/ -ReturnKind getReturnValueKind() { result = getReturnKind(0) } - -private newtype TInterpretNode = - TElement(SourceOrSinkElement n) or - TNode(Node n) - -/** An entity used to interpret a source/sink specification. */ -class InterpretNode extends TInterpretNode { - /** Gets the element that this node corresponds to, if any. */ - SourceOrSinkElement asElement() { this = TElement(result) } - - /** Gets the data-flow node that this node corresponds to, if any. */ - Node asNode() { this = TNode(result) } - - /** Gets the call that this node corresponds to, if any. */ - DataFlowCall asCall() { result = this.asElement().asAstNode() } - - /** Gets the callable that this node corresponds to, if any. */ - DataFlowCallable asCallable() { - result.asSummarizedCallable().asFunction() = this.asElement().asEntity() - } - - /** Gets the target of this call, if any. */ - SourceOrSinkElement getCallTarget() { - result.asEntity() = this.asCall().getNode().(DataFlow::CallNode).getTarget() - } - - /** Gets a textual representation of this node. */ - string toString() { - result = this.asElement().toString() - or - result = this.asNode().toString() - } - - /** Gets the location of this node. */ - predicate hasLocationInfo(string fp, int sl, int sc, int el, int ec) { - this.asElement().hasLocationInfo(fp, sl, sc, el, ec) - or - this.asNode().hasLocationInfo(fp, sl, sc, el, ec) - } -} - -/** Provides additional sink specification logic required for annotations. 
*/ -pragma[inline] -predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { - exists(int pos | node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnKind(pos))) | - parseReturn(c, pos) - ) - or - exists(Node n, SourceOrSinkElement e | - n = node.asNode() and - e = mid.asElement() - | - (c = "Parameter" or c = "") and - node.asNode().asParameter() = e.asEntity() - or - c = "" and - n.(DataFlow::FieldReadNode).getField() = e.asEntity() - ) -} - -/** Provides additional source specification logic required for annotations. */ -pragma[inline] -predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) { - exists(int pos, ReturnNodeExt ret | - parseReturn(c, pos) and - ret = n.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnKind(pos) and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - exists(DataFlow::Write fw, Field f | - c = "" and - f = mid.asElement().asEntity() and - fw.writesField(_, f, n.asNode()) - ) -} - -/** - * Holds if specification component `c` parses as return value `n` or a range - * containing `n`. - */ -predicate parseReturn(AccessPathToken c, int n) { - ( - c = "ReturnValue" and n = 0 - or - c.getName() = "ReturnValue" and - n = parseConstantOrRange(c.getAnArgument()) - ) -} - -bindingset[arg] -private int parseConstantOrRange(string arg) { - result = arg.toInt() - or - exists(int n1, int n2 | - arg.regexpCapture("([-0-9]+)\\.\\.([0-9]+)", 1).toInt() = n1 and - arg.regexpCapture("([-0-9]+)\\.\\.([0-9]+)", 2).toInt() = n2 and - result = [n1 .. n2] - ) -} - -/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */ -bindingset[arg] -ArgumentPosition parseParamBody(string arg) { result = parseConstantOrRange(arg) } - -/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. 
*/ -bindingset[arg] -ParameterPosition parseArgBody(string arg) { result = parseConstantOrRange(arg) } diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll index 177d129cbc0..787bf40e9cc 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll @@ -3,8 +3,8 @@ */ import go -private import semmle.go.dataflow.FlowSummary private import semmle.go.dataflow.internal.DataFlowPrivate +private import semmle.go.dataflow.internal.FlowSummaryImpl::Private /** Provides models of commonly used functions in the `net/http` package. */ module NetHttp { @@ -154,7 +154,7 @@ module NetHttp { ) or exists( - SummarizedCallable callable, DataFlow::CallNode call, SummaryComponentStack input, + SummarizedCallableImpl callable, DataFlow::CallNode call, SummaryComponentStack input, SummaryComponentStack output | this = call.getASyntacticArgument() and diff --git a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll index 48ee41630d9..d2ce3d0a7d6 100644 --- a/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll @@ -89,12 +89,13 @@ import java private import semmle.code.java.dataflow.DataFlow::DataFlow +private import FlowSummary as FlowSummary private import internal.DataFlowPrivate +private import internal.FlowSummaryImpl +private import internal.FlowSummaryImpl::Public +private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External -private import internal.FlowSummaryImplSpecific as FlowSummaryImplSpecific -private import internal.AccessPathSyntax private import internal.ExternalFlowExtensions as Extensions -private import FlowSummary private import codeql.mad.ModelValidation as SharedModelVal /** @@ -234,6 +235,21 @@ predicate modelCoverage(string package, int pkgs, string kind, string part, int /** 
Provides a query predicate to check the MaD models for validation errors. */ module ModelValidation { + private import codeql.dataflow.internal.AccessPathSyntax as AccessPathSyntax + + private predicate getRelevantAccessPath(string path) { + summaryModel(_, _, _, _, _, _, path, _, _, _) or + summaryModel(_, _, _, _, _, _, _, path, _, _) or + sinkModel(_, _, _, _, _, _, path, _, _) or + sourceModel(_, _, _, _, _, _, path, _, _) + } + + private module MkAccessPath = AccessPathSyntax::AccessPath; + + class AccessPath = MkAccessPath::AccessPath; + + class AccessPathToken = MkAccessPath::AccessPathToken; + private string getInvalidModelInput() { exists(string pred, AccessPath input, AccessPathToken part | sinkModel(_, _, _, _, _, _, input, _, _) and pred = "sink" @@ -478,7 +494,9 @@ private module Cached { */ cached predicate sourceNode(Node node, string kind) { - exists(FlowSummaryImplSpecific::InterpretNode n | isSourceNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSourceNode(n, kind) and n.asNode() = node + ) } /** @@ -487,8 +505,56 @@ private module Cached { */ cached predicate sinkNode(Node node, string kind) { - exists(FlowSummaryImplSpecific::InterpretNode n | isSinkNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSinkNode(n, kind) and n.asNode() = node + ) } } import Cached + +// adapter class for converting Mad summaries to `SummarizedCallable`s +private class SummarizedCallableAdapter extends SummarizedCallable { + SummarizedCallableAdapter() { summaryElement(this, _, _, _, _) } + + private predicate relevantSummaryElementManual(string input, string output, string kind) { + exists(Provenance provenance | + summaryElement(this, input, output, kind, provenance) and + provenance.isManual() + ) + } + + private predicate relevantSummaryElementGenerated(string input, string output, string kind) { + exists(Provenance provenance | + summaryElement(this, input, output, 
kind, provenance) and + provenance.isGenerated() + ) + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + exists(string kind | + this.relevantSummaryElementManual(input, output, kind) + or + not this.relevantSummaryElementManual(_, _, _) and + this.relevantSummaryElementGenerated(input, output, kind) + | + if kind = "value" then preservesValue = true else preservesValue = false + ) + } + + override predicate hasProvenance(Provenance provenance) { + summaryElement(this, _, _, _, provenance) + } +} + +// adapter class for converting Mad neutrals to `NeutralCallable`s +private class NeutralCallableAdapter extends NeutralCallable { + string kind; + string provenance_; + + NeutralCallableAdapter() { neutralElement(this, kind, provenance_) } + + override string getKind() { result = kind } + + override predicate hasProvenance(Provenance provenance) { provenance = provenance_ } +} diff --git a/java/ql/lib/semmle/code/java/dataflow/FlowSummary.qll b/java/ql/lib/semmle/code/java/dataflow/FlowSummary.qll index d3c9fe3b08b..1dd0c2c11fa 100644 --- a/java/ql/lib/semmle/code/java/dataflow/FlowSummary.qll +++ b/java/ql/lib/semmle/code/java/dataflow/FlowSummary.qll @@ -6,63 +6,13 @@ import java private import internal.FlowSummaryImpl as Impl private import internal.DataFlowUtil -class SummaryComponent = Impl::Public::SummaryComponent; +deprecated class SummaryComponent = Impl::Private::SummaryComponent; -/** Provides predicates for constructing summary components. */ -module SummaryComponent { - import Impl::Public::SummaryComponent +deprecated module SummaryComponent = Impl::Private::SummaryComponent; - /** Gets a summary component that represents a qualifier. */ - SummaryComponent qualifier() { result = argument(-1) } +deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack; - /** Gets a summary component for field `f`. 
*/ - SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) } - - /** Gets a summary component for `Element`. */ - SummaryComponent element() { result = content(any(CollectionContent c)) } - - /** Gets a summary component for `ArrayElement`. */ - SummaryComponent arrayElement() { result = content(any(ArrayContent c)) } - - /** Gets a summary component for `MapValue`. */ - SummaryComponent mapValue() { result = content(any(MapValueContent c)) } - - /** Gets a summary component that represents the return value of a call. */ - SummaryComponent return() { result = return(_) } -} - -class SummaryComponentStack = Impl::Public::SummaryComponentStack; - -/** Provides predicates for constructing stacks of summary components. */ -module SummaryComponentStack { - import Impl::Public::SummaryComponentStack - - /** Gets a singleton stack representing a qualifier. */ - SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) } - - /** Gets a stack representing a field `f` of `object`. */ - SummaryComponentStack fieldOf(Field f, SummaryComponentStack object) { - result = push(SummaryComponent::field(f), object) - } - - /** Gets a stack representing `Element` of `object`. */ - SummaryComponentStack elementOf(SummaryComponentStack object) { - result = push(SummaryComponent::element(), object) - } - - /** Gets a stack representing `ArrayElement` of `object`. */ - SummaryComponentStack arrayElementOf(SummaryComponentStack object) { - result = push(SummaryComponent::arrayElement(), object) - } - - /** Gets a stack representing `MapValue` of `object`. */ - SummaryComponentStack mapValueOf(SummaryComponentStack object) { - result = push(SummaryComponent::mapValue(), object) - } - - /** Gets a singleton stack representing a (normal) return. 
*/ - SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } -} +deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; /** A synthetic callable with a set of concrete call sites and a flow summary. */ abstract class SyntheticCallable extends string { @@ -77,11 +27,7 @@ abstract class SyntheticCallable extends string { * * See `SummarizedCallable::propagatesFlow` for details. */ - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } + abstract predicate propagatesFlow(string input, string output, boolean preservesValue); /** * Gets the type of the parameter at the specified position with -1 indicating @@ -180,11 +126,9 @@ class SummarizedCallable = Impl::Public::SummarizedCallable; * to `SummarizedCallable`. */ private class SummarizedSyntheticCallableAdapter extends SummarizedCallable, TSyntheticCallable { - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { this.asSyntheticCallable().propagatesFlow(input, output, preservesValue) } } -class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; +deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll b/java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. 
- * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. 
- lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. 
- result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. 
*/ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowNodes.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowNodes.qll index 415929f4f72..8bcbf0635a3 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowNodes.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowNodes.qll @@ -491,16 +491,16 @@ module Private { override string toString() { result = this.getSummaryNode().toString() } /** Holds if this summary node is the `i`th argument of `call`. */ - predicate isArgumentOf(DataFlowCall call, int i) { - FlowSummaryImpl::Private::summaryArgumentNode(call, this.getSummaryNode(), i) + predicate isArgumentOf(SummaryCall call, int i) { + FlowSummaryImpl::Private::summaryArgumentNode(call.getReceiver(), this.getSummaryNode(), i) } /** Holds if this summary node is a return node. */ predicate isReturn() { FlowSummaryImpl::Private::summaryReturnNode(this.getSummaryNode(), _) } /** Holds if this summary node is an out node for `call`. 
*/ - predicate isOut(DataFlowCall call) { - FlowSummaryImpl::Private::summaryOutNode(call, this.getSummaryNode(), _) + predicate isOut(SummaryCall call) { + FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), _) } } diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll index f5466b2d739..2442671ac52 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll @@ -578,7 +578,10 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves * by default as a heuristic. */ predicate allowParameterReturnInSelf(ParameterNode p) { - FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p) + exists(DataFlowCallable c, ParameterPosition pos | + parameterNode(p, c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asSummarizedCallable(), pos) + ) or CaptureFlow::heuristicAllowInstanceParameterReturnInSelf(p.(InstanceParameterNode).getCallable()) } diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll b/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll index 0aa17c521b4..d5364567d88 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll @@ -1,1491 +1,408 @@ /** * Provides classes and predicates for defining flow summaries. - * - * The definitions in this file are language-independent, and language-specific - * definitions are passed in via the `DataFlowImplSpecific` and - * `FlowSummaryImplSpecific` modules. 
*/ -private import FlowSummaryImplSpecific +private import java +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import DataFlowDispatch +private import DataFlowPrivate +private import DataFlowUtil +private import DataFlowImplSpecific as DataFlowImplSpecific private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public -private import DataFlowImplCommon -private import codeql.util.Unit +private import semmle.code.java.dataflow.ExternalFlow +private import semmle.code.java.dataflow.FlowSummary as FlowSummary -/** Provides classes and predicates for defining flow summaries. */ -module Public { - private import Private +/** + * A module for importing frameworks that define synthetic globals. + */ +private module SyntheticGlobals { + private import semmle.code.java.frameworks.android.Intent +} - /** - * A component used in a flow summary. - * - * Either a parameter or an argument at a given position, a specific - * content type, or a return kind. - */ - class SummaryComponent extends TSummaryComponent { - /** Gets a textual representation of this component used for MaD models. 
*/ - string getMadRepresentation() { - result = getMadRepresentationSpecific(this) - or - exists(ArgumentPosition pos | - this = TParameterSummaryComponent(pos) and - result = "Parameter[" + getArgumentPosition(pos) + "]" +bindingset[pos] +private string positionToString(int pos) { + if pos = -1 then result = "this" else result = pos.toString() +} + +module Input implements InputSig { + class SummarizedCallableBase = FlowSummary::SummarizedCallableBase; + + ArgumentPosition callbackSelfParameterPosition() { result = -1 } + + ReturnKind getStandardReturnValueKind() { any() } + + string encodeParameterPosition(ParameterPosition pos) { result = positionToString(pos) } + + string encodeArgumentPosition(ArgumentPosition pos) { result = positionToString(pos) } + + string encodeContent(ContentSet c, string arg) { + exists(Field f, string package, string className, string fieldName | + f = c.(FieldContent).getField() and + f.hasQualifiedName(package, className, fieldName) and + result = "Field" and + arg = package + "." + className + "." 
+ fieldName + ) + or + exists(SyntheticField f | + f = c.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + ) + or + c instanceof ArrayContent and result = "ArrayElement" and arg = "" + or + c instanceof CollectionContent and result = "Element" and arg = "" + or + c instanceof MapKeyContent and result = "MapKey" and arg = "" + or + c instanceof MapValueContent and result = "MapValue" and arg = "" + } + + string encodeWithoutContent(ContentSet c, string arg) { + result = "WithoutElement" and + c instanceof CollectionContent and + arg = "" + } + + string encodeWithContent(ContentSet c, string arg) { + result = "WithElement" and + c instanceof CollectionContent and + arg = "" + } + + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges and `Argument[-1]` + token.getName() = "Argument" and + result = AccessPath::parseInt(token.getAnArgument()) + } + + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges and `Parameter[-1]` + token.getName() = "Parameter" and + result = AccessPath::parseInt(token.getAnArgument()) + } +} + +private import Make as Impl + +private module TypesInput implements Impl::Private::TypesInputSig { + DataFlowType getSyntheticGlobalType(Impl::Private::SyntheticGlobal sg) { + exists(sg) and + result instanceof TypeObject + } + + DataFlowType getContentType(ContentSet c) { result = c.(Content).getType() } + + DataFlowType getParameterType(Impl::Public::SummarizedCallable c, ParameterPosition pos) { + result = getErasedRepr(c.getParameterType(pos)) + } + + DataFlowType getReturnType(Impl::Public::SummarizedCallable c, ReturnKind rk) { + result = getErasedRepr(c.getReturnType()) and + exists(rk) + } + + DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { + result = 
getErasedRepr(t.(FunctionalInterface).getRunMethod().getParameterType(pos)) + or + result = getErasedRepr(t.(FunctionalInterface)) and pos = -1 + } + + DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { + result = getErasedRepr(t.(FunctionalInterface).getRunMethod().getReturnType()) and + exists(rk) + } +} + +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { + sc = viableCallable(result).asSummarizedCallable() + } +} + +private predicate relatedArgSpec(Callable c, string spec) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext + | + summaryModel(namespace, type, subtypes, name, signature, ext, spec, _, _, _) or + summaryModel(namespace, type, subtypes, name, signature, ext, _, spec, _, _) or + sourceModel(namespace, type, subtypes, name, signature, ext, spec, _, _) or + sinkModel(namespace, type, subtypes, name, signature, ext, spec, _, _) + | + c = interpretElement(namespace, type, subtypes, name, signature, ext) + ) +} + +/** + * Holds if `defaultsCallable` is a Kotlin default-parameter proxy for `originalCallable`, and + * `originalCallable` has a model, and `defaultsArgSpec` is `originalArgSpec` adjusted to account + * for the additional dispatch receiver parameter that occurs in the default-parameter proxy's argument + * list. When no adjustment is required (e.g. for constructors, or non-argument-based specs), `defaultArgsSpec` + * equals `originalArgSpec`. + * + * Note in the case where `originalArgSpec` uses an integer range, like `Argument[1..3]...`, this will produce multiple + * results for `defaultsArgSpec`, like `{Argument[2]..., Argument[3]..., Argument[4]...}`. 
+ */ +private predicate correspondingKotlinParameterDefaultsArgSpec( + Callable originalCallable, Callable defaultsCallable, string originalArgSpec, + string defaultsArgSpec +) { + relatedArgSpec(originalCallable, originalArgSpec) and + defaultsCallable = originalCallable.getKotlinParameterDefaultsProxy() and + ( + originalCallable instanceof Constructor and originalArgSpec = defaultsArgSpec + or + originalCallable instanceof Method and + exists(string regex | + // Note I use a regex and not AccessPathToken because this feeds summaryElement et al, + // which would introduce mutual recursion with the definition of AccessPathToken. + regex = "Argument\\[([0-9,\\. ]+)\\](.*)" and + ( + exists(string oldArgNumber, string rest, int paramOffset | + oldArgNumber = originalArgSpec.regexpCapture(regex, 1) and + rest = originalArgSpec.regexpCapture(regex, 2) and + paramOffset = + defaultsCallable.getNumberOfParameters() - + (originalCallable.getNumberOfParameters() + 2) and + exists(int oldArgParsed | + oldArgParsed = AccessPath::parseInt(oldArgNumber.splitAt(",").trim()) + | + if + ktExtensionFunctions(originalCallable, _, _) and + ktExtensionFunctions(defaultsCallable, _, _) and + oldArgParsed = 0 + then defaultsArgSpec = "Argument[" + paramOffset + "]" // 1 if dispatch receiver is present, 0 otherwise. 
+ else defaultsArgSpec = "Argument[" + (oldArgParsed + paramOffset) + "]" + rest + ) + ) + or + not originalArgSpec.regexpMatch(regex) and + defaultsArgSpec = originalArgSpec ) - or - exists(ParameterPosition pos | - this = TArgumentSummaryComponent(pos) and - result = "Argument[" + getParameterPosition(pos) + "]" + ) + ) +} + +module SourceSinkInterpretationInput implements + Impl::Private::External::SourceSinkInterpretationInputSig +{ + private import java as J + + class Element = J::Element; + + predicate sourceElement(Element e, string output, string kind) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + SourceOrSinkElement baseSource, string originalOutput + | + sourceModel(namespace, type, subtypes, name, signature, ext, originalOutput, kind, _) and + baseSource = interpretElement(namespace, type, subtypes, name, signature, ext) and + ( + e = baseSource and output = originalOutput + or + correspondingKotlinParameterDefaultsArgSpec(baseSource, e, originalOutput, output) ) - or - exists(string synthetic | - this = TSyntheticGlobalSummaryComponent(synthetic) and - result = "SyntheticGlobal[" + synthetic + "]" + ) + } + + predicate sinkElement(Element e, string input, string kind) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + SourceOrSinkElement baseSink, string originalInput + | + sinkModel(namespace, type, subtypes, name, signature, ext, originalInput, kind, _) and + baseSink = interpretElement(namespace, type, subtypes, name, signature, ext) and + ( + e = baseSink and originalInput = input + or + correspondingKotlinParameterDefaultsArgSpec(baseSink, e, originalInput, input) ) + ) + } + + class SourceOrSinkElement = Element; + + private newtype TInterpretNode = + TElement(SourceOrSinkElement n) or + TNode(Node n) + + /** An entity used to interpret a source/sink specification. 
*/ + class InterpretNode extends TInterpretNode { + /** Gets the element that this node corresponds to, if any. */ + SourceOrSinkElement asElement() { this = TElement(result) } + + /** Gets the data-flow node that this node corresponds to, if any. */ + Node asNode() { this = TNode(result) } + + /** Gets the call that this node corresponds to, if any. */ + DataFlowCall asCall() { result.asCall() = this.asElement() } + + /** Gets the callable that this node corresponds to, if any. */ + DataFlowCallable asCallable() { result.asCallable() = this.asElement() } + + /** Gets the target of this call, if any. */ + Element getCallTarget() { result = this.asCall().asCall().getCallee().getSourceDeclaration() } + + /** Gets a textual representation of this node. */ + string toString() { + result = this.asElement().toString() or - this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" + result = this.asNode().toString() } - /** Gets a textual representation of this summary component. */ - string toString() { result = this.getMadRepresentation() } + /** Gets the location of this node. */ + Location getLocation() { + result = this.asElement().getLocation() + or + result = this.asNode().getLocation() + } + } + + /** Provides additional sink specification logic required for annotations. */ + bindingset[c] + predicate interpretOutput(string c, InterpretNode mid, InterpretNode node) { + exists(Node n, Top ast | + n = node.asNode() and + ast = mid.asElement() + | + (c = "Parameter" or c = "") and + node.asNode().asParameter() = mid.asElement() + or + c = "" and + n.asExpr().(FieldRead).getField() = ast + ) + } + + /** Provides additional source specification logic required for annotations. 
*/ + bindingset[c] + predicate interpretInput(string c, InterpretNode mid, InterpretNode n) { + exists(FieldWrite fw | + c = "" and + fw.getField() = mid.asElement() and + n.asNode().asExpr() = fw.getASource() + ) + } +} + +module Private { + import Impl::Private + import Impl::Private::Types + + module Steps = Impl::Private::Steps; + + module External { + import Impl::Private::External + import Impl::Private::External::SourceSinkInterpretation + + /** + * Holds if an external flow summary exists for `c` with input specification + * `input`, output specification `output`, kind `kind`, and provenance `provenance`. + */ + predicate summaryElement( + Input::SummarizedCallableBase c, string input, string output, string kind, string provenance + ) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string originalInput, string originalOutput, Callable baseCallable + | + summaryModel(namespace, type, subtypes, name, signature, ext, originalInput, originalOutput, + kind, provenance) and + baseCallable = interpretElement(namespace, type, subtypes, name, signature, ext) and + ( + c.asCallable() = baseCallable and input = originalInput and output = originalOutput + or + correspondingKotlinParameterDefaultsArgSpec(baseCallable, c.asCallable(), originalInput, + input) and + correspondingKotlinParameterDefaultsArgSpec(baseCallable, c.asCallable(), originalOutput, + output) + ) + ) + } + + /** + * Holds if a neutral model exists for `c` of kind `kind` + * and with provenance `provenance`. + */ + predicate neutralElement(Input::SummarizedCallableBase c, string kind, string provenance) { + exists(string namespace, string type, string name, string signature | + neutralModel(namespace, type, name, signature, kind, provenance) and + c.asCallable() = interpretElement(namespace, type, false, name, signature, "") + ) + } } /** Provides predicates for constructing summary components. 
*/ module SummaryComponent { - /** Gets a summary component for content `c`. */ - SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } + import Impl::Private::SummaryComponent - /** Gets a summary component where data is not allowed to be stored in `c`. */ - SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } + /** Gets a summary component that represents a qualifier. */ + SummaryComponent qualifier() { result = argument(-1) } - /** Gets a summary component where data must be stored in `c`. */ - SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } + /** Gets a summary component for field `f`. */ + SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) } - /** Gets a summary component for a parameter at position `pos`. */ - SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } + /** Gets a summary component for `Element`. */ + SummaryComponent element() { result = content(any(CollectionContent c)) } - /** Gets a summary component for an argument at position `pos`. */ - SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } + /** Gets a summary component for `ArrayElement`. */ + SummaryComponent arrayElement() { result = content(any(ArrayContent c)) } - /** Gets a summary component for a return of kind `rk`. */ - SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } + /** Gets a summary component for `MapValue`. */ + SummaryComponent mapValue() { result = content(any(MapValueContent c)) } - /** Gets a summary component for synthetic global `sg`. */ - SummaryComponent syntheticGlobal(SyntheticGlobal sg) { - result = TSyntheticGlobalSummaryComponent(sg) - } + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = return(_) } - /** - * A synthetic global. 
This represents some form of global state, which - * summaries can read and write individually. - */ - abstract class SyntheticGlobal extends string { - bindingset[this] - SyntheticGlobal() { any() } - } + class SyntheticGlobal = Impl::Private::SyntheticGlobal; } - /** - * A (non-empty) stack of summary components. - * - * A stack is used to represent where data is read from (input) or where it - * is written to (output). For example, an input stack `[Field f, Argument 0]` - * means that data is read from field `f` from the `0`th argument, while an - * output stack `[Field g, Return]` means that data is written to the field - * `g` of the returned object. - */ - class SummaryComponentStack extends TSummaryComponentStack { - /** Gets the head of this stack. */ - SummaryComponent head() { - this = TSingletonSummaryComponentStack(result) or - this = TConsSummaryComponentStack(result, _) - } - - /** Gets the tail of this stack, if any. */ - SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } - - /** Gets the length of this stack. */ - int length() { - this = TSingletonSummaryComponentStack(_) and result = 1 - or - result = 1 + this.tail().length() - } - - /** Gets the stack obtained by dropping the first `i` elements, if any. */ - SummaryComponentStack drop(int i) { - i = 0 and result = this - or - result = this.tail().drop(i - 1) - } - - /** Holds if this stack contains summary component `c`. */ - predicate contains(SummaryComponent c) { c = this.drop(_).head() } - - /** Gets the bottom element of this stack. */ - SummaryComponent bottom() { - this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() - } - - /** Gets a textual representation of this stack used for MaD models. */ - string getMadRepresentation() { - exists(SummaryComponent head, SummaryComponentStack tail | - head = this.head() and - tail = this.tail() and - result = tail.getMadRepresentation() + "." 
+ head.getMadRepresentation() - ) - or - exists(SummaryComponent c | - this = TSingletonSummaryComponentStack(c) and - result = c.getMadRepresentation() - ) - } - - /** Gets a textual representation of this stack. */ - string toString() { result = this.getMadRepresentation() } - } - - /** Provides predicates for constructing stacks of summary components. */ module SummaryComponentStack { - /** Gets a singleton stack containing `c`. */ - SummaryComponentStack singleton(SummaryComponent c) { - result = TSingletonSummaryComponentStack(c) + import Impl::Private::SummaryComponentStack + + /** Gets a singleton stack representing a qualifier. */ + SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) } + + /** Gets a stack representing a field `f` of `object`. */ + SummaryComponentStack fieldOf(Field f, SummaryComponentStack object) { + result = push(SummaryComponent::field(f), object) } - /** - * Gets the stack obtained by pushing `head` onto `tail`. - * - * Make sure to override `RequiredSummaryComponentStack::required()` in order - * to ensure that the constructed stack exists. - */ - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { - result = TConsSummaryComponentStack(head, tail) + /** Gets a stack representing `Element` of `object`. */ + SummaryComponentStack elementOf(SummaryComponentStack object) { + result = push(SummaryComponent::element(), object) } - /** Gets a singleton stack for an argument at position `pos`. */ - SummaryComponentStack argument(ParameterPosition pos) { - result = singleton(SummaryComponent::argument(pos)) + /** Gets a stack representing `ArrayElement` of `object`. */ + SummaryComponentStack arrayElementOf(SummaryComponentStack object) { + result = push(SummaryComponent::arrayElement(), object) } - /** Gets a singleton stack representing a return of kind `rk`. 
*/ - SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } + /** Gets a stack representing `MapValue` of `object`. */ + SummaryComponentStack mapValueOf(SummaryComponentStack object) { + result = push(SummaryComponent::mapValue(), object) + } + + /** Gets a singleton stack representing a (normal) return. */ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } } - /** - * A class that exists for QL technical reasons only (the IPA type used - * to represent component stacks needs to be bounded). - */ - class RequiredSummaryComponentStack extends Unit { - /** - * Holds if the stack obtained by pushing `head` onto `tail` is required. - */ - abstract predicate required(SummaryComponent head, SummaryComponentStack tail); + /** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */ + bindingset[s] + ArgumentPosition parseParamBody(string s) { + result = AccessPath::parseInt(s) + or + s = "this" and result = -1 } - /** - * Gets the valid model origin values. - */ - private string getValidModelOrigin() { - result = - [ - "ai", // AI (machine learning) - "df", // Dataflow (model generator) - "tb", // Type based (model generator) - "hq", // Heuristic query - ] - } - - /** - * A class used to represent provenance values for MaD models. - * - * The provenance value is a string of the form `origin-verification` - * (or just `manual`), where `origin` is a value indicating the - * origin of the model, and `verification` is a value indicating, how - * the model was verified. - * - * Examples could be: - * - `df-generated`: A model produced by the model generator, but not verified by a human. - * - `ai-manual`: A model produced by AI, but verified by a human. 
- */ - class Provenance extends string { - private string verification; - - Provenance() { - exists(string origin | origin = getValidModelOrigin() | - this = origin + "-" + verification and - verification = ["manual", "generated"] - ) - or - this = verification and verification = "manual" - } - - /** - * Holds if this is a valid generated provenance value. - */ - predicate isGenerated() { verification = "generated" } - - /** - * Holds if this is a valid manual provenance value. - */ - predicate isManual() { verification = "manual" } - } - - /** A callable with a flow summary. */ - abstract class SummarizedCallable extends SummarizedCallableBase { - bindingset[this] - SummarizedCallable() { any() } - - /** - * Holds if data may flow from `input` to `output` through this callable. - * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. - * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. - * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. - */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if there exists a generated summary that applies to this callable. 
- */ - final predicate hasGeneratedModel() { - exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) - } - - /** - * Holds if all the summaries that apply to this callable are auto generated and not manually created. - * That is, only apply generated models, when there are no manual models. - */ - final predicate applyGeneratedModel() { - this.hasGeneratedModel() and - not this.hasManualModel() - } - - /** - * Holds if there exists a manual summary that applies to this callable. - */ - final predicate hasManualModel() { - exists(Provenance p | p.isManual() and this.hasProvenance(p)) - } - - /** - * Holds if there exists a manual summary that applies to this callable. - * Always apply manual models if they exist. - */ - final predicate applyManualModel() { this.hasManualModel() } - - /** - * Holds if there exists a summary that applies to this callable - * that has provenance `provenance`. - */ - predicate hasProvenance(Provenance provenance) { provenance = "manual" } - } - - /** - * A callable where there is no flow via the callable. - */ - class NeutralSummaryCallable extends NeutralCallable { - NeutralSummaryCallable() { this.getKind() = "summary" } - } - - /** - * A callable that has a neutral model. - */ - class NeutralCallable extends NeutralCallableBase { - private string kind; - private Provenance provenance; - - NeutralCallable() { neutralElement(this, kind, provenance) } - - /** - * Holds if the neutral is auto generated. - */ - final predicate hasGeneratedModel() { provenance.isGenerated() } - - /** - * Holds if there exists a manual neutral that applies to this callable. - */ - final predicate hasManualModel() { provenance.isManual() } - - /** - * Holds if the neutral has provenance `p`. - */ - predicate hasProvenance(Provenance p) { p = provenance } - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = kind } + /** Gets the parameter position obtained by parsing `X` in `Argument[X]`. 
*/ + bindingset[s] + ParameterPosition parseArgBody(string s) { + result = AccessPath::parseInt(s) + or + s = "this" and result = -1 } } -/** - * Provides predicates for compiling flow summaries down to atomic local steps, - * read steps, and store steps. - */ -module Private { - private import Public - import AccessPathSyntax - - newtype TSummaryComponent = - TContentSummaryComponent(ContentSet c) or - TParameterSummaryComponent(ArgumentPosition pos) or - TArgumentSummaryComponent(ParameterPosition pos) or - TReturnSummaryComponent(ReturnKind rk) or - TSyntheticGlobalSummaryComponent(SummaryComponent::SyntheticGlobal sg) or - TWithoutContentSummaryComponent(ContentSet c) or - TWithContentSummaryComponent(ContentSet c) - - private TParameterSummaryComponent callbackSelfParam() { - result = TParameterSummaryComponent(callbackSelfParameterPosition()) - } - - newtype TSummaryComponentStack = - TSingletonSummaryComponentStack(SummaryComponent c) or - TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { - any(RequiredSummaryComponentStack x).required(head, tail) - or - any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and - head = callbackSelfParam() - or - derivedFluentFlowPush(_, _, _, head, tail, _) - } - - pragma[nomagic] - private predicate summary( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - ) { - c.propagatesFlow(input, output, preservesValue) - or - // observe side effects of callbacks on input arguments - c.propagatesFlow(output, input, preservesValue) and - preservesValue = true and - isCallbackParameter(input) and - isContentOfArgument(output, _) - or - // flow from the receiver of a callback into the instance-parameter - exists(SummaryComponentStack s, SummaryComponentStack callbackRef | - c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _) - | - callbackRef = s.drop(_) and - (isCallbackParameter(callbackRef) or callbackRef.head() 
= TReturnSummaryComponent(_)) and - input = callbackRef.tail() and - output = TConsSummaryComponentStack(callbackSelfParam(), input) and - preservesValue = true - ) - or - exists(SummaryComponentStack arg, SummaryComponentStack return | - derivedFluentFlow(c, input, arg, return, preservesValue) - | - arg.length() = 1 and - output = return - or - exists(SummaryComponent head, SummaryComponentStack tail | - derivedFluentFlowPush(c, input, arg, head, tail, 0) and - output = SummaryComponentStack::push(head, tail) - ) - ) - or - // Chain together summaries where values get passed into callbacks along the way - exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | - c.propagatesFlow(input, mid, preservesValue1) and - c.propagatesFlow(mid, output, preservesValue2) and - mid.drop(mid.length() - 2) = - SummaryComponentStack::push(TParameterSummaryComponent(_), - SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and - preservesValue = preservesValue1.booleanAnd(preservesValue2) - ) - } - - /** - * Holds if `c` has a flow summary from `input` to `arg`, where `arg` - * writes to (contents of) arguments at position `pos`, and `c` has a - * value-preserving flow summary from the arguments at position `pos` - * to a return value (`return`). - * - * In such a case, we derive flow from `input` to (contents of) the return - * value. - * - * As an example, this simplifies modeling of fluent methods: - * for `StringBuilder.append(x)` with a specified value flow from qualifier to - * return value and taint flow from argument 0 to the qualifier, then this - * allows us to infer taint flow from argument 0 to the return value. 
- */ - pragma[nomagic] - private predicate derivedFluentFlow( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponentStack return, boolean preservesValue - ) { - exists(ParameterPosition pos | - summary(c, input, arg, preservesValue) and - isContentOfArgument(arg, pos) and - summary(c, SummaryComponentStack::argument(pos), return, true) and - return.bottom() = TReturnSummaryComponent(_) - ) - } - - pragma[nomagic] - private predicate derivedFluentFlowPush( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponent head, SummaryComponentStack tail, int i - ) { - derivedFluentFlow(c, input, arg, tail, _) and - head = arg.drop(i).head() and - i = arg.length() - 2 - or - exists(SummaryComponent head0, SummaryComponentStack tail0 | - derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and - head = arg.drop(i).head() and - tail = SummaryComponentStack::push(head0, tail0) - ) - } - - private predicate isCallbackParameter(SummaryComponentStack s) { - s.head() = TParameterSummaryComponent(_) and exists(s.tail()) - } - - private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { - s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) - or - s = SummaryComponentStack::argument(pos) - } - - private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, _, s, _) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TContentSummaryComponent(_) and - s = out.tail() - ) - or - // Add the argument node corresponding to the requested post-update node - inputState(c, s) and isCallbackParameter(s) - } - - private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, s, _, _) - or - exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = 
TParameterSummaryComponent(_) and - s = out.tail() - ) - or - // Add the post-update node corresponding to the requested argument node - outputState(c, s) and isCallbackParameter(s) - or - // Add the parameter node for parameter side-effects - outputState(c, s) and s = SummaryComponentStack::argument(_) - } - - private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or - TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } - - /** - * A state used to break up (complex) flow summaries into atomic flow steps. - * For a flow summary - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * the following states are used: - * - * - `TSummaryNodeInputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _have been read_ from the - * input. - * - `TSummaryNodeOutputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _remain to be written_ to - * the output. - */ - private class SummaryNodeState extends TSummaryNodeState { - /** Holds if this state is a valid input state for `c`. */ - pragma[nomagic] - predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeInputState(s) and - inputState(c, s) - } - - /** Holds if this state is a valid output state for `c`. */ - pragma[nomagic] - predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeOutputState(s) and - outputState(c, s) - } - - /** Gets a textual representation of this state. 
*/ - string toString() { - exists(SummaryComponentStack s | - this = TSummaryNodeInputState(s) and - result = "read: " + s - ) - or - exists(SummaryComponentStack s | - this = TSummaryNodeOutputState(s) and - result = "to write: " + s - ) - } - } - - private newtype TSummaryNode = - TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { - summaryNodeRange(c, state) - } or - TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { - summaryParameterNodeRange(c, pos) - } - - abstract class SummaryNode extends TSummaryNode { - abstract string toString(); - - abstract SummarizedCallable getSummarizedCallable(); - } - - private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { - private SummarizedCallable c; - private SummaryNodeState state; - - SummaryInternalNode() { this = TSummaryInternalNode(c, state) } - - override string toString() { result = "[summary] " + state + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { - private SummarizedCallable c; - private ParameterPosition pos; - - SummaryParamNode() { this = TSummaryParameterNode(c, pos) } - - override string toString() { result = "[summary param] " + pos + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - /** - * Holds if `state` represents having read from a parameter at position - * `pos` in `c`. In this case we are not synthesizing a data-flow node, - * but instead assume that a relevant parameter node already exists. - */ - private predicate parameterReadState( - SummarizedCallable c, SummaryNodeState state, ParameterPosition pos - ) { - state.isInputState(c, SummaryComponentStack::argument(pos)) - } - - /** - * Holds if a synthesized summary node is needed for the state `state` in summarized - * callable `c`. 
- */ - private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { - state.isInputState(c, _) and - not parameterReadState(c, state, _) - or - state.isOutputState(c, _) - } - - pragma[noinline] - private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | state.isInputState(c, s) | - result = TSummaryInternalNode(c, state) - or - exists(ParameterPosition pos | - parameterReadState(c, state, pos) and - result = TSummaryParameterNode(c, pos) - ) - ) - } - - pragma[noinline] - private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | - state.isOutputState(c, s) and - result = TSummaryInternalNode(c, state) - ) - } - - /** - * Holds if a write targets `post`, which is a post-update node for a - * parameter at position `pos` in `c`. - */ - private predicate isParameterPostUpdate( - SummaryNode post, SummarizedCallable c, ParameterPosition pos - ) { - post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) - } - - /** Holds if a parameter node at position `pos` is required for `c`. 
*/ - private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { - parameterReadState(c, _, pos) - or - // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context - any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) - } - - private predicate callbackOutput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk - ) { - any(SummaryNodeState state).isInputState(c, s) and - s.head() = TReturnSummaryComponent(rk) and - receiver = summaryNodeInputState(c, s.tail()) - } - - private predicate callbackInput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos - ) { - any(SummaryNodeState state).isOutputState(c, s) and - s.head() = TParameterSummaryComponent(pos) and - receiver = summaryNodeInputState(c, s.tail()) - } - - /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ - predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { - callbackOutput(c, _, receiver, _) - or - callbackInput(c, _, receiver, _) - } - - /** - * Gets the type of synthesized summary node `n`. - * - * The type is computed based on the language-specific predicates - * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and - * `getCallbackReturnType()`. 
- */ - DataFlowType summaryNodeType(SummaryNode n) { - exists(SummaryNode pre | - summaryPostUpdateNode(n, pre) and - result = summaryNodeType(pre) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | - n = summaryNodeInputState(c, s) and - ( - exists(ContentSet cont | result = getContentType(cont) | - head = TContentSummaryComponent(cont) or - head = TWithContentSummaryComponent(cont) - ) - or - head = TWithoutContentSummaryComponent(_) and - result = summaryNodeType(summaryNodeInputState(c, s.tail())) - or - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = - getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), rk) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - or - exists(ParameterPosition pos | - head = TArgumentSummaryComponent(pos) and - result = getParameterType(c, pos) - ) - ) - or - n = summaryNodeOutputState(c, s) and - ( - exists(ContentSet cont | - head = TContentSummaryComponent(cont) and result = getContentType(cont) - ) - or - s.length() = 1 and - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = getReturnType(c, rk) - ) - or - exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | - result = - getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), pos) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - ) - ) - } - - /** Holds if summary node `p` is a parameter with position `pos`. */ - predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { - p = TSummaryParameterNode(_, pos) - } - - /** Holds if summary node `out` contains output of kind `rk` from call `c`. 
*/ - predicate summaryOutNode(DataFlowCall c, SummaryNode out, ReturnKind rk) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackOutput(callable, s, receiver, rk) and - out = summaryNodeInputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `arg` is at position `pos` in the call `c`. */ - predicate summaryArgumentNode(DataFlowCall c, SummaryNode arg, ArgumentPosition pos) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackInput(callable, s, receiver, pos) and - arg = summaryNodeOutputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { - exists(SummarizedCallable c, ParameterPosition pos | - isParameterPostUpdate(post, c, pos) and - pre = TSummaryParameterNode(c, pos) - ) - or - exists(SummarizedCallable callable, SummaryComponentStack s | - callbackInput(callable, s, _, _) and - pre = summaryNodeOutputState(callable, s) and - post = summaryNodeInputState(callable, s) - ) - } - - /** Holds if summary node `ret` is a return node of kind `rk`. */ - predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { - exists(SummaryComponentStack s | - ret = summaryNodeOutputState(_, s) and - s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) - ) - } - - /** - * Holds if flow is allowed to pass from parameter `p`, to a return - * node, and back out to `p`. 
- */ - predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | - p.isParameterOf(inject(c), pragma[only_bind_into](ppos)) - | - exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | - summary(c, inputContents, outputContents, _) and - inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and - outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) - ) - ) - } - - /** Provides a compilation of flow summaries to atomic data-flow steps. */ - module Steps { - /** - * Holds if there is a local step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { - exists( - SummarizedCallable c, SummaryComponentStack inputContents, - SummaryComponentStack outputContents - | - summary(c, inputContents, outputContents, preservesValue) and - pred = summaryNodeInputState(c, inputContents) and - succ = summaryNodeOutputState(c, outputContents) - | - preservesValue = true - or - preservesValue = false and not summary(c, inputContents, outputContents, true) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s | - pred = summaryNodeInputState(c, s.tail()) and - succ = summaryNodeInputState(c, s) and - s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and - preservesValue = true - ) - } - - /** - * Holds if there is a read step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. 
- */ - predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeInputState(sc, s.tail()) and - succ = summaryNodeInputState(sc, s) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a store step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeOutputState(sc, s) and - succ = summaryNodeOutputState(sc, s.tail()) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a jump step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { - exists(SummaryComponentStack s | - s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and - pred = summaryNodeOutputState(_, s) and - succ = summaryNodeInputState(_, s) - ) - } - - /** - * Holds if values stored inside content `c` are cleared at `n`. `n` is a - * synthesized summary node, so in order for values to be cleared at calls - * to the relevant method, it is important that flow does not pass over - * the argument, either via use-use flow or def-use flow. - * - * Example: - * - * ``` - * a.b = taint; - * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier - * sink(a.b); - * ``` - * - * In the above, flow should not pass from `a` on the first line (or the second - * line) to `a` on the third line. Instead, there will be synthesized flow from - * `a` on line 2 to the post-update node for `a` on that line (via an intermediate - * node where field `b` is cleared). 
- */ - predicate summaryClearsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withoutContent(c) - ) - } - - /** - * Holds if the value that is being tracked is expected to be stored inside - * content `c` at `n`. - */ - predicate summaryExpectsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withContent(c) - ) - } - - pragma[noinline] - private predicate viableParam( - DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p - ) { - exists(DataFlowCallable c | - c = inject(sc) and - p = TSummaryParameterNode(sc, ppos) and - c = viableCallable(call) - ) - } - - pragma[nomagic] - private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { - exists(ParameterPosition ppos | - argumentPositionMatch(call, arg, ppos) and - viableParam(call, sc, ppos, result) - ) - } - - /** - * Holds if `p` can reach `n` in a summarized callable, using only value-preserving - * local steps. `clearsOrExpects` records whether any node on the path from `p` to - * `n` either clears or expects contents. - */ - private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { - viableParam(_, _, _, p) and - n = p and - clearsOrExpects = false - or - exists(SummaryNode mid, boolean clearsOrExpectsMid | - paramReachesLocal(p, mid, clearsOrExpectsMid) and - summaryLocalStep(mid, n, true) and - if - summaryClearsContent(n, _) or - summaryExpectsContent(n, _) - then clearsOrExpects = true - else clearsOrExpects = clearsOrExpectsMid - ) - } - - /** - * Holds if use-use flow starting from `arg` should be prohibited. 
- * - * This is the case when `arg` is the argument of a call that targets a - * flow summary where the corresponding parameter either clears contents - * or expects contents. - */ - pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { - exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | - paramReachesLocal(p, ret, true) and - p = summaryArgParam(_, arg, sc) and - p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and - isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) - ) - } - - pragma[nomagic] - private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { - summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) - or - exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | - paramReachesLocal(p, pre, _) and - summaryPostUpdateNode(ret, pre) and - p = TSummaryParameterNode(_, pos) and - rk.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - bindingset[ret] - private SummaryParamNode summaryArgParamRetOut( - ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc - ) { - exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam(call, arg, sc) and - summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and - out = pragma[only_bind_into](rk).getAnOutNode(call) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple value-preserving flow - * summary, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { - exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | - summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and - summaryReturnNode(ret, pragma[only_bind_into](rk)) and - out = getAnOutNode(call, pragma[only_bind_into](rk)) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple flow summary involving taint - * step, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { - exists(SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) - ) - } - - /** - * Holds if there is a read(+taint) of `c` from `arg` to `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and - summaryLocalStep(mid, ret, _) - ) - } - - /** - * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and - summaryStoreStep(mid, c, ret) - ) - } - } - - /** - * Provides a means of translating externally (e.g., MaD) defined flow - * summaries into a `SummarizedCallable`s. - */ - module External { - /** Holds if `spec` is a relevant external specification. */ - private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _, _) or - summaryElement(_, _, spec, _, _) or - sourceElement(_, spec, _, _) or - sinkElement(_, spec, _, _) - } - - private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { relevantSpec(this) } - } - - /** Holds if specification component `token` parses as parameter `pos`. */ - predicate parseParam(AccessPathToken token, ArgumentPosition pos) { - token.getName() = "Parameter" and - pos = parseParamBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as argument `pos`. */ - predicate parseArg(AccessPathToken token, ParameterPosition pos) { - token.getName() = "Argument" and - pos = parseArgBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as synthetic global `sg`. 
*/ - predicate parseSynthGlobal(AccessPathToken token, string sg) { - token.getName() = "SyntheticGlobal" and - sg = token.getAnArgument() - } - - private class SyntheticGlobalFromAccessPath extends SummaryComponent::SyntheticGlobal { - SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } - } - - private SummaryComponent interpretComponent(AccessPathToken token) { - exists(ParameterPosition pos | - parseArg(token, pos) and result = SummaryComponent::argument(pos) - ) - or - exists(ArgumentPosition pos | - parseParam(token, pos) and result = SummaryComponent::parameter(pos) - ) - or - token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) - or - exists(string sg | - parseSynthGlobal(token, sg) and result = SummaryComponent::syntheticGlobal(sg) - ) - or - result = interpretComponentSpecific(token) - } - - /** - * Holds if `spec` specifies summary component stack `stack`. - */ - predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { - interpretSpec(spec, spec.getNumToken(), stack) - } - - /** Holds if the first `n` tokens of `spec` resolves to `stack`. 
*/ - private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { - n = 1 and - stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) - or - exists(SummaryComponent head, SummaryComponentStack tail | - interpretSpec(spec, n, head, tail) and - stack = SummaryComponentStack::push(head, tail) - ) - } - - /** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */ - private predicate interpretSpec( - AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail - ) { - interpretSpec(spec, n - 1, tail) and - head = interpretComponent(spec.getToken(n - 1)) - } - - private class MkStack extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - interpretSpec(_, _, head, tail) - } - } - - private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } - - private predicate relevantSummaryElementGenerated( - AccessPath inSpec, AccessPath outSpec, string kind - ) { - exists(Provenance provenance | - provenance.isGenerated() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) and - not this.applyManualModel() - } - - private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { - exists(Provenance provenance | - provenance.isManual() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) - or - this.relevantSummaryElementGenerated(inSpec, outSpec, kind) - } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(AccessPath inSpec, AccessPath outSpec, string kind | - this.relevantSummaryElement(inSpec, outSpec, kind) and - interpretSpec(inSpec, input) and - interpretSpec(outSpec, output) - | - kind = "value" and preservesValue = true - or - kind = "taint" and preservesValue = false - ) - } - - override predicate 
hasProvenance(Provenance provenance) { - summaryElement(this, _, _, _, provenance) - } - } - - /** Holds if component `c` of specification `spec` cannot be parsed. */ - predicate invalidSpecComponent(AccessPath spec, string c) { - c = spec.getToken(_) and - not exists(interpretComponent(c)) - } - - /** Holds if `provenance` is not a valid provenance value. */ - bindingset[provenance] - predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } - - /** - * Holds if token `part` of specification `spec` has an invalid index. - * E.g., `Argument[-1]`. - */ - predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { - part = spec.getToken(_) and - part.getName() = ["Parameter", "Argument"] and - AccessPath::parseInt(part.getArgumentList()) < 0 - } - - private predicate inputNeedsReference(AccessPathToken c) { - c.getName() = "Argument" or - inputNeedsReferenceSpecific(c) - } - - private predicate outputNeedsReference(AccessPathToken c) { - c.getName() = ["Argument", "ReturnValue"] or - outputNeedsReferenceSpecific(c) - } - - private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { - exists(SourceOrSinkElement e | - sourceElement(e, output, kind, _) and - if outputNeedsReference(output.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { - exists(SourceOrSinkElement e | - sinkElement(e, input, kind, _) and - if inputNeedsReference(input.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - /** Holds if the first `n` tokens of `output` resolve to the given interpretation. 
*/ - private predicate interpretOutput( - AccessPath output, int n, InterpretNode ref, InterpretNode node - ) { - sourceElementRef(ref, output, _) and - n = 0 and - ( - if output = "" - then - // Allow language-specific interpretation of the empty access path - interpretOutputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretOutput(output, n - 1, ref, mid) and - c = output.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) and - parameterMatch(ppos, apos) - | - c = "Parameter" or parseParam(c, apos) - ) - or - c = "ReturnValue" and - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) - or - interpretOutputSpecific(c, mid, node) - ) - } - - /** Holds if the first `n` tokens of `input` resolve to the given interpretation. 
*/ - private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) { - sinkElementRef(ref, input, _) and - n = 0 and - ( - if input = "" - then - // Allow language-specific interpretation of the empty access path - interpretInputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretInput(input, n - 1, ref, mid) and - c = input.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ReturnNodeExt ret | - c = "ReturnValue" and - ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - interpretInputSpecific(c, mid, node) - ) - } - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate isSourceNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath output | - sourceElementRef(ref, output, kind) and - interpretOutput(output, output.getNumToken(), ref, node) - ) - } - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate isSinkNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath input | - sinkElementRef(ref, input, kind) and - interpretInput(input, input.getNumToken(), ref, node) - ) - } - } - - /** Provides a query predicate for outputting a set of relevant flow summaries. */ - module TestOutput { - /** A flow summary to include in the `summary/1` query predicate. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - /** Gets the string representation of this callable used by `summary/1`. */ - abstract string getCallableCsv(); - - /** Holds if flow is propagated between `input` and `output`. 
*/ - predicate relevantSummary( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - super.propagatesFlow(input, output, preservesValue) - } - - string toString() { result = super.toString() } - } - - /** A model to include in the `neutral/1` query predicate. */ - abstract class RelevantNeutralCallable instanceof NeutralCallable { - /** Gets the string representation of this callable used by `neutral/1`. */ - abstract string getCallableCsv(); - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = super.getKind() } - - string toString() { result = super.toString() } - } - - /** Render the kind in the format used in flow summaries. */ - private string renderKind(boolean preservesValue) { - preservesValue = true and result = "value" - or - preservesValue = false and result = "taint" - } - - private string renderProvenance(SummarizedCallable c) { - if c.applyManualModel() then result = "manual" else c.hasProvenance(result) - } - - private string renderProvenanceNeutral(NeutralCallable c) { - if c.hasManualModel() then result = "manual" else c.hasProvenance(result) - } - - /** - * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. - * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", - * ext is hardcoded to empty. - */ - query predicate summary(string csv) { - exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - | - c.relevantSummary(input, output, preservesValue) and - csv = - c.getCallableCsv() // Callable information - + input.getMadRepresentation() + ";" // input - + output.getMadRepresentation() + ";" // output - + renderKind(preservesValue) + ";" // kind - + renderProvenance(c) // provenance - ) - } - - /** - * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. 
- * The syntax is: "namespace;type;name;signature;kind;provenance"", - */ - query predicate neutral(string csv) { - exists(RelevantNeutralCallable c | - csv = - c.getCallableCsv() // Callable information - + c.getKind() + ";" // kind - + renderProvenanceNeutral(c) // provenance - ) - } - } - - /** - * Provides query predicates for rendering the generated data flow graph for - * a summarized callable. - * - * Import this module into a `.ql` file of `@kind graph` to render the graph. - * The graph is restricted to callables from `RelevantSummarizedCallable`. - */ - module RenderSummarizedCallable { - /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - string toString() { result = super.toString() } - } - - private newtype TNodeOrCall = - MkNode(SummaryNode n) { - exists(RelevantSummarizedCallable c | - n = TSummaryInternalNode(c, _) - or - n = TSummaryParameterNode(c, _) - ) - } or - MkCall(DataFlowCall call) { - call = summaryDataFlowCall(_) and - call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) - } - - private class NodeOrCall extends TNodeOrCall { - SummaryNode asNode() { this = MkNode(result) } - - DataFlowCall asCall() { this = MkCall(result) } - - string toString() { - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
- */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - } - } - - query predicate nodes(NodeOrCall n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { - exists(boolean preservesValue | - Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and - if preservesValue = true then value = "value" else value = "taint" - ) - or - exists(ContentSet c | - Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and - value = "read (" + c + ")" - or - Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and - value = "store (" + c + ")" - or - Private::Steps::summaryClearsContent(a.asNode(), c) and - b = a and - value = "clear (" + c + ")" - or - Private::Steps::summaryExpectsContent(a.asNode(), c) and - b = a and - value = "expect (" + c + ")" - ) - or - summaryPostUpdateNode(b.asNode(), a.asNode()) and - value = "post-update" - or - b.asCall() = summaryDataFlowCall(a.asNode()) and - value = "receiver" - or - exists(ArgumentPosition pos | - summaryArgumentNode(b.asCall(), a.asNode(), pos) and - value = "argument (" + pos + ")" - ) - } - - query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { - key = "semmle.label" and - value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") - } - } -} +module Public = Impl::Public; diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImplSpecific.qll b/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImplSpecific.qll deleted file mode 100644 index 996e791d442..00000000000 --- a/java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImplSpecific.qll +++ /dev/null @@ -1,354 +0,0 @@ -/** - * Provides Java specific classes and predicates for defining flow summaries. 
- */ - -private import java -private import DataFlowDispatch -private import DataFlowPrivate -private import DataFlowUtil -private import FlowSummaryImpl::Private -private import FlowSummaryImpl::Public -private import semmle.code.java.dataflow.ExternalFlow -private import semmle.code.java.dataflow.FlowSummary as FlowSummary -private import semmle.code.java.dataflow.internal.AccessPathSyntax as AccessPathSyntax - -class SummarizedCallableBase = FlowSummary::SummarizedCallableBase; - -/** - * A class of callables that are candidates for neutral modeling. - */ -class NeutralCallableBase extends Callable { - NeutralCallableBase() { this.isSourceDeclaration() } - - /** Gets a call that targets this neutral. */ - Call getACall() { result.getCallee().getSourceDeclaration() = this } -} - -/** - * A module for importing frameworks that define synthetic globals. - */ -private module SyntheticGlobals { - private import semmle.code.java.frameworks.android.Intent -} - -DataFlowCallable inject(SummarizedCallable c) { result.asSummarizedCallable() = c } - -/** Gets the parameter position of the instance parameter. */ -ArgumentPosition callbackSelfParameterPosition() { result = -1 } - -/** Gets the synthesized data-flow call for `receiver`. */ -SummaryCall summaryDataFlowCall(SummaryNode receiver) { result.getReceiver() = receiver } - -/** Gets the type of content `c`. */ -DataFlowType getContentType(Content c) { result = c.getType() } - -/** Gets the type of the parameter at the given position. */ -DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { - result = getErasedRepr(c.getParameterType(pos)) -} - -/** Gets the return type of kind `rk` for callable `c`. */ -DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { - result = getErasedRepr(c.getReturnType()) and - exists(rk) -} - -/** - * Gets the type of the `i`th parameter in a synthesized call that targets a - * callback of type `t`. 
- */ -DataFlowType getCallbackParameterType(DataFlowType t, int i) { - result = getErasedRepr(t.(FunctionalInterface).getRunMethod().getParameterType(i)) - or - result = getErasedRepr(t.(FunctionalInterface)) and i = -1 -} - -/** - * Gets the return type of kind `rk` in a synthesized call that targets a - * callback of type `t`. - */ -DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { - result = getErasedRepr(t.(FunctionalInterface).getRunMethod().getReturnType()) and - exists(rk) -} - -/** Gets the type of synthetic global `sg`. */ -DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { - exists(sg) and - result instanceof TypeObject -} - -private predicate relatedArgSpec(Callable c, string spec) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext - | - summaryModel(namespace, type, subtypes, name, signature, ext, spec, _, _, _) or - summaryModel(namespace, type, subtypes, name, signature, ext, _, spec, _, _) or - sourceModel(namespace, type, subtypes, name, signature, ext, spec, _, _) or - sinkModel(namespace, type, subtypes, name, signature, ext, spec, _, _) - | - c = interpretElement(namespace, type, subtypes, name, signature, ext) - ) -} - -/** - * Holds if `defaultsCallable` is a Kotlin default-parameter proxy for `originalCallable`, and - * `originalCallable` has a model, and `defaultsArgSpec` is `originalArgSpec` adjusted to account - * for the additional dispatch receiver parameter that occurs in the default-parameter proxy's argument - * list. When no adjustment is required (e.g. for constructors, or non-argument-based specs), `defaultArgsSpec` - * equals `originalArgSpec`. - * - * Note in the case where `originalArgSpec` uses an integer range, like `Argument[1..3]...`, this will produce multiple - * results for `defaultsArgSpec`, like `{Argument[2]..., Argument[3]..., Argument[4]...}`. 
- */ -private predicate correspondingKotlinParameterDefaultsArgSpec( - Callable originalCallable, Callable defaultsCallable, string originalArgSpec, - string defaultsArgSpec -) { - relatedArgSpec(originalCallable, originalArgSpec) and - defaultsCallable = originalCallable.getKotlinParameterDefaultsProxy() and - ( - originalCallable instanceof Constructor and originalArgSpec = defaultsArgSpec - or - originalCallable instanceof Method and - exists(string regex | - // Note I use a regex and not AccessPathToken because this feeds summaryElement et al, - // which would introduce mutual recursion with the definition of AccessPathToken. - regex = "Argument\\[([0-9,\\. ]+)\\](.*)" and - ( - exists(string oldArgNumber, string rest, int paramOffset | - oldArgNumber = originalArgSpec.regexpCapture(regex, 1) and - rest = originalArgSpec.regexpCapture(regex, 2) and - paramOffset = - defaultsCallable.getNumberOfParameters() - - (originalCallable.getNumberOfParameters() + 2) and - exists(int oldArgParsed | - oldArgParsed = AccessPathSyntax::AccessPath::parseInt(oldArgNumber.splitAt(",").trim()) - | - if - ktExtensionFunctions(originalCallable, _, _) and - ktExtensionFunctions(defaultsCallable, _, _) and - oldArgParsed = 0 - then defaultsArgSpec = "Argument[" + paramOffset + "]" // 1 if dispatch receiver is present, 0 otherwise. - else defaultsArgSpec = "Argument[" + (oldArgParsed + paramOffset) + "]" + rest - ) - ) - or - not originalArgSpec.regexpMatch(regex) and - defaultsArgSpec = originalArgSpec - ) - ) - ) -} - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and provenance `provenance`. 
- */ -predicate summaryElement( - SummarizedCallableBase c, string input, string output, string kind, string provenance -) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - string originalInput, string originalOutput, Callable baseCallable - | - summaryModel(namespace, type, subtypes, name, signature, ext, originalInput, originalOutput, - kind, provenance) and - baseCallable = interpretElement(namespace, type, subtypes, name, signature, ext) and - ( - c.asCallable() = baseCallable and input = originalInput and output = originalOutput - or - correspondingKotlinParameterDefaultsArgSpec(baseCallable, c.asCallable(), originalInput, input) and - correspondingKotlinParameterDefaultsArgSpec(baseCallable, c.asCallable(), originalOutput, - output) - ) - ) -} - -/** - * Holds if a neutral model exists for `c` of kind `kind` - * and with provenance `provenance`. - */ -predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { - exists(string namespace, string type, string name, string signature | - neutralModel(namespace, type, name, signature, kind, provenance) and - c = interpretElement(namespace, type, false, name, signature, "") - ) -} - -/** Gets the summary component for specification component `c`, if any. */ -bindingset[c] -SummaryComponent interpretComponentSpecific(AccessPathToken c) { - exists(Content content | parseContent(c, content) and result = SummaryComponent::content(content)) - or - c = "WithoutElement" and result = SummaryComponent::withoutContent(any(CollectionContent cc)) - or - c = "WithElement" and result = SummaryComponent::withContent(any(CollectionContent cc)) -} - -/** Gets the summary component for specification component `c`, if any. 
*/ -private string getContentSpecific(Content c) { - exists(Field f, string package, string className, string fieldName | - f = c.(FieldContent).getField() and - f.hasQualifiedName(package, className, fieldName) and - result = "Field[" + package + "." + className + "." + fieldName + "]" - ) - or - exists(SyntheticField f | - f = c.(SyntheticFieldContent).getField() and result = "SyntheticField[" + f + "]" - ) - or - c instanceof ArrayContent and result = "ArrayElement" - or - c instanceof CollectionContent and result = "Element" - or - c instanceof MapKeyContent and result = "MapKey" - or - c instanceof MapValueContent and result = "MapValue" -} - -/** Gets the textual representation of the content in the format used for MaD models. */ -string getMadRepresentationSpecific(SummaryComponent sc) { - exists(Content c | sc = TContentSummaryComponent(c) and result = getContentSpecific(c)) - or - sc = TWithoutContentSummaryComponent(_) and result = "WithoutElement" - or - sc = TWithContentSummaryComponent(_) and result = "WithElement" -} - -bindingset[pos] -private string positionToString(int pos) { - if pos = -1 then result = "this" else result = pos.toString() -} - -/** Gets the textual representation of a parameter position in the format used for flow summaries. */ -string getParameterPosition(ParameterPosition pos) { result = positionToString(pos) } - -/** Gets the textual representation of an argument position in the format used for flow summaries. */ -string getArgumentPosition(ArgumentPosition pos) { result = positionToString(pos) } - -/** Holds if input specification component `c` needs a reference. */ -predicate inputNeedsReferenceSpecific(string c) { none() } - -/** Holds if output specification component `c` needs a reference. 
*/ -predicate outputNeedsReferenceSpecific(string c) { none() } - -class SourceOrSinkElement = Top; - -/** - * Holds if an external source specification exists for `e` with output specification - * `output`, kind `kind`, and provenance `provenance`. - */ -predicate sourceElement(SourceOrSinkElement e, string output, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - SourceOrSinkElement baseSource, string originalOutput - | - sourceModel(namespace, type, subtypes, name, signature, ext, originalOutput, kind, provenance) and - baseSource = interpretElement(namespace, type, subtypes, name, signature, ext) and - ( - e = baseSource and output = originalOutput - or - correspondingKotlinParameterDefaultsArgSpec(baseSource, e, originalOutput, output) - ) - ) -} - -/** - * Holds if an external sink specification exists for `e` with input specification - * `input`, kind `kind` and provenance `provenance`. - */ -predicate sinkElement(SourceOrSinkElement e, string input, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext, - SourceOrSinkElement baseSink, string originalInput - | - sinkModel(namespace, type, subtypes, name, signature, ext, originalInput, kind, provenance) and - baseSink = interpretElement(namespace, type, subtypes, name, signature, ext) and - ( - e = baseSink and originalInput = input - or - correspondingKotlinParameterDefaultsArgSpec(baseSink, e, originalInput, input) - ) - ) -} - -/** Gets the return kind corresponding to specification `"ReturnValue"`. */ -ReturnKind getReturnValueKind() { any() } - -private newtype TInterpretNode = - TElement(SourceOrSinkElement n) or - TNode(Node n) - -/** An entity used to interpret a source/sink specification. */ -class InterpretNode extends TInterpretNode { - /** Gets the element that this node corresponds to, if any. 
*/ - SourceOrSinkElement asElement() { this = TElement(result) } - - /** Gets the data-flow node that this node corresponds to, if any. */ - Node asNode() { this = TNode(result) } - - /** Gets the call that this node corresponds to, if any. */ - DataFlowCall asCall() { result.asCall() = this.asElement() } - - /** Gets the callable that this node corresponds to, if any. */ - DataFlowCallable asCallable() { result.asCallable() = this.asElement() } - - /** Gets the target of this call, if any. */ - Callable getCallTarget() { result = this.asCall().asCall().getCallee().getSourceDeclaration() } - - /** Gets a textual representation of this node. */ - string toString() { - result = this.asElement().toString() - or - result = this.asNode().toString() - } - - /** Gets the location of this node. */ - Location getLocation() { - result = this.asElement().getLocation() - or - result = this.asNode().getLocation() - } -} - -/** Provides additional sink specification logic required for annotations. */ -pragma[inline] -predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { - exists(Node n, Top ast | - n = node.asNode() and - ast = mid.asElement() - | - (c = "Parameter" or c = "") and - node.asNode().asParameter() = mid.asElement() - or - c = "" and - n.asExpr().(FieldRead).getField() = ast - ) -} - -/** Provides additional source specification logic required for annotations. */ -pragma[inline] -predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode n) { - exists(FieldWrite fw | - c = "" and - fw.getField() = mid.asElement() and - n.asNode().asExpr() = fw.getASource() - ) -} - -/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */ -bindingset[s] -ArgumentPosition parseParamBody(string s) { - result = AccessPath::parseInt(s) - or - s = "this" and result = -1 -} - -/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. 
*/ -bindingset[s] -ParameterPosition parseArgBody(string s) { - result = AccessPath::parseInt(s) - or - s = "this" and result = -1 -} diff --git a/java/ql/lib/semmle/code/java/dispatch/WrappedInvocation.qll b/java/ql/lib/semmle/code/java/dispatch/WrappedInvocation.qll index f41c79f9206..97e49812ae4 100644 --- a/java/ql/lib/semmle/code/java/dispatch/WrappedInvocation.qll +++ b/java/ql/lib/semmle/code/java/dispatch/WrappedInvocation.qll @@ -60,7 +60,6 @@ Method getRunnerTarget(MethodCall ma) { } import semmle.code.java.dataflow.FlowSummary -import semmle.code.java.dataflow.internal.FlowSummaryImplSpecific as ImplSpecific private predicate mayInvokeCallback(SrcMethod m, int n) { m.getParameterType(n).(RefType).getSourceDeclaration() instanceof FunctionalInterface and @@ -72,23 +71,11 @@ private class SummarizedCallableWithCallback extends SummarizedCallable { SummarizedCallableWithCallback() { mayInvokeCallback(this.asCallable(), pos) } - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - input = SummaryComponentStack::argument(pos) and - output = SummaryComponentStack::push(SummaryComponent::parameter(-1), input) and + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[" + pos + "]" and + output = "Argument[" + pos + "].Parameter[-1]" and preservesValue = true } override predicate hasProvenance(Provenance provenance) { provenance = "hq-generated" } } - -private class RequiredComponentStackForCallback extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - exists(int pos | - mayInvokeCallback(_, pos) and - head = SummaryComponent::parameter(-1) and - tail = SummaryComponentStack::argument(pos) - ) - } -} diff --git a/java/ql/lib/semmle/code/java/frameworks/Stream.qll b/java/ql/lib/semmle/code/java/frameworks/Stream.qll index 96d74d1473a..a449f8bd99a 100644 --- 
a/java/ql/lib/semmle/code/java/frameworks/Stream.qll +++ b/java/ql/lib/semmle/code/java/frameworks/Stream.qll @@ -32,11 +32,9 @@ private class CollectToContainer extends SyntheticCallable { ]) } - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - input = SummaryComponentStack::elementOf(SummaryComponentStack::qualifier()) and - output = SummaryComponentStack::elementOf(SummaryComponentStack::return()) and + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[this].Element" and + output = "ReturnValue.Element" and preservesValue = true } } @@ -46,11 +44,9 @@ private class CollectToJoining extends SyntheticCallable { override Call getACall() { result.(CollectCall).getArgument(0).(Collector).hasName("joining") } - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - input = SummaryComponentStack::elementOf(SummaryComponentStack::qualifier()) and - output = SummaryComponentStack::return() and + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[this].Element" and + output = "ReturnValue" and preservesValue = false } @@ -70,28 +66,9 @@ private class CollectToGroupingBy extends SyntheticCallable { ) } - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - input = SummaryComponentStack::elementOf(SummaryComponentStack::qualifier()) and - output = - SummaryComponentStack::elementOf(SummaryComponentStack::mapValueOf(SummaryComponentStack::return())) and + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + input = "Argument[this].Element" and + output = "ReturnValue.MapValue.Element" and preservesValue = true } } - -private class RequiredComponentStackForCollect extends RequiredSummaryComponentStack { 
- override predicate required(SummaryComponent head, SummaryComponentStack tail) { - head = SummaryComponent::element() and - tail = SummaryComponentStack::qualifier() - or - head = SummaryComponent::element() and - tail = SummaryComponentStack::return() - or - head = SummaryComponent::element() and - tail = SummaryComponentStack::mapValueOf(SummaryComponentStack::return()) - or - head = SummaryComponent::mapValue() and - tail = SummaryComponentStack::return() - } -} diff --git a/java/ql/lib/semmle/code/java/frameworks/android/Intent.qll b/java/ql/lib/semmle/code/java/frameworks/android/Intent.qll index 1aba2be33c0..58767a2d9bf 100644 --- a/java/ql/lib/semmle/code/java/frameworks/android/Intent.qll +++ b/java/ql/lib/semmle/code/java/frameworks/android/Intent.qll @@ -5,6 +5,7 @@ private import semmle.code.java.dataflow.ExternalFlow private import semmle.code.java.dataflow.FlowSteps private import semmle.code.java.dataflow.FlowSummary private import semmle.code.java.dataflow.internal.BaseSSA as BaseSsa +private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl /** The class `android.content.Intent`. 
*/ class TypeIntent extends Class { @@ -332,12 +333,10 @@ private class StartActivitiesSyntheticCallable extends SyntheticCallable { result.targetsComponentType(targetType) } - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(ActivityIntentSyntheticGlobal glob | glob.getTargetType() = targetType | - input = SummaryComponentStack::arrayElementOf(SummaryComponentStack::argument(0)) and - output = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(glob)) and + input = "Argument[0].ArrayElement" and + output = "SyntheticGlobal[" + glob + "]" and preservesValue = true ) } @@ -358,18 +357,16 @@ private class GetIntentSyntheticCallable extends SyntheticCallable { result.getEnclosingCallable().getDeclaringType() = targetType } - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(ActivityIntentSyntheticGlobal glob | glob.getTargetType() = targetType | - input = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(glob)) and - output = SummaryComponentStack::return() and + input = "SyntheticGlobal[" + glob + "]" and + output = "ReturnValue" and preservesValue = true ) } } -private class ActivityIntentSyntheticGlobal extends SummaryComponent::SyntheticGlobal { +private class ActivityIntentSyntheticGlobal extends FlowSummaryImpl::Private::SyntheticGlobal { AndroidComponent targetType; ActivityIntentSyntheticGlobal() { @@ -382,13 +379,6 @@ private class ActivityIntentSyntheticGlobal extends SummaryComponent::SyntheticG AndroidComponent getTargetType() { result = targetType } } -private class RequiredComponentStackForStartActivities extends RequiredSummaryComponentStack { - override predicate 
required(SummaryComponent head, SummaryComponentStack tail) { - head = SummaryComponent::arrayElement() and - tail = SummaryComponentStack::argument(0) - } -} - /** * A value-preserving step from the intent argument of a `sendBroadcast` call to * the intent parameter in the `onReceive` method of the receiver the diff --git a/java/ql/src/Metrics/Summaries/TopJdkApis.qll b/java/ql/src/Metrics/Summaries/TopJdkApis.qll index 1ba2a0aeed0..f9a516fc372 100644 --- a/java/ql/src/Metrics/Summaries/TopJdkApis.qll +++ b/java/ql/src/Metrics/Summaries/TopJdkApis.qll @@ -304,7 +304,7 @@ class TopJdkApi extends Callable { /** Holds if this API has a manual neutral summary model. */ private predicate hasManualNeutralSummary() { - this.(FlowSummaryImpl::Public::NeutralSummaryCallable).hasManualModel() + this = any(FlowSummaryImpl::Public::NeutralSummaryCallable n | n.hasManualModel()).asCallable() } /** Holds if this API has a manual MaD model. */ diff --git a/java/ql/src/Telemetry/ExternalApi.qll b/java/ql/src/Telemetry/ExternalApi.qll index b6e0de2f842..388908a26a9 100644 --- a/java/ql/src/Telemetry/ExternalApi.qll +++ b/java/ql/src/Telemetry/ExternalApi.qll @@ -79,7 +79,7 @@ class ExternalApi extends Callable { /** Holds if this API is a known neutral. 
*/ pragma[nomagic] - predicate isNeutral() { this instanceof FlowSummaryImpl::Public::NeutralCallable } + predicate isNeutral() { this = any(FlowSummaryImpl::Public::NeutralCallable n).asCallable() } /** * Holds if this API is supported by existing CodeQL libraries, that is, it is either a diff --git a/java/ql/src/utils/modeleditor/FrameworkModeEndpointsQuery.qll b/java/ql/src/utils/modeleditor/FrameworkModeEndpointsQuery.qll index 4920ed4f011..d8af480c6d3 100644 --- a/java/ql/src/utils/modeleditor/FrameworkModeEndpointsQuery.qll +++ b/java/ql/src/utils/modeleditor/FrameworkModeEndpointsQuery.qll @@ -1,6 +1,6 @@ private import java private import semmle.code.java.dataflow.internal.DataFlowPrivate -private import semmle.code.java.dataflow.internal.FlowSummaryImplSpecific +private import semmle.code.java.dataflow.internal.FlowSummaryImpl private import semmle.code.java.dataflow.internal.ModelExclusions private import ModelEditor @@ -8,7 +8,7 @@ private import ModelEditor * A class of effectively public callables from source code. 
*/ class PublicEndpointFromSource extends Endpoint, ModelApi { - override predicate isSource() { sourceElement(this, _, _, _) } + override predicate isSource() { SourceSinkInterpretationInput::sourceElement(this, _, _) } - override predicate isSink() { sinkElement(this, _, _, _) } + override predicate isSink() { SourceSinkInterpretationInput::sinkElement(this, _, _) } } diff --git a/java/ql/test/library-tests/dataflow/external-models/validatemodels.ql b/java/ql/test/library-tests/dataflow/external-models/validatemodels.ql index 84312d3d4e4..3f0a4ab9c1c 100644 --- a/java/ql/test/library-tests/dataflow/external-models/validatemodels.ql +++ b/java/ql/test/library-tests/dataflow/external-models/validatemodels.ql @@ -1,15 +1,2 @@ import java -import semmle.code.java.dataflow.ExternalFlow -import semmle.code.java.dataflow.internal.AccessPathSyntax -import ModelValidation - -private predicate getRelevantAccessPath(string path) { - summaryModel(_, _, _, _, _, _, path, _, _, _) or - summaryModel(_, _, _, _, _, _, _, path, _, _) or - sinkModel(_, _, _, _, _, _, path, _, _) or - sourceModel(_, _, _, _, _, _, path, _, _) -} - -private class AccessPathsExternal extends AccessPath::Range { - AccessPathsExternal() { getRelevantAccessPath(this) } -} +import semmle.code.java.dataflow.ExternalFlow::ModelValidation diff --git a/javascript/ql/lib/qlpack.yml b/javascript/ql/lib/qlpack.yml index f210c4ba9f9..e0174892c4c 100644 --- a/javascript/ql/lib/qlpack.yml +++ b/javascript/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ extractor: javascript library: true upgrades: upgrades dependencies: + codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/regex: ${workspace} codeql/tutorial: ${workspace} diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- 
a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. - * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). 
- */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. - lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. 
- // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. - result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. 
*/ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index 1cb4e189339..dd433152751 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -70,8 +70,8 @@ private module API = Specific::API; private module DataFlow = Specific::DataFlow; -private import Specific::AccessPathSyntax private import ApiGraphModelsExtensions as Extensions +private import codeql.dataflow.internal.AccessPathSyntax /** Module containing hooks for providing input data to be interpreted as a model. */ module ModelInput { @@ -327,29 +327,29 @@ predicate isRelevantFullPath(string type, string path) { } /** A string from a CSV row that should be parsed as an access path. 
*/ -private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { - isRelevantFullPath(_, this) - or - exists(string type | isRelevantType(type) | - summaryModel(type, _, this, _, _) or - summaryModel(type, _, _, this, _) - ) - or - typeVariableModel(_, this) - } +private predicate accessPathRange(string s) { + isRelevantFullPath(_, s) + or + exists(string type | isRelevantType(type) | + summaryModel(type, _, s, _, _) or + summaryModel(type, _, _, s, _) + ) + or + typeVariableModel(_, s) } +import AccessPath + /** * Gets a successor of `node` in the API graph. */ bindingset[token] -API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) { // API graphs use the same label for arguments and parameters. An edge originating from a // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and - result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) + result = node.getParameter(parseIntUnbounded(token.getAnArgument())) or token.getName() = "ReturnValue" and result = node.getReturn() @@ -362,11 +362,9 @@ API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets an API-graph successor for the given invocation. 
*/ bindingset[token] -API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) { +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "Argument" and - result = - invoke - .getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) or token.getName() = "ReturnValue" and result = invoke.getReturn() @@ -378,10 +376,12 @@ API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken to /** * Holds if `invoke` invokes a call-site filter given by `token`. */ -pragma[inline] -private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) { +bindingset[token] +private predicate invocationMatchesCallSiteFilter( + Specific::InvokeNode invoke, AccessPathTokenBase token +) { token.getName() = "WithArity" and - invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument()) + invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument()) or Specific::invocationMatchesExtraCallSiteFilter(invoke, token) } diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll index 4c9c8e147eb..3580a9addac 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -4,14 +4,13 @@ * It must export the following members: * ```ql * class Unit // a unit type - * module AccessPathSyntax // a re-export of the AccessPathSyntax module * class InvokeNode // a type representing an invocation connected to the API graph * module API // the API graph module * predicate isPackageUsed(string package) * API::Node getExtraNodeFromPath(string package, string 
type, string path, int n) - * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) - * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) - * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) + * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) + * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) * InvokeNode getAnInvocationOf(API::Node node) * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) @@ -21,13 +20,12 @@ private import javascript as JS private import ApiGraphModels +private import codeql.dataflow.internal.AccessPathSyntax // Re-export libraries needed by ApiGraphModels.qll module API = JS::API; -import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax import JS::DataFlow as DataFlow -private import AccessPathSyntax /** * Holds if `rawType` represents the JavaScript type `qualifiedName` from the given NPM `package`. @@ -137,7 +135,7 @@ API::Node getExtraNodeFromType(string type) { * Gets a JavaScript-specific API graph successor of `node` reachable by resolving `token`. */ bindingset[token] -API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) { token.getName() = "Member" and result = node.getMember(token.getAnArgument()) or @@ -183,7 +181,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets a JavaScript-specific API graph successor of `node` reachable by resolving `token`. 
*/ bindingset[token] -API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token) { +API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathTokenBase token) { token.getName() = "Instance" and result = node.getInstance() or @@ -233,7 +231,7 @@ API::Node getAFuzzySuccessor(API::Node node) { * Holds if `invoke` matches the JS-specific call site filter in `token`. */ bindingset[token] -predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token) { +predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "NewCall" and invoke instanceof API::NewNode or @@ -246,9 +244,8 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat operand = token.getAnArgument() and argIndex = operand.splitAt("=", 0) and stringValue = operand.splitAt("=", 1) and - invoke - .getArgument(AccessPath::parseIntWithArity(argIndex, invoke.getNumArgument())) - .getStringValue() = stringValue + invoke.getArgument(parseIntWithArity(argIndex, invoke.getNumArgument())).getStringValue() = + stringValue ) } @@ -338,7 +335,7 @@ predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string a or name = "WithStringArgument" and exists(argument.indexOf("=")) and - exists(AccessPath::parseIntWithArity(argument.splitAt("=", 0), 10)) + exists(parseIntWithArity(argument.splitAt("=", 0), 10)) } module ModelOutputSpecific { diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ql b/javascript/ql/test/library-tests/frameworks/data/test.ql index 5ee8d0e3f9c..039a0aa3920 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ql +++ b/javascript/ql/test/library-tests/frameworks/data/test.ql @@ -1,6 +1,6 @@ import javascript import testUtilities.ConsistencyChecking -import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax +import semmle.javascript.frameworks.data.internal.ApiGraphModels as 
ApiGraphModels class Steps extends ModelInput::SummaryModelCsv { override predicate row(string row) { @@ -126,6 +126,6 @@ class SyntaxErrorTest extends ModelInput::SinkModelCsv { } } -query predicate syntaxErrors(AccessPathSyntax::AccessPath path) { path.hasSyntaxError() } +query predicate syntaxErrors(ApiGraphModels::AccessPath path) { path.hasSyntaxError() } query predicate warning = ModelOutput::getAWarning/0; diff --git a/javascript/ql/test/library-tests/frameworks/data/warnings.ql b/javascript/ql/test/library-tests/frameworks/data/warnings.ql index 94e6f74aae7..3a7e2de70e8 100644 --- a/javascript/ql/test/library-tests/frameworks/data/warnings.ql +++ b/javascript/ql/test/library-tests/frameworks/data/warnings.ql @@ -1,5 +1,4 @@ import javascript -import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels private class InvalidTypeModel extends ModelInput::TypeModelCsv { diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index 8b80e13d06d..800c9592dcc 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -13,61 +13,14 @@ private module Summaries { private import semmle.python.Frameworks } -class SummaryComponent = Impl::Public::SummaryComponent; +deprecated class SummaryComponent = Impl::Private::SummaryComponent; /** Provides predicates for constructing summary components. */ -module SummaryComponent { - private import Impl::Public::SummaryComponent as SC +deprecated module SummaryComponent = Impl::Private::SummaryComponent; - predicate parameter = SC::parameter/1; +deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack; - predicate argument = SC::argument/1; - - predicate content = SC::content/1; - - /** Gets a summary component that represents a list element. 
*/ - SummaryComponent listElement() { result = content(any(ListElementContent c)) } - - /** Gets a summary component that represents a set element. */ - SummaryComponent setElement() { result = content(any(SetElementContent c)) } - - /** Gets a summary component that represents a tuple element. */ - SummaryComponent tupleElement(int index) { - exists(TupleElementContent c | c.getIndex() = index and result = content(c)) - } - - /** Gets a summary component that represents a dictionary element. */ - SummaryComponent dictionaryElement(string key) { - exists(DictionaryElementContent c | c.getKey() = key and result = content(c)) - } - - /** Gets a summary component that represents a dictionary element at any key. */ - SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) } - - /** Gets a summary component that represents an attribute element. */ - SummaryComponent attribute(string attr) { - exists(AttributeContent c | c.getAttribute() = attr and result = content(c)) - } - - /** Gets a summary component that represents the return value of a call. */ - SummaryComponent return() { result = SC::return(any(ReturnKind rk)) } -} - -class SummaryComponentStack = Impl::Public::SummaryComponentStack; - -/** Provides predicates for constructing stacks of summary components. */ -module SummaryComponentStack { - private import Impl::Public::SummaryComponentStack as SCS - - predicate singleton = SCS::singleton/1; - - predicate push = SCS::push/2; - - predicate argument = SCS::argument/1; - - /** Gets a singleton stack representing the return value of a call. */ - SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } -} +deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; /** A callable with a flow summary, identified by a unique string. 
*/ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable { @@ -75,21 +28,14 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari SummarizedCallable() { any() } /** - * Same as - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * but uses an external (string) representation of the input and output stacks. + * DEPRECATED: Use `propagatesFlow` instead. */ - pragma[nomagic] - predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() } + deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + this.propagatesFlow(input, output, preservesValue) + } } -class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; +deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; private class SummarizedCallableFromModel extends SummarizedCallable { string type; @@ -109,7 +55,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | kind = "value" and preservesValue = true diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll b/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. 
- * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. 
- lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. 
- result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. 
*/ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 6e2a462cc5a..16410288800 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -36,7 +36,6 @@ private import python private import DataFlowPublic private import DataFlowPrivate private import FlowSummaryImpl as FlowSummaryImpl -private import FlowSummaryImplSpecific as FlowSummaryImplSpecific private import semmle.python.internal.CachedStages private import semmle.python.dataflow.new.internal.TypeTracker::CallGraphConstruction as CallGraphConstruction @@ -49,13 +48,13 @@ newtype TParameterPosition = // since synthetic parameters are made for a synthetic summary callable, based on // what Argument positions they have flow for, we need to make sure we have such // parameter positions available. 
- FlowSummaryImplSpecific::ParsePositions::isParsedPositionalArgumentPosition(_, index) + FlowSummaryImpl::ParsePositions::isParsedPositionalArgumentPosition(_, index) } or TKeywordParameterPosition(string name) { name = any(Parameter p).getName() or // see comment for TPositionalParameterPosition - FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name) + FlowSummaryImpl::ParsePositions::isParsedKeywordArgumentPosition(_, name) } or TStarArgsParameterPosition(int index) { // since `.getPosition` does not work for `*args`, we need *args parameter positions @@ -136,13 +135,13 @@ newtype TArgumentPosition = // since synthetic calls within a summarized callable could use a unique argument // position, we need to ensure we make these available (these are specified as // parameters in the flow-summary spec) - FlowSummaryImplSpecific::ParsePositions::isParsedPositionalParameterPosition(_, index) + FlowSummaryImpl::ParsePositions::isParsedPositionalParameterPosition(_, index) } or TKeywordArgumentPosition(string name) { exists(any(CallNode c).getArgByName(name)) or // see comment for TPositionalArgumentPosition - FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name) + FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name) } or TStarArgsArgumentPosition(int index) { exists(Call c | c.getPositionalArg(index) instanceof Starred) @@ -1559,12 +1558,15 @@ private class SummaryReturnNode extends FlowSummaryNode, ReturnNode { } private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode { + private SummaryCall call_; + private ArgumentPosition pos_; + SummaryArgumentNode() { - FlowSummaryImpl::Private::summaryArgumentNode(_, this.getSummaryNode(), _) + FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), pos_) } override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - FlowSummaryImpl::Private::summaryArgumentNode(call, 
this.getSummaryNode(), pos) + call = call_ and pos = pos_ } } @@ -1662,10 +1664,16 @@ private module OutNodes { } private class SummaryOutNode extends FlowSummaryNode, OutNode { - SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this.getSummaryNode(), _) } + private SummaryCall call; + private ReturnKind kind_; + + SummaryOutNode() { + FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_) + } override DataFlowCall getCall(ReturnKind kind) { - FlowSummaryImpl::Private::summaryOutNode(result, this.getSummaryNode(), kind) + result = call and + kind = kind_ } } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 839f147411e..f1f9668e856 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -1028,7 +1028,10 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves * by default as a heuristic. */ predicate allowParameterReturnInSelf(ParameterNode p) { - FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p) + exists(DataFlowCallable c, ParameterPosition pos | + p.(ParameterNodeImpl).isParameterOf(c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asLibraryCallable(), pos) + ) } /** An approximated `Content`. */ diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index 0aa17c521b4..055a7cee03b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -1,1491 +1,196 @@ /** * Provides classes and predicates for defining flow summaries. 
- * - * The definitions in this file are language-independent, and language-specific - * definitions are passed in via the `DataFlowImplSpecific` and - * `FlowSummaryImplSpecific` modules. */ -private import FlowSummaryImplSpecific +private import python +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import DataFlowImplSpecific as DataFlowImplSpecific private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public -private import DataFlowImplCommon -private import codeql.util.Unit -/** Provides classes and predicates for defining flow summaries. */ -module Public { +module Input implements InputSig { + class SummarizedCallableBase = string; + + ArgumentPosition callbackSelfParameterPosition() { none() } + + ReturnKind getStandardReturnValueKind() { any() } + + string encodeParameterPosition(ParameterPosition pos) { + pos.isSelf() and result = "self" + or + exists(int i | + pos.isPositional(i) and + result = i.toString() + ) + or + exists(string name | + pos.isKeyword(name) and + result = name + ":" + ) + } + + string encodeArgumentPosition(ArgumentPosition pos) { + pos.isSelf() and result = "self" + or + exists(int i | + pos.isPositional(i) and + result = i.toString() + ) + or + exists(string name | + pos.isKeyword(name) and + result = name + ":" + ) + } + + string encodeContent(ContentSet cs, string arg) { + cs = TListElementContent() and result = "ListElement" and arg = "" + or + cs = TSetElementContent() and result = "SetElement" and arg = "" + or + exists(int index | + cs = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString() + ) + or + exists(string key | + cs = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key + ) + or + cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = "" + or + exists(string attr | cs = TAttributeContent(attr) and result = "Attribute" and 
arg = attr) + } + + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges + token.getName() = "Argument" and + result.isPositional(AccessPath::parseInt(token.getAnArgument())) + } + + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges + token.getName() = "Parameter" and + result.isPositional(AccessPath::parseInt(token.getAnArgument())) + } +} + +private import Make as Impl + +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { + result = + TPotentialLibraryCall([ + sc.(LibraryCallable).getACall().asCfgNode(), + sc.(LibraryCallable).getACallSimple().asCfgNode() + ]) + } +} + +module Private { + import Impl::Private + + module Steps = Impl::Private::Steps; + + /** + * Provides predicates for constructing summary components. + */ + module SummaryComponent { + private import Impl::Private::SummaryComponent as SC + + predicate parameter = SC::parameter/1; + + predicate argument = SC::argument/1; + + predicate content = SC::content/1; + + predicate withoutContent = SC::withoutContent/1; + + predicate withContent = SC::withContent/1; + + /** Gets a summary component that represents a list element. */ + SummaryComponent listElement() { result = content(any(ListElementContent c)) } + + /** Gets a summary component that represents a set element. */ + SummaryComponent setElement() { result = content(any(SetElementContent c)) } + + /** Gets a summary component that represents a tuple element. */ + SummaryComponent tupleElement(int index) { + exists(TupleElementContent c | c.getIndex() = index and result = content(c)) + } + + /** Gets a summary component that represents a dictionary element. 
*/ + SummaryComponent dictionaryElement(string key) { + exists(DictionaryElementContent c | c.getKey() = key and result = content(c)) + } + + /** Gets a summary component that represents a dictionary element at any key. */ + SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) } + + /** Gets a summary component that represents an attribute element. */ + SummaryComponent attribute(string attr) { + exists(AttributeContent c | c.getAttribute() = attr and result = content(c)) + } + + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = SC::return(any(ReturnKind rk)) } + } + + /** + * Provides predicates for constructing stacks of summary components. + */ + module SummaryComponentStack { + private import Impl::Private::SummaryComponentStack as SCS + + predicate singleton = SCS::singleton/1; + + predicate push = SCS::push/2; + + predicate argument = SCS::argument/1; + + /** Gets a singleton stack representing the return value of a call. */ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } + } +} + +module Public = Impl::Public; + +module ParsePositions { private import Private - /** - * A component used in a flow summary. - * - * Either a parameter or an argument at a given position, a specific - * content type, or a return kind. - */ - class SummaryComponent extends TSummaryComponent { - /** Gets a textual representation of this component used for MaD models. 
*/ - string getMadRepresentation() { - result = getMadRepresentationSpecific(this) - or - exists(ArgumentPosition pos | - this = TParameterSummaryComponent(pos) and - result = "Parameter[" + getArgumentPosition(pos) + "]" - ) - or - exists(ParameterPosition pos | - this = TArgumentSummaryComponent(pos) and - result = "Argument[" + getParameterPosition(pos) + "]" - ) - or - exists(string synthetic | - this = TSyntheticGlobalSummaryComponent(synthetic) and - result = "SyntheticGlobal[" + synthetic + "]" - ) - or - this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" - } - - /** Gets a textual representation of this summary component. */ - string toString() { result = this.getMadRepresentation() } + private predicate isParamBody(string body) { + exists(AccessPathToken tok | + tok.getName() = "Parameter" and + body = tok.getAnArgument() + ) } - /** Provides predicates for constructing summary components. */ - module SummaryComponent { - /** Gets a summary component for content `c`. */ - SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } - - /** Gets a summary component where data is not allowed to be stored in `c`. */ - SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } - - /** Gets a summary component where data must be stored in `c`. */ - SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } - - /** Gets a summary component for a parameter at position `pos`. */ - SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } - - /** Gets a summary component for an argument at position `pos`. */ - SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } - - /** Gets a summary component for a return of kind `rk`. */ - SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } - - /** Gets a summary component for synthetic global `sg`. 
*/ - SummaryComponent syntheticGlobal(SyntheticGlobal sg) { - result = TSyntheticGlobalSummaryComponent(sg) - } - - /** - * A synthetic global. This represents some form of global state, which - * summaries can read and write individually. - */ - abstract class SyntheticGlobal extends string { - bindingset[this] - SyntheticGlobal() { any() } - } + private predicate isArgBody(string body) { + exists(AccessPathToken tok | + tok.getName() = "Argument" and + body = tok.getAnArgument() + ) } - /** - * A (non-empty) stack of summary components. - * - * A stack is used to represent where data is read from (input) or where it - * is written to (output). For example, an input stack `[Field f, Argument 0]` - * means that data is read from field `f` from the `0`th argument, while an - * output stack `[Field g, Return]` means that data is written to the field - * `g` of the returned object. - */ - class SummaryComponentStack extends TSummaryComponentStack { - /** Gets the head of this stack. */ - SummaryComponent head() { - this = TSingletonSummaryComponentStack(result) or - this = TConsSummaryComponentStack(result, _) - } - - /** Gets the tail of this stack, if any. */ - SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } - - /** Gets the length of this stack. */ - int length() { - this = TSingletonSummaryComponentStack(_) and result = 1 - or - result = 1 + this.tail().length() - } - - /** Gets the stack obtained by dropping the first `i` elements, if any. */ - SummaryComponentStack drop(int i) { - i = 0 and result = this - or - result = this.tail().drop(i - 1) - } - - /** Holds if this stack contains summary component `c`. */ - predicate contains(SummaryComponent c) { c = this.drop(_).head() } - - /** Gets the bottom element of this stack. */ - SummaryComponent bottom() { - this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() - } - - /** Gets a textual representation of this stack used for MaD models. 
*/ - string getMadRepresentation() { - exists(SummaryComponent head, SummaryComponentStack tail | - head = this.head() and - tail = this.tail() and - result = tail.getMadRepresentation() + "." + head.getMadRepresentation() - ) - or - exists(SummaryComponent c | - this = TSingletonSummaryComponentStack(c) and - result = c.getMadRepresentation() - ) - } - - /** Gets a textual representation of this stack. */ - string toString() { result = this.getMadRepresentation() } + predicate isParsedPositionalParameterPosition(string c, int i) { + isParamBody(c) and + i = AccessPath::parseInt(c) } - /** Provides predicates for constructing stacks of summary components. */ - module SummaryComponentStack { - /** Gets a singleton stack containing `c`. */ - SummaryComponentStack singleton(SummaryComponent c) { - result = TSingletonSummaryComponentStack(c) - } - - /** - * Gets the stack obtained by pushing `head` onto `tail`. - * - * Make sure to override `RequiredSummaryComponentStack::required()` in order - * to ensure that the constructed stack exists. - */ - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { - result = TConsSummaryComponentStack(head, tail) - } - - /** Gets a singleton stack for an argument at position `pos`. */ - SummaryComponentStack argument(ParameterPosition pos) { - result = singleton(SummaryComponent::argument(pos)) - } - - /** Gets a singleton stack representing a return of kind `rk`. */ - SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } + predicate isParsedKeywordParameterPosition(string c, string paramName) { + isParamBody(c) and + c = paramName + ":" } - /** - * A class that exists for QL technical reasons only (the IPA type used - * to represent component stacks needs to be bounded). - */ - class RequiredSummaryComponentStack extends Unit { - /** - * Holds if the stack obtained by pushing `head` onto `tail` is required. 
- */ - abstract predicate required(SummaryComponent head, SummaryComponentStack tail); + predicate isParsedPositionalArgumentPosition(string c, int i) { + isArgBody(c) and + i = AccessPath::parseInt(c) } - /** - * Gets the valid model origin values. - */ - private string getValidModelOrigin() { - result = - [ - "ai", // AI (machine learning) - "df", // Dataflow (model generator) - "tb", // Type based (model generator) - "hq", // Heuristic query - ] - } - - /** - * A class used to represent provenance values for MaD models. - * - * The provenance value is a string of the form `origin-verification` - * (or just `manual`), where `origin` is a value indicating the - * origin of the model, and `verification` is a value indicating, how - * the model was verified. - * - * Examples could be: - * - `df-generated`: A model produced by the model generator, but not verified by a human. - * - `ai-manual`: A model produced by AI, but verified by a human. - */ - class Provenance extends string { - private string verification; - - Provenance() { - exists(string origin | origin = getValidModelOrigin() | - this = origin + "-" + verification and - verification = ["manual", "generated"] - ) - or - this = verification and verification = "manual" - } - - /** - * Holds if this is a valid generated provenance value. - */ - predicate isGenerated() { verification = "generated" } - - /** - * Holds if this is a valid manual provenance value. - */ - predicate isManual() { verification = "manual" } - } - - /** A callable with a flow summary. */ - abstract class SummarizedCallable extends SummarizedCallableBase { - bindingset[this] - SummarizedCallable() { any() } - - /** - * Holds if data may flow from `input` to `output` through this callable. - * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. 
- * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. - * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. - */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if there exists a generated summary that applies to this callable. - */ - final predicate hasGeneratedModel() { - exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) - } - - /** - * Holds if all the summaries that apply to this callable are auto generated and not manually created. - * That is, only apply generated models, when there are no manual models. - */ - final predicate applyGeneratedModel() { - this.hasGeneratedModel() and - not this.hasManualModel() - } - - /** - * Holds if there exists a manual summary that applies to this callable. - */ - final predicate hasManualModel() { - exists(Provenance p | p.isManual() and this.hasProvenance(p)) - } - - /** - * Holds if there exists a manual summary that applies to this callable. - * Always apply manual models if they exist. - */ - final predicate applyManualModel() { this.hasManualModel() } - - /** - * Holds if there exists a summary that applies to this callable - * that has provenance `provenance`. 
- */ - predicate hasProvenance(Provenance provenance) { provenance = "manual" } - } - - /** - * A callable where there is no flow via the callable. - */ - class NeutralSummaryCallable extends NeutralCallable { - NeutralSummaryCallable() { this.getKind() = "summary" } - } - - /** - * A callable that has a neutral model. - */ - class NeutralCallable extends NeutralCallableBase { - private string kind; - private Provenance provenance; - - NeutralCallable() { neutralElement(this, kind, provenance) } - - /** - * Holds if the neutral is auto generated. - */ - final predicate hasGeneratedModel() { provenance.isGenerated() } - - /** - * Holds if there exists a manual neutral that applies to this callable. - */ - final predicate hasManualModel() { provenance.isManual() } - - /** - * Holds if the neutral has provenance `p`. - */ - predicate hasProvenance(Provenance p) { p = provenance } - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = kind } - } -} - -/** - * Provides predicates for compiling flow summaries down to atomic local steps, - * read steps, and store steps. 
- */ -module Private { - private import Public - import AccessPathSyntax - - newtype TSummaryComponent = - TContentSummaryComponent(ContentSet c) or - TParameterSummaryComponent(ArgumentPosition pos) or - TArgumentSummaryComponent(ParameterPosition pos) or - TReturnSummaryComponent(ReturnKind rk) or - TSyntheticGlobalSummaryComponent(SummaryComponent::SyntheticGlobal sg) or - TWithoutContentSummaryComponent(ContentSet c) or - TWithContentSummaryComponent(ContentSet c) - - private TParameterSummaryComponent callbackSelfParam() { - result = TParameterSummaryComponent(callbackSelfParameterPosition()) - } - - newtype TSummaryComponentStack = - TSingletonSummaryComponentStack(SummaryComponent c) or - TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { - any(RequiredSummaryComponentStack x).required(head, tail) - or - any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and - head = callbackSelfParam() - or - derivedFluentFlowPush(_, _, _, head, tail, _) - } - - pragma[nomagic] - private predicate summary( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - ) { - c.propagatesFlow(input, output, preservesValue) - or - // observe side effects of callbacks on input arguments - c.propagatesFlow(output, input, preservesValue) and - preservesValue = true and - isCallbackParameter(input) and - isContentOfArgument(output, _) - or - // flow from the receiver of a callback into the instance-parameter - exists(SummaryComponentStack s, SummaryComponentStack callbackRef | - c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _) - | - callbackRef = s.drop(_) and - (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and - input = callbackRef.tail() and - output = TConsSummaryComponentStack(callbackSelfParam(), input) and - preservesValue = true - ) - or - exists(SummaryComponentStack arg, SummaryComponentStack return | - 
derivedFluentFlow(c, input, arg, return, preservesValue) - | - arg.length() = 1 and - output = return - or - exists(SummaryComponent head, SummaryComponentStack tail | - derivedFluentFlowPush(c, input, arg, head, tail, 0) and - output = SummaryComponentStack::push(head, tail) - ) - ) - or - // Chain together summaries where values get passed into callbacks along the way - exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | - c.propagatesFlow(input, mid, preservesValue1) and - c.propagatesFlow(mid, output, preservesValue2) and - mid.drop(mid.length() - 2) = - SummaryComponentStack::push(TParameterSummaryComponent(_), - SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and - preservesValue = preservesValue1.booleanAnd(preservesValue2) - ) - } - - /** - * Holds if `c` has a flow summary from `input` to `arg`, where `arg` - * writes to (contents of) arguments at position `pos`, and `c` has a - * value-preserving flow summary from the arguments at position `pos` - * to a return value (`return`). - * - * In such a case, we derive flow from `input` to (contents of) the return - * value. - * - * As an example, this simplifies modeling of fluent methods: - * for `StringBuilder.append(x)` with a specified value flow from qualifier to - * return value and taint flow from argument 0 to the qualifier, then this - * allows us to infer taint flow from argument 0 to the return value. 
- */ - pragma[nomagic] - private predicate derivedFluentFlow( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponentStack return, boolean preservesValue - ) { - exists(ParameterPosition pos | - summary(c, input, arg, preservesValue) and - isContentOfArgument(arg, pos) and - summary(c, SummaryComponentStack::argument(pos), return, true) and - return.bottom() = TReturnSummaryComponent(_) - ) - } - - pragma[nomagic] - private predicate derivedFluentFlowPush( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponent head, SummaryComponentStack tail, int i - ) { - derivedFluentFlow(c, input, arg, tail, _) and - head = arg.drop(i).head() and - i = arg.length() - 2 - or - exists(SummaryComponent head0, SummaryComponentStack tail0 | - derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and - head = arg.drop(i).head() and - tail = SummaryComponentStack::push(head0, tail0) - ) - } - - private predicate isCallbackParameter(SummaryComponentStack s) { - s.head() = TParameterSummaryComponent(_) and exists(s.tail()) - } - - private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { - s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) - or - s = SummaryComponentStack::argument(pos) - } - - private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, _, s, _) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TContentSummaryComponent(_) and - s = out.tail() - ) - or - // Add the argument node corresponding to the requested post-update node - inputState(c, s) and isCallbackParameter(s) - } - - private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, s, _, _) - or - exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = 
TParameterSummaryComponent(_) and - s = out.tail() - ) - or - // Add the post-update node corresponding to the requested argument node - outputState(c, s) and isCallbackParameter(s) - or - // Add the parameter node for parameter side-effects - outputState(c, s) and s = SummaryComponentStack::argument(_) - } - - private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or - TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } - - /** - * A state used to break up (complex) flow summaries into atomic flow steps. - * For a flow summary - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * the following states are used: - * - * - `TSummaryNodeInputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _have been read_ from the - * input. - * - `TSummaryNodeOutputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _remain to be written_ to - * the output. - */ - private class SummaryNodeState extends TSummaryNodeState { - /** Holds if this state is a valid input state for `c`. */ - pragma[nomagic] - predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeInputState(s) and - inputState(c, s) - } - - /** Holds if this state is a valid output state for `c`. */ - pragma[nomagic] - predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeOutputState(s) and - outputState(c, s) - } - - /** Gets a textual representation of this state. 
*/ - string toString() { - exists(SummaryComponentStack s | - this = TSummaryNodeInputState(s) and - result = "read: " + s - ) - or - exists(SummaryComponentStack s | - this = TSummaryNodeOutputState(s) and - result = "to write: " + s - ) - } - } - - private newtype TSummaryNode = - TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { - summaryNodeRange(c, state) - } or - TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { - summaryParameterNodeRange(c, pos) - } - - abstract class SummaryNode extends TSummaryNode { - abstract string toString(); - - abstract SummarizedCallable getSummarizedCallable(); - } - - private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { - private SummarizedCallable c; - private SummaryNodeState state; - - SummaryInternalNode() { this = TSummaryInternalNode(c, state) } - - override string toString() { result = "[summary] " + state + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { - private SummarizedCallable c; - private ParameterPosition pos; - - SummaryParamNode() { this = TSummaryParameterNode(c, pos) } - - override string toString() { result = "[summary param] " + pos + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - /** - * Holds if `state` represents having read from a parameter at position - * `pos` in `c`. In this case we are not synthesizing a data-flow node, - * but instead assume that a relevant parameter node already exists. - */ - private predicate parameterReadState( - SummarizedCallable c, SummaryNodeState state, ParameterPosition pos - ) { - state.isInputState(c, SummaryComponentStack::argument(pos)) - } - - /** - * Holds if a synthesized summary node is needed for the state `state` in summarized - * callable `c`. 
- */ - private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { - state.isInputState(c, _) and - not parameterReadState(c, state, _) - or - state.isOutputState(c, _) - } - - pragma[noinline] - private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | state.isInputState(c, s) | - result = TSummaryInternalNode(c, state) - or - exists(ParameterPosition pos | - parameterReadState(c, state, pos) and - result = TSummaryParameterNode(c, pos) - ) - ) - } - - pragma[noinline] - private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | - state.isOutputState(c, s) and - result = TSummaryInternalNode(c, state) - ) - } - - /** - * Holds if a write targets `post`, which is a post-update node for a - * parameter at position `pos` in `c`. - */ - private predicate isParameterPostUpdate( - SummaryNode post, SummarizedCallable c, ParameterPosition pos - ) { - post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) - } - - /** Holds if a parameter node at position `pos` is required for `c`. 
*/ - private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { - parameterReadState(c, _, pos) - or - // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context - any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) - } - - private predicate callbackOutput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk - ) { - any(SummaryNodeState state).isInputState(c, s) and - s.head() = TReturnSummaryComponent(rk) and - receiver = summaryNodeInputState(c, s.tail()) - } - - private predicate callbackInput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos - ) { - any(SummaryNodeState state).isOutputState(c, s) and - s.head() = TParameterSummaryComponent(pos) and - receiver = summaryNodeInputState(c, s.tail()) - } - - /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ - predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { - callbackOutput(c, _, receiver, _) - or - callbackInput(c, _, receiver, _) - } - - /** - * Gets the type of synthesized summary node `n`. - * - * The type is computed based on the language-specific predicates - * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and - * `getCallbackReturnType()`. 
- */ - DataFlowType summaryNodeType(SummaryNode n) { - exists(SummaryNode pre | - summaryPostUpdateNode(n, pre) and - result = summaryNodeType(pre) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | - n = summaryNodeInputState(c, s) and - ( - exists(ContentSet cont | result = getContentType(cont) | - head = TContentSummaryComponent(cont) or - head = TWithContentSummaryComponent(cont) - ) - or - head = TWithoutContentSummaryComponent(_) and - result = summaryNodeType(summaryNodeInputState(c, s.tail())) - or - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = - getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), rk) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - or - exists(ParameterPosition pos | - head = TArgumentSummaryComponent(pos) and - result = getParameterType(c, pos) - ) - ) - or - n = summaryNodeOutputState(c, s) and - ( - exists(ContentSet cont | - head = TContentSummaryComponent(cont) and result = getContentType(cont) - ) - or - s.length() = 1 and - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = getReturnType(c, rk) - ) - or - exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | - result = - getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), pos) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - ) - ) - } - - /** Holds if summary node `p` is a parameter with position `pos`. */ - predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { - p = TSummaryParameterNode(_, pos) - } - - /** Holds if summary node `out` contains output of kind `rk` from call `c`. 
*/ - predicate summaryOutNode(DataFlowCall c, SummaryNode out, ReturnKind rk) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackOutput(callable, s, receiver, rk) and - out = summaryNodeInputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `arg` is at position `pos` in the call `c`. */ - predicate summaryArgumentNode(DataFlowCall c, SummaryNode arg, ArgumentPosition pos) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackInput(callable, s, receiver, pos) and - arg = summaryNodeOutputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { - exists(SummarizedCallable c, ParameterPosition pos | - isParameterPostUpdate(post, c, pos) and - pre = TSummaryParameterNode(c, pos) - ) - or - exists(SummarizedCallable callable, SummaryComponentStack s | - callbackInput(callable, s, _, _) and - pre = summaryNodeOutputState(callable, s) and - post = summaryNodeInputState(callable, s) - ) - } - - /** Holds if summary node `ret` is a return node of kind `rk`. */ - predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { - exists(SummaryComponentStack s | - ret = summaryNodeOutputState(_, s) and - s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) - ) - } - - /** - * Holds if flow is allowed to pass from parameter `p`, to a return - * node, and back out to `p`. 
- */ - predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | - p.isParameterOf(inject(c), pragma[only_bind_into](ppos)) - | - exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | - summary(c, inputContents, outputContents, _) and - inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and - outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) - ) - ) - } - - /** Provides a compilation of flow summaries to atomic data-flow steps. */ - module Steps { - /** - * Holds if there is a local step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { - exists( - SummarizedCallable c, SummaryComponentStack inputContents, - SummaryComponentStack outputContents - | - summary(c, inputContents, outputContents, preservesValue) and - pred = summaryNodeInputState(c, inputContents) and - succ = summaryNodeOutputState(c, outputContents) - | - preservesValue = true - or - preservesValue = false and not summary(c, inputContents, outputContents, true) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s | - pred = summaryNodeInputState(c, s.tail()) and - succ = summaryNodeInputState(c, s) and - s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and - preservesValue = true - ) - } - - /** - * Holds if there is a read step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. 
- */ - predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeInputState(sc, s.tail()) and - succ = summaryNodeInputState(sc, s) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a store step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeOutputState(sc, s) and - succ = summaryNodeOutputState(sc, s.tail()) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a jump step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { - exists(SummaryComponentStack s | - s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and - pred = summaryNodeOutputState(_, s) and - succ = summaryNodeInputState(_, s) - ) - } - - /** - * Holds if values stored inside content `c` are cleared at `n`. `n` is a - * synthesized summary node, so in order for values to be cleared at calls - * to the relevant method, it is important that flow does not pass over - * the argument, either via use-use flow or def-use flow. - * - * Example: - * - * ``` - * a.b = taint; - * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier - * sink(a.b); - * ``` - * - * In the above, flow should not pass from `a` on the first line (or the second - * line) to `a` on the third line. Instead, there will be synthesized flow from - * `a` on line 2 to the post-update node for `a` on that line (via an intermediate - * node where field `b` is cleared). 
- */ - predicate summaryClearsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withoutContent(c) - ) - } - - /** - * Holds if the value that is being tracked is expected to be stored inside - * content `c` at `n`. - */ - predicate summaryExpectsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withContent(c) - ) - } - - pragma[noinline] - private predicate viableParam( - DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p - ) { - exists(DataFlowCallable c | - c = inject(sc) and - p = TSummaryParameterNode(sc, ppos) and - c = viableCallable(call) - ) - } - - pragma[nomagic] - private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { - exists(ParameterPosition ppos | - argumentPositionMatch(call, arg, ppos) and - viableParam(call, sc, ppos, result) - ) - } - - /** - * Holds if `p` can reach `n` in a summarized callable, using only value-preserving - * local steps. `clearsOrExpects` records whether any node on the path from `p` to - * `n` either clears or expects contents. - */ - private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { - viableParam(_, _, _, p) and - n = p and - clearsOrExpects = false - or - exists(SummaryNode mid, boolean clearsOrExpectsMid | - paramReachesLocal(p, mid, clearsOrExpectsMid) and - summaryLocalStep(mid, n, true) and - if - summaryClearsContent(n, _) or - summaryExpectsContent(n, _) - then clearsOrExpects = true - else clearsOrExpects = clearsOrExpectsMid - ) - } - - /** - * Holds if use-use flow starting from `arg` should be prohibited. 
- * - * This is the case when `arg` is the argument of a call that targets a - * flow summary where the corresponding parameter either clears contents - * or expects contents. - */ - pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { - exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | - paramReachesLocal(p, ret, true) and - p = summaryArgParam(_, arg, sc) and - p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and - isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) - ) - } - - pragma[nomagic] - private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { - summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) - or - exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | - paramReachesLocal(p, pre, _) and - summaryPostUpdateNode(ret, pre) and - p = TSummaryParameterNode(_, pos) and - rk.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - bindingset[ret] - private SummaryParamNode summaryArgParamRetOut( - ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc - ) { - exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam(call, arg, sc) and - summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and - out = pragma[only_bind_into](rk).getAnOutNode(call) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple value-preserving flow - * summary, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { - exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | - summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and - summaryReturnNode(ret, pragma[only_bind_into](rk)) and - out = getAnOutNode(call, pragma[only_bind_into](rk)) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple flow summary involving taint - * step, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { - exists(SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) - ) - } - - /** - * Holds if there is a read(+taint) of `c` from `arg` to `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and - summaryLocalStep(mid, ret, _) - ) - } - - /** - * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and - summaryStoreStep(mid, c, ret) - ) - } - } - - /** - * Provides a means of translating externally (e.g., MaD) defined flow - * summaries into a `SummarizedCallable`s. - */ - module External { - /** Holds if `spec` is a relevant external specification. */ - private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _, _) or - summaryElement(_, _, spec, _, _) or - sourceElement(_, spec, _, _) or - sinkElement(_, spec, _, _) - } - - private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { relevantSpec(this) } - } - - /** Holds if specification component `token` parses as parameter `pos`. */ - predicate parseParam(AccessPathToken token, ArgumentPosition pos) { - token.getName() = "Parameter" and - pos = parseParamBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as argument `pos`. */ - predicate parseArg(AccessPathToken token, ParameterPosition pos) { - token.getName() = "Argument" and - pos = parseArgBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as synthetic global `sg`. 
*/ - predicate parseSynthGlobal(AccessPathToken token, string sg) { - token.getName() = "SyntheticGlobal" and - sg = token.getAnArgument() - } - - private class SyntheticGlobalFromAccessPath extends SummaryComponent::SyntheticGlobal { - SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } - } - - private SummaryComponent interpretComponent(AccessPathToken token) { - exists(ParameterPosition pos | - parseArg(token, pos) and result = SummaryComponent::argument(pos) - ) - or - exists(ArgumentPosition pos | - parseParam(token, pos) and result = SummaryComponent::parameter(pos) - ) - or - token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) - or - exists(string sg | - parseSynthGlobal(token, sg) and result = SummaryComponent::syntheticGlobal(sg) - ) - or - result = interpretComponentSpecific(token) - } - - /** - * Holds if `spec` specifies summary component stack `stack`. - */ - predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { - interpretSpec(spec, spec.getNumToken(), stack) - } - - /** Holds if the first `n` tokens of `spec` resolves to `stack`. 
*/ - private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { - n = 1 and - stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) - or - exists(SummaryComponent head, SummaryComponentStack tail | - interpretSpec(spec, n, head, tail) and - stack = SummaryComponentStack::push(head, tail) - ) - } - - /** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */ - private predicate interpretSpec( - AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail - ) { - interpretSpec(spec, n - 1, tail) and - head = interpretComponent(spec.getToken(n - 1)) - } - - private class MkStack extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - interpretSpec(_, _, head, tail) - } - } - - private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } - - private predicate relevantSummaryElementGenerated( - AccessPath inSpec, AccessPath outSpec, string kind - ) { - exists(Provenance provenance | - provenance.isGenerated() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) and - not this.applyManualModel() - } - - private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { - exists(Provenance provenance | - provenance.isManual() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) - or - this.relevantSummaryElementGenerated(inSpec, outSpec, kind) - } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(AccessPath inSpec, AccessPath outSpec, string kind | - this.relevantSummaryElement(inSpec, outSpec, kind) and - interpretSpec(inSpec, input) and - interpretSpec(outSpec, output) - | - kind = "value" and preservesValue = true - or - kind = "taint" and preservesValue = false - ) - } - - override predicate 
hasProvenance(Provenance provenance) { - summaryElement(this, _, _, _, provenance) - } - } - - /** Holds if component `c` of specification `spec` cannot be parsed. */ - predicate invalidSpecComponent(AccessPath spec, string c) { - c = spec.getToken(_) and - not exists(interpretComponent(c)) - } - - /** Holds if `provenance` is not a valid provenance value. */ - bindingset[provenance] - predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } - - /** - * Holds if token `part` of specification `spec` has an invalid index. - * E.g., `Argument[-1]`. - */ - predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { - part = spec.getToken(_) and - part.getName() = ["Parameter", "Argument"] and - AccessPath::parseInt(part.getArgumentList()) < 0 - } - - private predicate inputNeedsReference(AccessPathToken c) { - c.getName() = "Argument" or - inputNeedsReferenceSpecific(c) - } - - private predicate outputNeedsReference(AccessPathToken c) { - c.getName() = ["Argument", "ReturnValue"] or - outputNeedsReferenceSpecific(c) - } - - private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { - exists(SourceOrSinkElement e | - sourceElement(e, output, kind, _) and - if outputNeedsReference(output.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { - exists(SourceOrSinkElement e | - sinkElement(e, input, kind, _) and - if inputNeedsReference(input.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - /** Holds if the first `n` tokens of `output` resolve to the given interpretation. 
*/ - private predicate interpretOutput( - AccessPath output, int n, InterpretNode ref, InterpretNode node - ) { - sourceElementRef(ref, output, _) and - n = 0 and - ( - if output = "" - then - // Allow language-specific interpretation of the empty access path - interpretOutputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretOutput(output, n - 1, ref, mid) and - c = output.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) and - parameterMatch(ppos, apos) - | - c = "Parameter" or parseParam(c, apos) - ) - or - c = "ReturnValue" and - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) - or - interpretOutputSpecific(c, mid, node) - ) - } - - /** Holds if the first `n` tokens of `input` resolve to the given interpretation. 
*/ - private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) { - sinkElementRef(ref, input, _) and - n = 0 and - ( - if input = "" - then - // Allow language-specific interpretation of the empty access path - interpretInputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretInput(input, n - 1, ref, mid) and - c = input.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ReturnNodeExt ret | - c = "ReturnValue" and - ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - interpretInputSpecific(c, mid, node) - ) - } - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate isSourceNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath output | - sourceElementRef(ref, output, kind) and - interpretOutput(output, output.getNumToken(), ref, node) - ) - } - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate isSinkNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath input | - sinkElementRef(ref, input, kind) and - interpretInput(input, input.getNumToken(), ref, node) - ) - } - } - - /** Provides a query predicate for outputting a set of relevant flow summaries. */ - module TestOutput { - /** A flow summary to include in the `summary/1` query predicate. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - /** Gets the string representation of this callable used by `summary/1`. */ - abstract string getCallableCsv(); - - /** Holds if flow is propagated between `input` and `output`. 
*/ - predicate relevantSummary( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - super.propagatesFlow(input, output, preservesValue) - } - - string toString() { result = super.toString() } - } - - /** A model to include in the `neutral/1` query predicate. */ - abstract class RelevantNeutralCallable instanceof NeutralCallable { - /** Gets the string representation of this callable used by `neutral/1`. */ - abstract string getCallableCsv(); - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = super.getKind() } - - string toString() { result = super.toString() } - } - - /** Render the kind in the format used in flow summaries. */ - private string renderKind(boolean preservesValue) { - preservesValue = true and result = "value" - or - preservesValue = false and result = "taint" - } - - private string renderProvenance(SummarizedCallable c) { - if c.applyManualModel() then result = "manual" else c.hasProvenance(result) - } - - private string renderProvenanceNeutral(NeutralCallable c) { - if c.hasManualModel() then result = "manual" else c.hasProvenance(result) - } - - /** - * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. - * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", - * ext is hardcoded to empty. - */ - query predicate summary(string csv) { - exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - | - c.relevantSummary(input, output, preservesValue) and - csv = - c.getCallableCsv() // Callable information - + input.getMadRepresentation() + ";" // input - + output.getMadRepresentation() + ";" // output - + renderKind(preservesValue) + ";" // kind - + renderProvenance(c) // provenance - ) - } - - /** - * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. 
- * The syntax is: "namespace;type;name;signature;kind;provenance"", - */ - query predicate neutral(string csv) { - exists(RelevantNeutralCallable c | - csv = - c.getCallableCsv() // Callable information - + c.getKind() + ";" // kind - + renderProvenanceNeutral(c) // provenance - ) - } - } - - /** - * Provides query predicates for rendering the generated data flow graph for - * a summarized callable. - * - * Import this module into a `.ql` file of `@kind graph` to render the graph. - * The graph is restricted to callables from `RelevantSummarizedCallable`. - */ - module RenderSummarizedCallable { - /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - string toString() { result = super.toString() } - } - - private newtype TNodeOrCall = - MkNode(SummaryNode n) { - exists(RelevantSummarizedCallable c | - n = TSummaryInternalNode(c, _) - or - n = TSummaryParameterNode(c, _) - ) - } or - MkCall(DataFlowCall call) { - call = summaryDataFlowCall(_) and - call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) - } - - private class NodeOrCall extends TNodeOrCall { - SummaryNode asNode() { this = MkNode(result) } - - DataFlowCall asCall() { this = MkCall(result) } - - string toString() { - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
- */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - } - } - - query predicate nodes(NodeOrCall n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { - exists(boolean preservesValue | - Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and - if preservesValue = true then value = "value" else value = "taint" - ) - or - exists(ContentSet c | - Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and - value = "read (" + c + ")" - or - Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and - value = "store (" + c + ")" - or - Private::Steps::summaryClearsContent(a.asNode(), c) and - b = a and - value = "clear (" + c + ")" - or - Private::Steps::summaryExpectsContent(a.asNode(), c) and - b = a and - value = "expect (" + c + ")" - ) - or - summaryPostUpdateNode(b.asNode(), a.asNode()) and - value = "post-update" - or - b.asCall() = summaryDataFlowCall(a.asNode()) and - value = "receiver" - or - exists(ArgumentPosition pos | - summaryArgumentNode(b.asCall(), a.asNode(), pos) and - value = "argument (" + pos + ")" - ) - } - - query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { - key = "semmle.label" and - value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") - } + predicate isParsedKeywordArgumentPosition(string c, string argName) { + isArgBody(c) and + c = argName + ":" } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll deleted file mode 100644 index a29b97b72c2..00000000000 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll +++ /dev/null @@ -1,324 +0,0 @@ -/** - * Provides 
Python specific classes and predicates for defining flow summaries. - * - * Flow summaries are defined for callables that are not extracted. - * Such callables go by different names in different parts of our codebase: - * - * - in `FlowSummary.qll`, which is user facing, they are called `SummarizedCallable`s. - * These contain summaries, implemented by the user via the predicates `propagatesFlow` and `propagatesFlowExt`. - * - * - in the data flow layer, they are called `LibraryCallable`s (as in the Ruby codebase). - * These are identified by strings and has predicates for finding calls to them. - * - * Having both extracted and non-extracted callables means that we now have three types of calls: - * - Extracted calls to extracted callables, either `NormalCall` or `SpecialCall`. These are handled by standard data flow. - * - Extracted calls to non-extracted callables, `LibraryCall`. These are handled by looking up the relevant summary when the - * global data flow graph is connected up via `getViableCallable`. - * - Non-extracted calls, `SummaryCall`. These are synthesised by the flow summary framework. - * - * The first two can be referred to as `ExtractedDataFlowCall`. In fact, `LibraryCall` is a subclass of `NormalCall`, where - * `getCallable` is set to `none()`. The member predicate `ExtractedDataFlowCall::getCallable` is _not_ the mechanism for - * call resolution in global data flow. That mechanism is `getViableCallable`. - * Resolving a call to a non-extracted callable goes via `LibraryCallable::getACall`, which may involve type tracking. - * To avoid that type tracking becomes mutually recursive with data flow, type tracking must use a call graph not including summaries. - * Type tracking sees the callgraph given by `ExtractedDataFlowCall::getACallable`. - * - * We do not support summaries of special methods via the special methods framework, - * the summary would have to identify the call. 
- * - * We might, while we still extract the standard library, want to support flow summaries of - * extracted callables, so that we can model part of the standard library with flow summaries. - * For this to work, we have be careful with the enclosing callable predicate. - */ - -private import python -private import DataFlowPrivate -private import DataFlowPublic -private import DataFlowImplCommon -private import FlowSummaryImpl::Private -private import FlowSummaryImpl::Public -private import semmle.python.dataflow.new.FlowSummary as FlowSummary - -/** - * A class of callables that are candidates for flow summary modeling. - */ -class SummarizedCallableBase = string; - -/** - * A class of callables that are candidates for neutral modeling. - */ -class NeutralCallableBase = string; - -/** View a `SummarizedCallable` as a `DataFlowCallable`. */ -DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c } - -/** Gets the parameter position of the instance parameter. */ -ArgumentPosition callbackSelfParameterPosition() { none() } // disables implicit summary flow to `this` for callbacks - -/** Gets the synthesized data-flow call for `receiver`. */ -SummaryCall summaryDataFlowCall(SummaryNode receiver) { receiver = result.getReceiver() } - -/** Gets the type of content `c`. */ -DataFlowType getContentType(Content c) { any() } - -/** Gets the type of the parameter at the given position. */ -DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { any() } - -/** Gets the return type of kind `rk` for callable `c`. */ -bindingset[c, rk] -DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() } - -/** - * Gets the type of the parameter matching arguments at position `pos` in a - * synthesized call that targets a callback of type `t`. 
- */ -bindingset[t, pos] -DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() } - -/** - * Gets the return type of kind `rk` in a synthesized call that targets a - * callback of type `t`. - */ -DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() } - -/** Gets the type of synthetic global `sg`. */ -DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() } - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and provenance `provenance`. - */ -predicate summaryElement( - FlowSummary::SummarizedCallable c, string input, string output, string kind, string provenance -) { - exists(boolean preservesValue | - c.propagatesFlowExt(input, output, preservesValue) and - (if preservesValue = true then kind = "value" else kind = "taint") and - provenance = "manual" - ) -} - -/** - * Holds if a neutral model exists for `c` of kind `kind` - * and with provenance `provenance`. - * Note. Neutral models have not been implemented for Python. - */ -predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() } - -/** - * Gets the summary component for specification component `c`, if any. - * - * This covers all the Python-specific components of a flow summary. 
- */ -SummaryComponent interpretComponentSpecific(AccessPathToken c) { - c = "ListElement" and - result = FlowSummary::SummaryComponent::listElement() - or - c = "SetElement" and - result = FlowSummary::SummaryComponent::setElement() - or - exists(int index | - c.getAnArgument("TupleElement") = index.toString() and - result = FlowSummary::SummaryComponent::tupleElement(index) - ) - or - exists(string key | - c.getAnArgument("DictionaryElement") = key and - result = FlowSummary::SummaryComponent::dictionaryElement(key) - ) - or - c = "DictionaryElementAny" and - result = FlowSummary::SummaryComponent::dictionaryElementAny() - or - exists(string attr | - c.getAnArgument("Attribute") = attr and - result = FlowSummary::SummaryComponent::attribute(attr) - ) -} - -private string getContentSpecific(Content cs) { - cs = TListElementContent() and result = "ListElement" - or - cs = TSetElementContent() and result = "SetElement" - or - exists(int index | - cs = TTupleElementContent(index) and result = "TupleElement[" + index.toString() + "]" - ) - or - exists(string key | - cs = TDictionaryElementContent(key) and result = "DictionaryElement[" + key + "]" - ) - or - cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" - or - exists(string attr | cs = TAttributeContent(attr) and result = "Attribute[" + attr + "]") -} - -/** Gets the textual representation of a summary component in the format used for MaD models. */ -string getMadRepresentationSpecific(SummaryComponent sc) { - exists(Content c | - sc = TContentSummaryComponent(c) and - result = getContentSpecific(c) - ) -} - -/** Gets the textual representation of a parameter position in the format used for flow summaries. 
*/ -string getParameterPosition(ParameterPosition pos) { - pos.isSelf() and result = "self" - or - exists(int i | - pos.isPositional(i) and - result = i.toString() - ) - or - exists(string name | - pos.isKeyword(name) and - result = name + ":" - ) -} - -/** Gets the textual representation of an argument position in the format used for flow summaries. */ -string getArgumentPosition(ArgumentPosition pos) { - pos.isSelf() and result = "self" - or - exists(int i | - pos.isPositional(i) and - result = i.toString() - ) - or - exists(string name | - pos.isKeyword(name) and - result = name + ":" - ) -} - -/** Holds if input specification component `c` needs a reference. */ -predicate inputNeedsReferenceSpecific(string c) { none() } - -/** Holds if output specification component `c` needs a reference. */ -predicate outputNeedsReferenceSpecific(string c) { none() } - -/** Gets the return kind corresponding to specification `"ReturnValue"`. */ -ReturnKind getReturnValueKind() { any() } - -/** - * All definitions in this module are required by the shared implementation - * (for source/sink interpretation), but they are unused for Python, where - * we rely on API graphs instead. - */ -private module UnusedSourceSinkInterpretation { - /** - * Holds if an external source specification exists for `n` with output specification - * `output`, kind `kind`, and provenance `provenance`. - */ - predicate sourceElement(AstNode n, string output, string kind, string provenance) { none() } - - /** - * Holds if an external sink specification exists for `n` with input specification - * `input`, kind `kind` and provenance `provenance`. - */ - predicate sinkElement(AstNode n, string input, string kind, string provenance) { none() } - - class SourceOrSinkElement = AstNode; - - /** An entity used to interpret a source/sink specification. */ - class InterpretNode extends AstNode_ { - // InterpretNode is going away, this is just a dummy implementation. 
- // However, we have some old location tests picking them up, so we - // explicitly define them to not exist. - InterpretNode() { none() } - - /** Gets the element that this node corresponds to, if any. */ - SourceOrSinkElement asElement() { none() } - - /** Gets the data-flow node that this node corresponds to, if any. */ - Node asNode() { none() } - - /** Gets the call that this node corresponds to, if any. */ - DataFlowCall asCall() { none() } - - /** Gets the callable that this node corresponds to, if any. */ - DataFlowCallable asCallable() { none() } - - /** Gets the target of this call, if any. */ - SourceOrSinkElement getCallTarget() { none() } - } - - /** Provides additional sink specification logic. */ - predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() } - - /** Provides additional source specification logic. */ - predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() } -} - -import UnusedSourceSinkInterpretation - -module ParsePositions { - private import FlowSummaryImpl - - private predicate isParamBody(string body) { - exists(AccessPathToken tok | - tok.getName() = "Parameter" and - body = tok.getAnArgument() - ) - } - - private predicate isArgBody(string body) { - exists(AccessPathToken tok | - tok.getName() = "Argument" and - body = tok.getAnArgument() - ) - } - - predicate isParsedPositionalParameterPosition(string c, int i) { - isParamBody(c) and - i = AccessPath::parseInt(c) - } - - predicate isParsedKeywordParameterPosition(string c, string paramName) { - isParamBody(c) and - c = paramName + ":" - } - - predicate isParsedPositionalArgumentPosition(string c, int i) { - isArgBody(c) and - i = AccessPath::parseInt(c) - } - - predicate isParsedKeywordArgumentPosition(string c, string argName) { - isArgBody(c) and - c = argName + ":" - } -} - -/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. 
*/ -ArgumentPosition parseParamBody(string s) { - exists(int i | - ParsePositions::isParsedPositionalParameterPosition(s, i) and - result.isPositional(i) - ) - or - exists(string name | - ParsePositions::isParsedKeywordParameterPosition(s, name) and - result.isKeyword(name) - ) - or - s = "self" and - result.isSelf() -} - -/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */ -ParameterPosition parseArgBody(string s) { - exists(int i | - ParsePositions::isParsedPositionalArgumentPosition(s, i) and - result.isPositional(i) - ) - or - exists(string name | - ParsePositions::isParsedKeywordArgumentPosition(s, name) and - result.isKeyword(name) - ) - or - s = "self" and - result.isSelf() -} diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index 7b9d4f06f31..5fe6dc154a8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -177,7 +177,7 @@ class Boolean extends boolean { } private import SummaryTypeTracker as SummaryTypeTracker -private import semmle.python.dataflow.new.FlowSummary as FlowSummary +private import semmle.python.dataflow.new.internal.FlowSummaryImpl as FlowSummaryImpl private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch pragma[noinline] @@ -205,30 +205,30 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { TypeTrackerContentFilter getFilterFromWithContentStep(TypeTrackerContent content) { none() } // Callables - class SummarizedCallable = FlowSummary::SummarizedCallable; + class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl; // Summaries and their stacks - class SummaryComponent = FlowSummary::SummaryComponent; + class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent; - class SummaryComponentStack = 
FlowSummary::SummaryComponentStack; + class SummaryComponentStack = FlowSummaryImpl::Private::SummaryComponentStack; - predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; + predicate singleton = FlowSummaryImpl::Private::SummaryComponentStack::singleton/1; - predicate push = FlowSummary::SummaryComponentStack::push/2; + predicate push = FlowSummaryImpl::Private::SummaryComponentStack::push/2; // Relating content to summaries - predicate content = FlowSummary::SummaryComponent::content/1; + predicate content = FlowSummaryImpl::Private::SummaryComponent::content/1; SummaryComponent withoutContent(TypeTrackerContent contents) { none() } SummaryComponent withContent(TypeTrackerContent contents) { none() } - predicate return = FlowSummary::SummaryComponent::return/0; + predicate return = FlowSummaryImpl::Private::SummaryComponent::return/0; // Relating nodes to summaries Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) { exists(DataFlowDispatch::ParameterPosition pos | - arg = FlowSummary::SummaryComponent::argument(pos) and + arg = FlowSummaryImpl::Private::SummaryComponent::argument(pos) and argumentPositionMatch(call, result, pos) and isPostUpdate = [false, true] // todo: implement when/if Python uses post-update nodes in type tracking ) @@ -238,7 +238,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { exists( DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos, Parameter p | - param = FlowSummary::SummaryComponent::parameter(apos) and + param = FlowSummaryImpl::Private::SummaryComponent::parameter(apos) and DataFlowDispatch::parameterMatch(ppos, apos) and result.asCfgNode().getNode() = p and ( @@ -254,14 +254,16 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { } Node returnOf(Node callable, SummaryComponent return) { - return = FlowSummary::SummaryComponent::return() and + return = FlowSummaryImpl::Private::SummaryComponent::return() 
and // `result` should be the return value of a callable expression (lambda or function) referenced by `callable` result.asCfgNode() = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() } // Relating callables to nodes - Node callTo(SummarizedCallable callable) { result = callable.getACallSimple() } + Node callTo(SummarizedCallable callable) { + result = callable.(DataFlowDispatch::LibraryCallable).getACallSimple() + } } private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; diff --git a/python/ql/lib/semmle/python/frameworks/Flask.qll b/python/ql/lib/semmle/python/frameworks/Flask.qll index a0d431de312..1c000250b3a 100644 --- a/python/ql/lib/semmle/python/frameworks/Flask.qll +++ b/python/ql/lib/semmle/python/frameworks/Flask.qll @@ -624,7 +624,7 @@ module Flask { .getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -650,7 +650,7 @@ module Flask { .getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and // Technically it's `Iterator[str]`, but list will do :) output = "ReturnValue.ListElement" and diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 11806ad3a59..9def059cdcf 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3085,7 +3085,7 @@ private module StdlibPrivate { result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean 
preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input in ["Argument[0]", "Argument[pattern:]"] and output = "ReturnValue.Attribute[pattern]" and preservesValue = true @@ -3116,7 +3116,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string arg | this = "re.Match" and arg = "Argument[1]" or @@ -3173,7 +3173,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { methodName = "expand" and preservesValue = false and ( @@ -3229,7 +3229,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(int offset | // for non-compiled regex the first argument is the pattern, so we need to // account for this difference @@ -4079,7 +4079,7 @@ private module StdlibPrivate { result = API::builtin("dict").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[0].DictionaryElement[" + key + "]" and output = "ReturnValue.DictionaryElement[" + key + "]" and @@ -4108,7 +4108,7 @@ private module StdlibPrivate { result = API::builtin("list").getAValueReachableFromSource() } - override predicate 
propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4138,7 +4138,7 @@ private module StdlibPrivate { result = API::builtin("tuple").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() | input = "Argument[0].TupleElement[" + i.toString() + "]" and output = "ReturnValue.TupleElement[" + i.toString() + "]" and @@ -4163,7 +4163,7 @@ private module StdlibPrivate { result = API::builtin("set").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4193,8 +4193,8 @@ private module StdlibPrivate { result = API::builtin("frozenset").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - any(SetSummary s).propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + any(SetSummary s).propagatesFlow(input, output, preservesValue) } } @@ -4211,7 +4211,7 @@ private module StdlibPrivate { result = API::builtin("reversed").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4241,7 +4241,7 @@ private module StdlibPrivate { result = API::builtin("sorted").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, 
string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string content | content = "ListElement" or @@ -4273,7 +4273,7 @@ private module StdlibPrivate { result = API::builtin("iter").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4303,7 +4303,7 @@ private module StdlibPrivate { result = API::builtin("next").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[0].ListElement" or @@ -4336,7 +4336,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string content | content = "ListElement" or @@ -4378,7 +4378,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].ListElement" and output = "ReturnValue" and preservesValue = true @@ -4415,7 +4415,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue" and preservesValue = 
true @@ -4438,7 +4438,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue" and preservesValue = true @@ -4460,7 +4460,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // default value input = "Argument[1]" and output = "ReturnValue" and @@ -4483,7 +4483,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue.TupleElement[1]" and @@ -4509,7 +4509,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "setdefault" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // store/read steps with dictionary content of this is modeled in DataFlowPrivate input = "Argument[1]" and output = "ReturnValue" and @@ -4538,7 +4538,7 @@ private module StdlibPrivate { override DataFlow::ArgumentNode getACallback() { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // If key is 
in the dictionary, return its value. input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue" and @@ -4567,7 +4567,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "values" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue.ListElement" and @@ -4594,7 +4594,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "keys" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent input = "Argument[self]" and output = "ReturnValue" and @@ -4618,7 +4618,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "items" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() | input = "Argument[self].DictionaryElement[" + key + "]" and output = "ReturnValue.ListElement.TupleElement[1]" and @@ -4648,7 +4648,7 @@ private module StdlibPrivate { result.(DataFlow::AttrRead).getAttributeName() = "append" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // newly added element added to this input = "Argument[0]" and output = "Argument[self].ListElement" and @@ -4675,7 +4675,7 @@ private module StdlibPrivate { 
result.(DataFlow::AttrRead).getAttributeName() = "add" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // newly added element added to this input = "Argument[0]" and output = "Argument[self].SetElement" and @@ -4705,7 +4705,7 @@ private module StdlibPrivate { API::moduleImport("os").getMember(["getenv", "getenvb"]).getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input in ["Argument[1]", "Argument[default:]"] and output = "ReturnValue" and preservesValue = true diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 1cb4e189339..dd433152751 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -70,8 +70,8 @@ private module API = Specific::API; private module DataFlow = Specific::DataFlow; -private import Specific::AccessPathSyntax private import ApiGraphModelsExtensions as Extensions +private import codeql.dataflow.internal.AccessPathSyntax /** Module containing hooks for providing input data to be interpreted as a model. */ module ModelInput { @@ -327,29 +327,29 @@ predicate isRelevantFullPath(string type, string path) { } /** A string from a CSV row that should be parsed as an access path. 
*/ -private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { - isRelevantFullPath(_, this) - or - exists(string type | isRelevantType(type) | - summaryModel(type, _, this, _, _) or - summaryModel(type, _, _, this, _) - ) - or - typeVariableModel(_, this) - } +private predicate accessPathRange(string s) { + isRelevantFullPath(_, s) + or + exists(string type | isRelevantType(type) | + summaryModel(type, _, s, _, _) or + summaryModel(type, _, _, s, _) + ) + or + typeVariableModel(_, s) } +import AccessPath + /** * Gets a successor of `node` in the API graph. */ bindingset[token] -API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) { // API graphs use the same label for arguments and parameters. An edge originating from a // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and - result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) + result = node.getParameter(parseIntUnbounded(token.getAnArgument())) or token.getName() = "ReturnValue" and result = node.getReturn() @@ -362,11 +362,9 @@ API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets an API-graph successor for the given invocation. 
*/ bindingset[token] -API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) { +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "Argument" and - result = - invoke - .getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) or token.getName() = "ReturnValue" and result = invoke.getReturn() @@ -378,10 +376,12 @@ API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken to /** * Holds if `invoke` invokes a call-site filter given by `token`. */ -pragma[inline] -private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) { +bindingset[token] +private predicate invocationMatchesCallSiteFilter( + Specific::InvokeNode invoke, AccessPathTokenBase token +) { token.getName() = "WithArity" and - invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument()) + invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument()) or Specific::invocationMatchesExtraCallSiteFilter(invoke, token) } diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll index d0a5d1b9da5..6b20cf2398e 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -4,14 +4,14 @@ * It must export the following members: * ```ql * class Unit // a unit type - * module AccessPathSyntax // a re-export of the AccessPathSyntax module + * * class InvokeNode // a type representing an invocation connected to the API graph * module API // the API graph module * predicate isPackageUsed(string package) * API::Node getExtraNodeFromPath(string package, string type, string path, int n) - * 
API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) - * API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token) - * predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) + * API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathTokenBase token) + * predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathTokenBase token) * InvokeNode getAnInvocationOf(API::Node node) * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) @@ -21,11 +21,10 @@ private import python as PY private import ApiGraphModels +private import codeql.dataflow.internal.AccessPathSyntax import semmle.python.ApiGraphs::API as API // Re-export libraries needed by ApiGraphModels.qll -import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax import semmle.python.dataflow.new.DataFlow::DataFlow as DataFlow -private import AccessPathSyntax /** * Holds if models describing `type` may be relevant for the analysis of this database. @@ -49,7 +48,7 @@ API::Node getExtraNodeFromType(string type) { result = API::moduleImport(type) } * Gets a Python-specific API graph successor of `node` reachable by resolving `token`. */ bindingset[token] -API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) { token.getName() = "Member" and result = node.getMember(token.getAnArgument()) or @@ -89,7 +88,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets a Python-specific API graph successor of `node` reachable by resolving `token`. 
*/ bindingset[token] -API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathToken token) { +API::Node getExtraSuccessorFromInvoke(API::CallNode node, AccessPathTokenBase token) { token.getName() = "Instance" and result = node.getReturn() or @@ -129,7 +128,7 @@ API::Node getAFuzzySuccessor(API::Node node) { * Holds if `invoke` matches the PY-specific call site filter in `token`. */ bindingset[token] -predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathToken token) { +predicate invocationMatchesExtraCallSiteFilter(API::CallNode invoke, AccessPathTokenBase token) { token.getName() = "Call" and exists(invoke) // there is only one kind of call in Python. } diff --git a/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql b/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql index 74863d2fde3..19af6c8e744 100644 --- a/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql +++ b/python/ql/test/experimental/dataflow/summaries-checks/invalid-spec.ql @@ -3,6 +3,6 @@ import semmle.python.dataflow.new.FlowSummary import semmle.python.dataflow.new.internal.FlowSummaryImpl query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) { - (sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and + (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and Private::External::invalidSpecComponent(s, c) } diff --git a/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql b/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql index 08287efa52c..eb5133318c6 100644 --- a/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql +++ b/python/ql/test/experimental/dataflow/summaries-checks/missing-attribute-content.ql @@ -4,7 +4,7 @@ import semmle.python.dataflow.new.internal.FlowSummaryImpl from SummarizedCallable sc, string s, string c, string attr where - (sc.propagatesFlowExt(s, 
_, _) or sc.propagatesFlowExt(_, s, _)) and + (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and Private::External::invalidSpecComponent(s, c) and c = "Attribute[" + attr + "]" select "The attribute \"" + attr + diff --git a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll index cdd61420bbb..acd9c2136c7 100644 --- a/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/summaries/TestSummaries.qll @@ -18,6 +18,10 @@ module RecursionGuard { (TT::callStep(_, _) implies any()) } + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + none() + } + override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } } } @@ -31,7 +35,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = true @@ -48,7 +52,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -68,7 +72,7 @@ private class SummarizedCallableReversed extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override 
predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].ListElement" and output = "ReturnValue.ListElement" and preservesValue = true @@ -84,7 +88,7 @@ private class SummarizedCallableMap extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1].ListElement" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -104,7 +108,7 @@ private class SummarizedCallableAppend extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -126,7 +130,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable { result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue.ListElement" and preservesValue = true diff --git a/python/ql/test/experimental/dataflow/summaries/summaries.ql b/python/ql/test/experimental/dataflow/summaries/summaries.ql index d3c0206d41f..e2a61cd6f46 100644 --- a/python/ql/test/experimental/dataflow/summaries/summaries.ql +++ b/python/ql/test/experimental/dataflow/summaries/summaries.ql @@ -12,7 +12,7 @@ import experimental.dataflow.testTaintConfig private import TestSummaries query predicate invalidSpecComponent(SummarizedCallable sc, string s, string 
c) { - (sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and + (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and Private::External::invalidSpecComponent(s, c) } diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll index 8d626b332a3..fa98b6f84a0 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/TestSummaries.qll @@ -18,6 +18,10 @@ module RecursionGuard { (TT::callStep(_, _) implies any()) } + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + none() + } + override DataFlow::CallCfgNode getACallSimple() { none() } override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } @@ -39,7 +43,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = true @@ -58,7 +62,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -80,7 +84,7 @@ private class SummarizedCallableReversed extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate 
propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].ListElement" and output = "ReturnValue.ListElement" and preservesValue = true @@ -98,7 +102,7 @@ private class SummarizedCallableMap extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1].ListElement" and output = "Argument[0].Parameter[0]" and preservesValue = true @@ -120,7 +124,7 @@ private class SummarizedCallableAppend extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -144,7 +148,7 @@ private class SummarizedCallableJsonLoads extends SummarizedCallable { result = API::moduleImport("json").getMember("loads").getAValueReachableFromSource() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue.ListElement" and preservesValue = true @@ -163,7 +167,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = 
"Argument[0].Attribute[secret]" and output = "ReturnValue" and preservesValue = true @@ -181,7 +185,7 @@ private class SummarizedCallableSetSecret extends SummarizedCallable { override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[0].Attribute[secret]" and preservesValue = true diff --git a/python/ql/test/library-tests/frameworks/data/test.ql b/python/ql/test/library-tests/frameworks/data/test.ql index 1564c0eb40a..701c74f1246 100644 --- a/python/ql/test/library-tests/frameworks/data/test.ql +++ b/python/ql/test/library-tests/frameworks/data/test.ql @@ -1,5 +1,5 @@ import python -import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax +private import semmle.python.frameworks.data.internal.ApiGraphModels as ApiGraphModels import semmle.python.frameworks.data.ModelsAsData import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.DataFlow @@ -27,6 +27,6 @@ query predicate isSource(DataFlow::Node node, string kind) { node = ModelOutput::getASourceNode(kind).asSource() } -query predicate syntaxErrors(AccessPathSyntax::AccessPath path) { path.hasSyntaxError() } +query predicate syntaxErrors(ApiGraphModels::AccessPath path) { path.hasSyntaxError() } query predicate warning = ModelOutput::getAWarning/0; diff --git a/python/ql/test/library-tests/frameworks/data/warnings.ql b/python/ql/test/library-tests/frameworks/data/warnings.ql index c6561797164..71487889ca1 100644 --- a/python/ql/test/library-tests/frameworks/data/warnings.ql +++ b/python/ql/test/library-tests/frameworks/data/warnings.ql @@ -1,5 +1,4 @@ import python -import semmle.python.dataflow.new.internal.AccessPathSyntax as AccessPathSyntax import semmle.python.frameworks.data.internal.ApiGraphModels as 
ApiGraphModels import semmle.python.frameworks.data.ModelsAsData diff --git a/ruby/ql/docs/flow_summaries.md b/ruby/ql/docs/flow_summaries.md index 0bc8c5e190a..5f39e158588 100644 --- a/ruby/ql/docs/flow_summaries.md +++ b/ruby/ql/docs/flow_summaries.md @@ -22,7 +22,7 @@ have no source code, so we include a flow summary for it: private class ChompSummary extends SimpleSummarizedCallable { ChompSummary() { this = "chomp" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "ReturnValue" and preservesValue = false diff --git a/ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll b/ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll index ae625fd44ee..f29876534ef 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll @@ -8,7 +8,6 @@ private import internal.FlowSummaryImpl as Impl private import internal.DataFlowDispatch private import internal.DataFlowImplCommon as DataFlowImplCommon private import internal.DataFlowPrivate -private import internal.FlowSummaryImplSpecific // import all instances below private module Summaries { @@ -16,104 +15,13 @@ private module Summaries { private import codeql.ruby.frameworks.data.ModelsAsData } -class SummaryComponent = Impl::Public::SummaryComponent; +deprecated class SummaryComponent = Impl::Private::SummaryComponent; -/** Provides predicates for constructing summary components. 
*/ -module SummaryComponent { - private import Impl::Public::SummaryComponent as SC +deprecated module SummaryComponent = Impl::Private::SummaryComponent; - predicate parameter = SC::parameter/1; +deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack; - predicate argument = SC::argument/1; - - predicate content = SC::content/1; - - predicate withoutContent = SC::withoutContent/1; - - predicate withContent = SC::withContent/1; - - class SyntheticGlobal = SC::SyntheticGlobal; - - /** Gets a summary component that represents a receiver. */ - SummaryComponent receiver() { result = argument(any(ParameterPosition pos | pos.isSelf())) } - - /** Gets a summary component that represents a block argument. */ - SummaryComponent block() { result = argument(any(ParameterPosition pos | pos.isBlock())) } - - /** Gets a summary component that represents an element in a collection at an unknown index. */ - SummaryComponent elementUnknown() { - result = SC::content(TSingletonContent(TUnknownElementContent())) - } - - /** Gets a summary component that represents an element in a collection at a known index. */ - SummaryComponent elementKnown(ConstantValue cv) { - result = SC::content(TSingletonContent(DataFlow::Content::getElementContent(cv))) - } - - /** - * Gets a summary component that represents an element in a collection at a specific - * known index `cv`, or an unknown index. - */ - SummaryComponent elementKnownOrUnknown(ConstantValue cv) { - result = SC::content(TKnownOrUnknownElementContent(TKnownElementContent(cv))) - or - not exists(TKnownElementContent(cv)) and - result = elementUnknown() - } - - /** - * Gets a summary component that represents an element in a collection at either an unknown - * index or known index. This has the same semantics as - * - * ```ql - * elementKnown() or elementUnknown(_) - * ``` - * - * but is more efficient, because it is represented by a single value. 
- */ - SummaryComponent elementAny() { result = SC::content(TAnyElementContent()) } - - /** - * Gets a summary component that represents an element in a collection at known - * integer index `lower` or above. - */ - SummaryComponent elementLowerBound(int lower) { - result = SC::content(TElementLowerBoundContent(lower, false)) - } - - /** - * Gets a summary component that represents an element in a collection at known - * integer index `lower` or above, or possibly at an unknown index. - */ - SummaryComponent elementLowerBoundOrUnknown(int lower) { - result = SC::content(TElementLowerBoundContent(lower, true)) - } - - /** Gets a summary component that represents the return value of a call. */ - SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) } -} - -class SummaryComponentStack = Impl::Public::SummaryComponentStack; - -/** Provides predicates for constructing stacks of summary components. */ -module SummaryComponentStack { - private import Impl::Public::SummaryComponentStack as SCS - - predicate singleton = SCS::singleton/1; - - predicate push = SCS::push/2; - - predicate argument = SCS::argument/1; - - /** Gets a singleton stack representing a receiver. */ - SummaryComponentStack receiver() { result = singleton(SummaryComponent::receiver()) } - - /** Gets a singleton stack representing a block argument. */ - SummaryComponentStack block() { result = singleton(SummaryComponent::block()) } - - /** Gets a singleton stack representing the return value of a call. */ - SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } -} +deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; /** A callable with a flow summary, identified by a unique string. 
*/ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable { @@ -121,18 +29,11 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari SummarizedCallable() { any() } /** - * Same as - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * but uses an external (string) representation of the input and output stacks. + * DEPRECATED: Use `propagatesFlow` instead. */ - pragma[nomagic] - predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() } + deprecated predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + this.propagatesFlow(input, output, preservesValue) + } /** * Gets the synthesized parameter that results from an input specification @@ -141,7 +42,7 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari DataFlow::ParameterNode getParameter(string s) { exists(ParameterPosition pos | DataFlowImplCommon::parameterNode(result, TLibraryCallable(this), pos) and - s = getParameterPosition(pos) + s = Impl::Input::encodeParameterPosition(pos) ) } } @@ -159,7 +60,7 @@ abstract class SimpleSummarizedCallable extends SummarizedCallable { final override MethodCall getACallSimple() { result = mc } } -class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; +deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; /** * Provides a set of special flow summaries to ensure that callbacks passed into @@ -199,7 +100,7 @@ private module LibraryCallbackSummaries { libraryCallHasLambdaArg(result.getAControlFlowNode(), _) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[block]" and output = "Argument[block].Parameter[lambda-self]" 
diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. - * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. 
- * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. - lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. 
*/ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. - result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. 
- */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll index 7bec16f2c37..b898fb3be06 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll @@ -4,7 +4,6 @@ private import DataFlowPrivate private import codeql.ruby.typetracking.internal.TypeTrackingImpl private import codeql.ruby.ast.internal.Module private import FlowSummaryImpl as FlowSummaryImpl -private import FlowSummaryImplSpecific as FlowSummaryImplSpecific private import codeql.ruby.dataflow.FlowSummary private import codeql.ruby.dataflow.SSA private import codeql.util.Boolean @@ -426,14 +425,14 @@ private module Cached { TPositionalArgumentPosition(int pos) { exists(Call c | exists(c.getArgument(pos))) or - FlowSummaryImplSpecific::ParsePositions::isParsedParameterPosition(_, pos) + FlowSummaryImpl::ParsePositions::isParsedParameterPosition(_, pos) } or TKeywordArgumentPosition(string name) { name = 
any(KeywordParameter kp).getName() or exists(any(Call c).getKeywordArgument(name)) or - FlowSummaryImplSpecific::ParsePositions::isParsedKeywordParameterPosition(_, name) + FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name) } or THashSplatArgumentPosition() or TSynthHashSplatArgumentPosition() or @@ -450,15 +449,17 @@ private module Cached { TPositionalParameterPosition(int pos) { pos = any(Parameter p).getPosition() or - FlowSummaryImplSpecific::ParsePositions::isParsedArgumentPosition(_, pos) + FlowSummaryImpl::ParsePositions::isParsedArgumentPosition(_, pos) } or TPositionalParameterLowerBoundPosition(int pos) { - FlowSummaryImplSpecific::ParsePositions::isParsedArgumentLowerBoundPosition(_, pos) + FlowSummaryImpl::ParsePositions::isParsedArgumentLowerBoundPosition(_, pos) } or TKeywordParameterPosition(string name) { name = any(KeywordParameter kp).getName() or - FlowSummaryImplSpecific::ParsePositions::isParsedKeywordArgumentPosition(_, name) + exists(any(Call c).getKeywordArgument(name)) + or + FlowSummaryImpl::ParsePositions::isParsedKeywordArgumentPosition(_, name) } or THashSplatParameterPosition() or TSynthHashSplatParameterPosition() or diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll index e5c2e7886c9..2d09834e623 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll @@ -8,7 +8,6 @@ private import DataFlowPublic private import DataFlowDispatch private import SsaImpl as SsaImpl private import FlowSummaryImpl as FlowSummaryImpl -private import FlowSummaryImplSpecific as FlowSummaryImplSpecific private import codeql.ruby.frameworks.data.ModelsAsData /** Gets the callable in which this node occurs. 
*/ @@ -629,8 +628,7 @@ private module Cached { TAnyElementContent() or TKnownOrUnknownElementContent(Content::KnownElementContent c) or TElementLowerBoundContent(int lower, boolean includeUnknown) { - FlowSummaryImplSpecific::ParsePositions::isParsedElementLowerBoundPosition(_, includeUnknown, - lower) + FlowSummaryImpl::ParsePositions::isParsedElementLowerBoundPosition(_, includeUnknown, lower) } or TElementContentOfTypeContent(string type, Boolean includeUnknown) { type = any(Content::KnownElementContent content).getIndex().getValueType() @@ -700,6 +698,21 @@ private module Cached { THashSplatContentApprox(string approx) { approx = approxKnownElementIndex(_) } or TNonElementContentApprox(Content c) { not c instanceof Content::ElementContent } or TCapturedVariableContentApprox(VariableCapture::CapturedVariable v) + + cached + newtype TDataFlowType = + TLambdaDataFlowType(Callable c) { c = any(LambdaSelfReferenceNode n).getCallable() } or + // In order to reduce the set of cons-candidates, we annotate all implicit (hash) splat + // creations with the name of the method that they are passed into. This includes + // array/hash literals as well (where the name is simply `[]`), because of how they + // are modeled (see `Array.qll` and `Hash.qll`). 
+ TSynthHashSplatArgumentType(string methodName) { + methodName = any(SynthHashSplatArgumentNode n).getMethodName() + } or + TSynthSplatArgumentType(string methodName) { + methodName = any(SynthSplatArgumentNode n).getMethodName() + } or + TUnknownDataFlowType() } class TElementContent = @@ -1254,11 +1267,11 @@ module ArgumentNodes { } private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode { - private DataFlowCall call_; + private SummaryCall call_; private ArgumentPosition pos_; SummaryArgumentNode() { - FlowSummaryImpl::Private::summaryArgumentNode(call_, this.getSummaryNode(), pos_) + FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), pos_) } override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, ArgumentPosition pos) { @@ -1641,11 +1654,11 @@ private module OutNodes { } private class SummaryOutNode extends FlowSummaryNode, OutNode { - private DataFlowCall call; + private SummaryCall call; private ReturnKind kind_; SummaryOutNode() { - FlowSummaryImpl::Private::summaryOutNode(call, this.getSummaryNode(), kind_) + FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_) } override DataFlowCall getCall(ReturnKind kind) { result = call and kind = kind_ } @@ -1803,20 +1816,6 @@ predicate expectsContent(Node n, ContentSet c) { FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c) } -private newtype TDataFlowType = - TLambdaDataFlowType(Callable c) { c = any(LambdaSelfReferenceNode n).getCallable() } or - // In order to reduce the set of cons-candidates, we annotate all implicit (hash) splat - // creations with the name of the method that they are passed into. This includes - // array/hash literals as well (where the name is simply `[]`), because of how they - // are modeled (see `Array.qll` and `Hash.qll`). 
- TSynthHashSplatArgumentType(string methodName) { - methodName = any(SynthHashSplatArgumentNode n).getMethodName() - } or - TSynthSplatArgumentType(string methodName) { - methodName = any(SynthSplatArgumentNode n).getMethodName() - } or - TUnknownDataFlowType() - class DataFlowType extends TDataFlowType { string toString() { result = "" } } @@ -2043,7 +2042,10 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves * by default as a heuristic. */ predicate allowParameterReturnInSelf(ParameterNodeImpl p) { - FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p) + exists(DataFlowCallable c, ParameterPosition pos | + p.isParameterOf(c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asLibraryCallable(), pos) + ) or VariableCapture::Flow::heuristicAllowInstanceParameterReturnInSelf(p.(SelfParameterNode) .getCallable()) diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll index 075375990bf..68573d37c15 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll @@ -212,12 +212,14 @@ class ExprNode extends Node, TExprNode { * The value of a parameter at function entry, viewed as a node in a data * flow graph. */ -class ParameterNode extends LocalSourceNode instanceof ParameterNodeImpl { +class ParameterNode extends LocalSourceNode { + ParameterNode() { exists(getParameterPosition(this, _)) } + /** Gets the parameter corresponding to this node, if any. */ - final Parameter getParameter() { result = super.getParameter() } + final Parameter getParameter() { result = getParameter(this) } /** Gets the callable that this parameter belongs to. */ - final Callable getCallable() { result = super.getCfgScope() } + final Callable getCallable() { result = getCfgScope(this) } /** Gets the name of the parameter, if any. 
*/ final string getName() { result = this.getParameter().(NamedParameter).getName() } @@ -348,9 +350,13 @@ class LocalSourceNode extends Node { * Nodes corresponding to AST elements, for example `ExprNode`, usually refer * to the value before the update. */ -class PostUpdateNode extends Node instanceof PostUpdateNodeImpl { +class PostUpdateNode extends Node { + private Node pre; + + PostUpdateNode() { pre = getPreUpdateNode(this) } + /** Gets the node before the state update. */ - Node getPreUpdateNode() { result = super.getPreUpdateNode() } + Node getPreUpdateNode() { result = pre } } /** An SSA definition, viewed as a node in a data flow graph. */ @@ -383,6 +389,28 @@ private module Cached { ) } + cached + CfgScope getCfgScope(NodeImpl node) { result = node.getCfgScope() } + + cached + ReturnNode getAReturnNode(Callable callable) { getCfgScope(result) = callable } + + cached + Parameter getParameter(ParameterNodeImpl param) { result = param.getParameter() } + + cached + ParameterPosition getParameterPosition(ParameterNodeImpl param, DataFlowCallable c) { + param.isParameterOf(c, result) + } + + cached + ParameterPosition getSourceParameterPosition(ParameterNodeImpl param, Callable c) { + param.isSourceParameterOf(c, result) + } + + cached + Node getPreUpdateNode(PostUpdateNodeImpl node) { result = node.getPreUpdateNode() } + cached predicate methodHasSuperCall(MethodNode method, CallNode call) { call.isSuperCall() and method = call.getEnclosingMethod() @@ -1271,7 +1299,7 @@ class CallableNode extends StmtSequenceNode { Callable asCallableAstNode() { result = callable } private ParameterPosition getParameterPosition(ParameterNodeImpl node) { - node.isSourceParameterOf(callable, result) + result = getSourceParameterPosition(node, callable) } /** Gets the `n`th positional parameter. */ @@ -1311,7 +1339,7 @@ class CallableNode extends StmtSequenceNode { /** * Gets a data flow node whose value is about to be returned by this callable. 
*/ - Node getAReturnNode() { result.(ReturnNode).(NodeImpl).getCfgScope() = callable } + Node getAReturnNode() { result = getAReturnNode(callable) } /** * DEPRECATED. Use `getAReturnNode` instead. diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll index 0aa17c521b4..59f8e541b3d 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll @@ -1,1491 +1,318 @@ /** * Provides classes and predicates for defining flow summaries. - * - * The definitions in this file are language-independent, and language-specific - * definitions are passed in via the `DataFlowImplSpecific` and - * `FlowSummaryImplSpecific` modules. */ -private import FlowSummaryImplSpecific +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import codeql.ruby.AST +private import codeql.ruby.dataflow.internal.DataFlowImplSpecific as DataFlowImplSpecific private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public -private import DataFlowImplCommon -private import codeql.util.Unit -/** Provides classes and predicates for defining flow summaries. */ -module Public { +module Input implements InputSig { + class SummarizedCallableBase = string; + + ArgumentPosition callbackSelfParameterPosition() { result.isLambdaSelf() } + + ReturnKind getStandardReturnValueKind() { result instanceof NormalReturnKind } + + string encodeParameterPosition(ParameterPosition pos) { + exists(int i | + pos.isPositional(i) and + result = i.toString() + ) + or + exists(int i | + pos.isPositionalLowerBound(i) and + result = i + ".." 
+ ) + or + exists(string name | + pos.isKeyword(name) and + result = name + ":" + ) + or + pos.isSelf() and + result = "self" + or + pos.isLambdaSelf() and + result = "lambda-self" + or + pos.isBlock() and + result = "block" + or + pos.isAny() and + result = "any" + or + pos.isAnyNamed() and + result = "any-named" + or + pos.isHashSplat() and + result = "hash-splat" + or + pos.isSplat(0) and + result = "splat" + } + + string encodeArgumentPosition(ArgumentPosition pos) { + pos.isSelf() and result = "self" + or + pos.isLambdaSelf() and result = "lambda-self" + or + pos.isBlock() and result = "block" + or + exists(int i | + pos.isPositional(i) and + result = i.toString() + ) + or + exists(string name | + pos.isKeyword(name) and + result = name + ":" + ) + or + pos.isAny() and + result = "any" + or + pos.isAnyNamed() and + result = "any-named" + } + + string encodeContent(ContentSet cs, string arg) { + exists(Content c | cs = TSingletonContent(c) | + c = TFieldContent(arg) and result = "Field" + or + exists(ConstantValue cv | + c = TKnownElementContent(cv) and + result = "Element" and + arg = cv.serialize() + "!" + ) + or + c = TUnknownElementContent() and result = "Element" and arg = "?" + ) + or + cs = TAnyElementContent() and result = "Element" and arg = "any" + or + exists(Content::KnownElementContent kec | + cs = TKnownOrUnknownElementContent(kec) and + result = "Element" and + arg = kec.getIndex().serialize() + ) + or + exists(int lower, boolean includeUnknown, string unknown | + cs = TElementLowerBoundContent(lower, includeUnknown) and + (if includeUnknown = true then unknown = "" else unknown = "!") and + result = "Element" and + arg = lower.toString() + ".." 
+ unknown + ) + } + + string encodeReturn(ReturnKind rk, string arg) { + not rk = Input::getStandardReturnValueKind() and + result = "ReturnValue" and + arg = rk.toString() + } + + string encodeWithoutContent(ContentSet c, string arg) { + result = "Without" + encodeContent(c, arg) + } + + string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) } + + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges + token.getName() = "Argument" and + result.isPositional(AccessPath::parseInt(token.getAnArgument())) + } + + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges + token.getName() = "Parameter" and + result.isPositional(AccessPath::parseInt(token.getAnArgument())) + } + + bindingset[token] + ContentSet decodeUnknownContent(AccessPath::AccessPathTokenBase token) { + token.getName() = "Element" and + result = TSingletonContent(TUnknownElementContent()) + } + + bindingset[token] + ContentSet decodeUnknownWithContent(AccessPath::AccessPathTokenBase token) { + token.getName() = "WithElement" and + result = TAnyElementContent() + } +} + +private import Make as Impl + +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { + result.asCall().getAstNode() = sc.(LibraryCallable).getACall() + or + result.asCall().getAstNode() = sc.(LibraryCallable).getACallSimple() + } +} + +module Private { + import Impl::Private + + module Steps = Impl::Private::Steps; + + /** + * Provides predicates for constructing summary components. 
+ */ + module SummaryComponent { + private import Impl::Private::SummaryComponent as SC + + predicate parameter = SC::parameter/1; + + predicate argument = SC::argument/1; + + predicate content = SC::content/1; + + predicate withoutContent = SC::withoutContent/1; + + predicate withContent = SC::withContent/1; + + /** Gets a summary component that represents a receiver. */ + SummaryComponent receiver() { result = argument(any(ParameterPosition pos | pos.isSelf())) } + + /** Gets a summary component that represents a block argument. */ + SummaryComponent block() { result = argument(any(ParameterPosition pos | pos.isBlock())) } + + /** Gets a summary component that represents an element in a collection at an unknown index. */ + SummaryComponent elementUnknown() { + result = SC::content(TSingletonContent(TUnknownElementContent())) + } + + /** Gets a summary component that represents an element in a collection at a known index. */ + SummaryComponent elementKnown(ConstantValue cv) { + result = SC::content(TSingletonContent(Content::getElementContent(cv))) + } + + /** + * Gets a summary component that represents an element in a collection at a specific + * known index `cv`, or an unknown index. + */ + SummaryComponent elementKnownOrUnknown(ConstantValue cv) { + result = SC::content(TKnownOrUnknownElementContent(TKnownElementContent(cv))) + or + not exists(TKnownElementContent(cv)) and + result = elementUnknown() + } + + /** + * Gets a summary component that represents an element in a collection at either an unknown + * index or known index. This has the same semantics as + * + * ```ql + * elementKnown() or elementUnknown(_) + * ``` + * + * but is more efficient, because it is represented by a single value. + */ + SummaryComponent elementAny() { result = SC::content(TAnyElementContent()) } + + /** + * Gets a summary component that represents an element in a collection at known + * integer index `lower` or above. 
+ */ + SummaryComponent elementLowerBound(int lower) { + result = SC::content(TElementLowerBoundContent(lower, false)) + } + + /** + * Gets a summary component that represents an element in a collection at known + * integer index `lower` or above, or possibly at an unknown index. + */ + SummaryComponent elementLowerBoundOrUnknown(int lower) { + result = SC::content(TElementLowerBoundContent(lower, true)) + } + + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) } + } + + /** + * Provides predicates for constructing stacks of summary components. + */ + module SummaryComponentStack { + private import Impl::Private::SummaryComponentStack as SCS + + predicate singleton = SCS::singleton/1; + + predicate push = SCS::push/2; + + predicate argument = SCS::argument/1; + + /** Gets a singleton stack representing a receiver. */ + SummaryComponentStack receiver() { result = singleton(SummaryComponent::receiver()) } + + /** Gets a singleton stack representing a block argument. */ + SummaryComponentStack block() { result = singleton(SummaryComponent::block()) } + + /** Gets a singleton stack representing the return value of a call. */ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } + } +} + +module Public = Impl::Public; + +module ParsePositions { private import Private - /** - * A component used in a flow summary. - * - * Either a parameter or an argument at a given position, a specific - * content type, or a return kind. - */ - class SummaryComponent extends TSummaryComponent { - /** Gets a textual representation of this component used for MaD models. 
*/ - string getMadRepresentation() { - result = getMadRepresentationSpecific(this) - or - exists(ArgumentPosition pos | - this = TParameterSummaryComponent(pos) and - result = "Parameter[" + getArgumentPosition(pos) + "]" - ) - or - exists(ParameterPosition pos | - this = TArgumentSummaryComponent(pos) and - result = "Argument[" + getParameterPosition(pos) + "]" - ) - or - exists(string synthetic | - this = TSyntheticGlobalSummaryComponent(synthetic) and - result = "SyntheticGlobal[" + synthetic + "]" - ) - or - this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" - } - - /** Gets a textual representation of this summary component. */ - string toString() { result = this.getMadRepresentation() } + private predicate isParamBody(string body) { + body = any(AccessPathToken tok).getAnArgument("Parameter") } - /** Provides predicates for constructing summary components. */ - module SummaryComponent { - /** Gets a summary component for content `c`. */ - SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } - - /** Gets a summary component where data is not allowed to be stored in `c`. */ - SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } - - /** Gets a summary component where data must be stored in `c`. */ - SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } - - /** Gets a summary component for a parameter at position `pos`. */ - SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } - - /** Gets a summary component for an argument at position `pos`. */ - SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } - - /** Gets a summary component for a return of kind `rk`. */ - SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } - - /** Gets a summary component for synthetic global `sg`. 
*/ - SummaryComponent syntheticGlobal(SyntheticGlobal sg) { - result = TSyntheticGlobalSummaryComponent(sg) - } - - /** - * A synthetic global. This represents some form of global state, which - * summaries can read and write individually. - */ - abstract class SyntheticGlobal extends string { - bindingset[this] - SyntheticGlobal() { any() } - } + private predicate isArgBody(string body) { + body = any(AccessPathToken tok).getAnArgument("Argument") } - /** - * A (non-empty) stack of summary components. - * - * A stack is used to represent where data is read from (input) or where it - * is written to (output). For example, an input stack `[Field f, Argument 0]` - * means that data is read from field `f` from the `0`th argument, while an - * output stack `[Field g, Return]` means that data is written to the field - * `g` of the returned object. - */ - class SummaryComponentStack extends TSummaryComponentStack { - /** Gets the head of this stack. */ - SummaryComponent head() { - this = TSingletonSummaryComponentStack(result) or - this = TConsSummaryComponentStack(result, _) - } - - /** Gets the tail of this stack, if any. */ - SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } - - /** Gets the length of this stack. */ - int length() { - this = TSingletonSummaryComponentStack(_) and result = 1 - or - result = 1 + this.tail().length() - } - - /** Gets the stack obtained by dropping the first `i` elements, if any. */ - SummaryComponentStack drop(int i) { - i = 0 and result = this - or - result = this.tail().drop(i - 1) - } - - /** Holds if this stack contains summary component `c`. */ - predicate contains(SummaryComponent c) { c = this.drop(_).head() } - - /** Gets the bottom element of this stack. */ - SummaryComponent bottom() { - this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() - } - - /** Gets a textual representation of this stack used for MaD models. 
*/ - string getMadRepresentation() { - exists(SummaryComponent head, SummaryComponentStack tail | - head = this.head() and - tail = this.tail() and - result = tail.getMadRepresentation() + "." + head.getMadRepresentation() - ) - or - exists(SummaryComponent c | - this = TSingletonSummaryComponentStack(c) and - result = c.getMadRepresentation() - ) - } - - /** Gets a textual representation of this stack. */ - string toString() { result = this.getMadRepresentation() } + private predicate isElementBody(string body) { + body = any(AccessPathToken tok).getAnArgument(["Element", "WithElement", "WithoutElement"]) } - /** Provides predicates for constructing stacks of summary components. */ - module SummaryComponentStack { - /** Gets a singleton stack containing `c`. */ - SummaryComponentStack singleton(SummaryComponent c) { - result = TSingletonSummaryComponentStack(c) - } - - /** - * Gets the stack obtained by pushing `head` onto `tail`. - * - * Make sure to override `RequiredSummaryComponentStack::required()` in order - * to ensure that the constructed stack exists. - */ - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { - result = TConsSummaryComponentStack(head, tail) - } - - /** Gets a singleton stack for an argument at position `pos`. */ - SummaryComponentStack argument(ParameterPosition pos) { - result = singleton(SummaryComponent::argument(pos)) - } - - /** Gets a singleton stack representing a return of kind `rk`. */ - SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } + predicate isParsedParameterPosition(string c, int i) { + isParamBody(c) and + i = AccessPath::parseInt(c) } - /** - * A class that exists for QL technical reasons only (the IPA type used - * to represent component stacks needs to be bounded). - */ - class RequiredSummaryComponentStack extends Unit { - /** - * Holds if the stack obtained by pushing `head` onto `tail` is required. 
- */ - abstract predicate required(SummaryComponent head, SummaryComponentStack tail); + predicate isParsedArgumentPosition(string c, int i) { + isArgBody(c) and + i = AccessPath::parseInt(c) } - /** - * Gets the valid model origin values. - */ - private string getValidModelOrigin() { - result = - [ - "ai", // AI (machine learning) - "df", // Dataflow (model generator) - "tb", // Type based (model generator) - "hq", // Heuristic query - ] + predicate isParsedArgumentLowerBoundPosition(string c, int i) { + isArgBody(c) and + i = AccessPath::parseLowerBound(c) } - /** - * A class used to represent provenance values for MaD models. - * - * The provenance value is a string of the form `origin-verification` - * (or just `manual`), where `origin` is a value indicating the - * origin of the model, and `verification` is a value indicating, how - * the model was verified. - * - * Examples could be: - * - `df-generated`: A model produced by the model generator, but not verified by a human. - * - `ai-manual`: A model produced by AI, but verified by a human. - */ - class Provenance extends string { - private string verification; - - Provenance() { - exists(string origin | origin = getValidModelOrigin() | - this = origin + "-" + verification and - verification = ["manual", "generated"] - ) - or - this = verification and verification = "manual" - } - - /** - * Holds if this is a valid generated provenance value. - */ - predicate isGenerated() { verification = "generated" } - - /** - * Holds if this is a valid manual provenance value. - */ - predicate isManual() { verification = "manual" } + predicate isParsedKeywordParameterPosition(string c, string paramName) { + isParamBody(c) and + c = paramName + ":" } - /** A callable with a flow summary. */ - abstract class SummarizedCallable extends SummarizedCallableBase { - bindingset[this] - SummarizedCallable() { any() } - - /** - * Holds if data may flow from `input` to `output` through this callable. 
- * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. - * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. - * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. - */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if there exists a generated summary that applies to this callable. - */ - final predicate hasGeneratedModel() { - exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) - } - - /** - * Holds if all the summaries that apply to this callable are auto generated and not manually created. - * That is, only apply generated models, when there are no manual models. - */ - final predicate applyGeneratedModel() { - this.hasGeneratedModel() and - not this.hasManualModel() - } - - /** - * Holds if there exists a manual summary that applies to this callable. - */ - final predicate hasManualModel() { - exists(Provenance p | p.isManual() and this.hasProvenance(p)) - } - - /** - * Holds if there exists a manual summary that applies to this callable. - * Always apply manual models if they exist. - */ - final predicate applyManualModel() { this.hasManualModel() } - - /** - * Holds if there exists a summary that applies to this callable - * that has provenance `provenance`. 
- */ - predicate hasProvenance(Provenance provenance) { provenance = "manual" } + predicate isParsedKeywordArgumentPosition(string c, string paramName) { + isArgBody(c) and + c = paramName + ":" } - /** - * A callable where there is no flow via the callable. - */ - class NeutralSummaryCallable extends NeutralCallable { - NeutralSummaryCallable() { this.getKind() = "summary" } + bindingset[arg] + private string adjustElementArgument(string arg, boolean includeUnknown) { + result = arg.regexpCapture("(.*)!", 1) and + includeUnknown = false + or + result = arg and + not arg.matches("%!") and + includeUnknown = true } - /** - * A callable that has a neutral model. - */ - class NeutralCallable extends NeutralCallableBase { - private string kind; - private Provenance provenance; - - NeutralCallable() { neutralElement(this, kind, provenance) } - - /** - * Holds if the neutral is auto generated. - */ - final predicate hasGeneratedModel() { provenance.isGenerated() } - - /** - * Holds if there exists a manual neutral that applies to this callable. - */ - final predicate hasManualModel() { provenance.isManual() } - - /** - * Holds if the neutral has provenance `p`. - */ - predicate hasProvenance(Provenance p) { p = provenance } - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = kind } - } -} - -/** - * Provides predicates for compiling flow summaries down to atomic local steps, - * read steps, and store steps. 
- */ -module Private { - private import Public - import AccessPathSyntax - - newtype TSummaryComponent = - TContentSummaryComponent(ContentSet c) or - TParameterSummaryComponent(ArgumentPosition pos) or - TArgumentSummaryComponent(ParameterPosition pos) or - TReturnSummaryComponent(ReturnKind rk) or - TSyntheticGlobalSummaryComponent(SummaryComponent::SyntheticGlobal sg) or - TWithoutContentSummaryComponent(ContentSet c) or - TWithContentSummaryComponent(ContentSet c) - - private TParameterSummaryComponent callbackSelfParam() { - result = TParameterSummaryComponent(callbackSelfParameterPosition()) - } - - newtype TSummaryComponentStack = - TSingletonSummaryComponentStack(SummaryComponent c) or - TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { - any(RequiredSummaryComponentStack x).required(head, tail) - or - any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and - head = callbackSelfParam() - or - derivedFluentFlowPush(_, _, _, head, tail, _) - } - - pragma[nomagic] - private predicate summary( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - ) { - c.propagatesFlow(input, output, preservesValue) - or - // observe side effects of callbacks on input arguments - c.propagatesFlow(output, input, preservesValue) and - preservesValue = true and - isCallbackParameter(input) and - isContentOfArgument(output, _) - or - // flow from the receiver of a callback into the instance-parameter - exists(SummaryComponentStack s, SummaryComponentStack callbackRef | - c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _) - | - callbackRef = s.drop(_) and - (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and - input = callbackRef.tail() and - output = TConsSummaryComponentStack(callbackSelfParam(), input) and - preservesValue = true - ) - or - exists(SummaryComponentStack arg, SummaryComponentStack return | - 
derivedFluentFlow(c, input, arg, return, preservesValue) - | - arg.length() = 1 and - output = return - or - exists(SummaryComponent head, SummaryComponentStack tail | - derivedFluentFlowPush(c, input, arg, head, tail, 0) and - output = SummaryComponentStack::push(head, tail) - ) - ) - or - // Chain together summaries where values get passed into callbacks along the way - exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | - c.propagatesFlow(input, mid, preservesValue1) and - c.propagatesFlow(mid, output, preservesValue2) and - mid.drop(mid.length() - 2) = - SummaryComponentStack::push(TParameterSummaryComponent(_), - SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and - preservesValue = preservesValue1.booleanAnd(preservesValue2) - ) - } - - /** - * Holds if `c` has a flow summary from `input` to `arg`, where `arg` - * writes to (contents of) arguments at position `pos`, and `c` has a - * value-preserving flow summary from the arguments at position `pos` - * to a return value (`return`). - * - * In such a case, we derive flow from `input` to (contents of) the return - * value. - * - * As an example, this simplifies modeling of fluent methods: - * for `StringBuilder.append(x)` with a specified value flow from qualifier to - * return value and taint flow from argument 0 to the qualifier, then this - * allows us to infer taint flow from argument 0 to the return value. 
- */ - pragma[nomagic] - private predicate derivedFluentFlow( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponentStack return, boolean preservesValue - ) { - exists(ParameterPosition pos | - summary(c, input, arg, preservesValue) and - isContentOfArgument(arg, pos) and - summary(c, SummaryComponentStack::argument(pos), return, true) and - return.bottom() = TReturnSummaryComponent(_) - ) - } - - pragma[nomagic] - private predicate derivedFluentFlowPush( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponent head, SummaryComponentStack tail, int i - ) { - derivedFluentFlow(c, input, arg, tail, _) and - head = arg.drop(i).head() and - i = arg.length() - 2 - or - exists(SummaryComponent head0, SummaryComponentStack tail0 | - derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and - head = arg.drop(i).head() and - tail = SummaryComponentStack::push(head0, tail0) - ) - } - - private predicate isCallbackParameter(SummaryComponentStack s) { - s.head() = TParameterSummaryComponent(_) and exists(s.tail()) - } - - private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { - s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) - or - s = SummaryComponentStack::argument(pos) - } - - private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, _, s, _) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TContentSummaryComponent(_) and - s = out.tail() - ) - or - // Add the argument node corresponding to the requested post-update node - inputState(c, s) and isCallbackParameter(s) - } - - private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, s, _, _) - or - exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = 
TParameterSummaryComponent(_) and - s = out.tail() - ) - or - // Add the post-update node corresponding to the requested argument node - outputState(c, s) and isCallbackParameter(s) - or - // Add the parameter node for parameter side-effects - outputState(c, s) and s = SummaryComponentStack::argument(_) - } - - private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or - TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } - - /** - * A state used to break up (complex) flow summaries into atomic flow steps. - * For a flow summary - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * the following states are used: - * - * - `TSummaryNodeInputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _have been read_ from the - * input. - * - `TSummaryNodeOutputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _remain to be written_ to - * the output. - */ - private class SummaryNodeState extends TSummaryNodeState { - /** Holds if this state is a valid input state for `c`. */ - pragma[nomagic] - predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeInputState(s) and - inputState(c, s) - } - - /** Holds if this state is a valid output state for `c`. */ - pragma[nomagic] - predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeOutputState(s) and - outputState(c, s) - } - - /** Gets a textual representation of this state. 
*/ - string toString() { - exists(SummaryComponentStack s | - this = TSummaryNodeInputState(s) and - result = "read: " + s - ) - or - exists(SummaryComponentStack s | - this = TSummaryNodeOutputState(s) and - result = "to write: " + s - ) - } - } - - private newtype TSummaryNode = - TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { - summaryNodeRange(c, state) - } or - TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { - summaryParameterNodeRange(c, pos) - } - - abstract class SummaryNode extends TSummaryNode { - abstract string toString(); - - abstract SummarizedCallable getSummarizedCallable(); - } - - private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { - private SummarizedCallable c; - private SummaryNodeState state; - - SummaryInternalNode() { this = TSummaryInternalNode(c, state) } - - override string toString() { result = "[summary] " + state + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { - private SummarizedCallable c; - private ParameterPosition pos; - - SummaryParamNode() { this = TSummaryParameterNode(c, pos) } - - override string toString() { result = "[summary param] " + pos + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - /** - * Holds if `state` represents having read from a parameter at position - * `pos` in `c`. In this case we are not synthesizing a data-flow node, - * but instead assume that a relevant parameter node already exists. - */ - private predicate parameterReadState( - SummarizedCallable c, SummaryNodeState state, ParameterPosition pos - ) { - state.isInputState(c, SummaryComponentStack::argument(pos)) - } - - /** - * Holds if a synthesized summary node is needed for the state `state` in summarized - * callable `c`. 
- */ - private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { - state.isInputState(c, _) and - not parameterReadState(c, state, _) - or - state.isOutputState(c, _) - } - - pragma[noinline] - private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | state.isInputState(c, s) | - result = TSummaryInternalNode(c, state) - or - exists(ParameterPosition pos | - parameterReadState(c, state, pos) and - result = TSummaryParameterNode(c, pos) - ) - ) - } - - pragma[noinline] - private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | - state.isOutputState(c, s) and - result = TSummaryInternalNode(c, state) - ) - } - - /** - * Holds if a write targets `post`, which is a post-update node for a - * parameter at position `pos` in `c`. - */ - private predicate isParameterPostUpdate( - SummaryNode post, SummarizedCallable c, ParameterPosition pos - ) { - post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) - } - - /** Holds if a parameter node at position `pos` is required for `c`. 
*/ - private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { - parameterReadState(c, _, pos) - or - // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context - any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) - } - - private predicate callbackOutput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk - ) { - any(SummaryNodeState state).isInputState(c, s) and - s.head() = TReturnSummaryComponent(rk) and - receiver = summaryNodeInputState(c, s.tail()) - } - - private predicate callbackInput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos - ) { - any(SummaryNodeState state).isOutputState(c, s) and - s.head() = TParameterSummaryComponent(pos) and - receiver = summaryNodeInputState(c, s.tail()) - } - - /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ - predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { - callbackOutput(c, _, receiver, _) - or - callbackInput(c, _, receiver, _) - } - - /** - * Gets the type of synthesized summary node `n`. - * - * The type is computed based on the language-specific predicates - * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and - * `getCallbackReturnType()`. 
- */ - DataFlowType summaryNodeType(SummaryNode n) { - exists(SummaryNode pre | - summaryPostUpdateNode(n, pre) and - result = summaryNodeType(pre) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | - n = summaryNodeInputState(c, s) and - ( - exists(ContentSet cont | result = getContentType(cont) | - head = TContentSummaryComponent(cont) or - head = TWithContentSummaryComponent(cont) - ) - or - head = TWithoutContentSummaryComponent(_) and - result = summaryNodeType(summaryNodeInputState(c, s.tail())) - or - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = - getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), rk) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - or - exists(ParameterPosition pos | - head = TArgumentSummaryComponent(pos) and - result = getParameterType(c, pos) - ) - ) - or - n = summaryNodeOutputState(c, s) and - ( - exists(ContentSet cont | - head = TContentSummaryComponent(cont) and result = getContentType(cont) - ) - or - s.length() = 1 and - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = getReturnType(c, rk) - ) - or - exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | - result = - getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), pos) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - ) - ) - } - - /** Holds if summary node `p` is a parameter with position `pos`. */ - predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { - p = TSummaryParameterNode(_, pos) - } - - /** Holds if summary node `out` contains output of kind `rk` from call `c`. 
*/ - predicate summaryOutNode(DataFlowCall c, SummaryNode out, ReturnKind rk) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackOutput(callable, s, receiver, rk) and - out = summaryNodeInputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `arg` is at position `pos` in the call `c`. */ - predicate summaryArgumentNode(DataFlowCall c, SummaryNode arg, ArgumentPosition pos) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackInput(callable, s, receiver, pos) and - arg = summaryNodeOutputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { - exists(SummarizedCallable c, ParameterPosition pos | - isParameterPostUpdate(post, c, pos) and - pre = TSummaryParameterNode(c, pos) - ) - or - exists(SummarizedCallable callable, SummaryComponentStack s | - callbackInput(callable, s, _, _) and - pre = summaryNodeOutputState(callable, s) and - post = summaryNodeInputState(callable, s) - ) - } - - /** Holds if summary node `ret` is a return node of kind `rk`. */ - predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { - exists(SummaryComponentStack s | - ret = summaryNodeOutputState(_, s) and - s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) - ) - } - - /** - * Holds if flow is allowed to pass from parameter `p`, to a return - * node, and back out to `p`. 
- */ - predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | - p.isParameterOf(inject(c), pragma[only_bind_into](ppos)) - | - exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | - summary(c, inputContents, outputContents, _) and - inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and - outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) - ) - ) - } - - /** Provides a compilation of flow summaries to atomic data-flow steps. */ - module Steps { - /** - * Holds if there is a local step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { - exists( - SummarizedCallable c, SummaryComponentStack inputContents, - SummaryComponentStack outputContents - | - summary(c, inputContents, outputContents, preservesValue) and - pred = summaryNodeInputState(c, inputContents) and - succ = summaryNodeOutputState(c, outputContents) - | - preservesValue = true - or - preservesValue = false and not summary(c, inputContents, outputContents, true) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s | - pred = summaryNodeInputState(c, s.tail()) and - succ = summaryNodeInputState(c, s) and - s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and - preservesValue = true - ) - } - - /** - * Holds if there is a read step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. 
- */ - predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeInputState(sc, s.tail()) and - succ = summaryNodeInputState(sc, s) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a store step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeOutputState(sc, s) and - succ = summaryNodeOutputState(sc, s.tail()) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a jump step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { - exists(SummaryComponentStack s | - s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and - pred = summaryNodeOutputState(_, s) and - succ = summaryNodeInputState(_, s) - ) - } - - /** - * Holds if values stored inside content `c` are cleared at `n`. `n` is a - * synthesized summary node, so in order for values to be cleared at calls - * to the relevant method, it is important that flow does not pass over - * the argument, either via use-use flow or def-use flow. - * - * Example: - * - * ``` - * a.b = taint; - * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier - * sink(a.b); - * ``` - * - * In the above, flow should not pass from `a` on the first line (or the second - * line) to `a` on the third line. Instead, there will be synthesized flow from - * `a` on line 2 to the post-update node for `a` on that line (via an intermediate - * node where field `b` is cleared). 
- */ - predicate summaryClearsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withoutContent(c) - ) - } - - /** - * Holds if the value that is being tracked is expected to be stored inside - * content `c` at `n`. - */ - predicate summaryExpectsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withContent(c) - ) - } - - pragma[noinline] - private predicate viableParam( - DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p - ) { - exists(DataFlowCallable c | - c = inject(sc) and - p = TSummaryParameterNode(sc, ppos) and - c = viableCallable(call) - ) - } - - pragma[nomagic] - private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { - exists(ParameterPosition ppos | - argumentPositionMatch(call, arg, ppos) and - viableParam(call, sc, ppos, result) - ) - } - - /** - * Holds if `p` can reach `n` in a summarized callable, using only value-preserving - * local steps. `clearsOrExpects` records whether any node on the path from `p` to - * `n` either clears or expects contents. - */ - private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { - viableParam(_, _, _, p) and - n = p and - clearsOrExpects = false - or - exists(SummaryNode mid, boolean clearsOrExpectsMid | - paramReachesLocal(p, mid, clearsOrExpectsMid) and - summaryLocalStep(mid, n, true) and - if - summaryClearsContent(n, _) or - summaryExpectsContent(n, _) - then clearsOrExpects = true - else clearsOrExpects = clearsOrExpectsMid - ) - } - - /** - * Holds if use-use flow starting from `arg` should be prohibited. 
- * - * This is the case when `arg` is the argument of a call that targets a - * flow summary where the corresponding parameter either clears contents - * or expects contents. - */ - pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { - exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | - paramReachesLocal(p, ret, true) and - p = summaryArgParam(_, arg, sc) and - p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and - isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) - ) - } - - pragma[nomagic] - private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { - summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) - or - exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | - paramReachesLocal(p, pre, _) and - summaryPostUpdateNode(ret, pre) and - p = TSummaryParameterNode(_, pos) and - rk.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - bindingset[ret] - private SummaryParamNode summaryArgParamRetOut( - ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc - ) { - exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam(call, arg, sc) and - summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and - out = pragma[only_bind_into](rk).getAnOutNode(call) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple value-preserving flow - * summary, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { - exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | - summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and - summaryReturnNode(ret, pragma[only_bind_into](rk)) and - out = getAnOutNode(call, pragma[only_bind_into](rk)) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple flow summary involving taint - * step, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { - exists(SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) - ) - } - - /** - * Holds if there is a read(+taint) of `c` from `arg` to `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and - summaryLocalStep(mid, ret, _) - ) - } - - /** - * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and - summaryStoreStep(mid, c, ret) - ) - } - } - - /** - * Provides a means of translating externally (e.g., MaD) defined flow - * summaries into a `SummarizedCallable`s. - */ - module External { - /** Holds if `spec` is a relevant external specification. */ - private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _, _) or - summaryElement(_, _, spec, _, _) or - sourceElement(_, spec, _, _) or - sinkElement(_, spec, _, _) - } - - private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { relevantSpec(this) } - } - - /** Holds if specification component `token` parses as parameter `pos`. */ - predicate parseParam(AccessPathToken token, ArgumentPosition pos) { - token.getName() = "Parameter" and - pos = parseParamBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as argument `pos`. */ - predicate parseArg(AccessPathToken token, ParameterPosition pos) { - token.getName() = "Argument" and - pos = parseArgBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as synthetic global `sg`. 
*/ - predicate parseSynthGlobal(AccessPathToken token, string sg) { - token.getName() = "SyntheticGlobal" and - sg = token.getAnArgument() - } - - private class SyntheticGlobalFromAccessPath extends SummaryComponent::SyntheticGlobal { - SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } - } - - private SummaryComponent interpretComponent(AccessPathToken token) { - exists(ParameterPosition pos | - parseArg(token, pos) and result = SummaryComponent::argument(pos) - ) - or - exists(ArgumentPosition pos | - parseParam(token, pos) and result = SummaryComponent::parameter(pos) - ) - or - token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) - or - exists(string sg | - parseSynthGlobal(token, sg) and result = SummaryComponent::syntheticGlobal(sg) - ) - or - result = interpretComponentSpecific(token) - } - - /** - * Holds if `spec` specifies summary component stack `stack`. - */ - predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { - interpretSpec(spec, spec.getNumToken(), stack) - } - - /** Holds if the first `n` tokens of `spec` resolves to `stack`. 
*/ - private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { - n = 1 and - stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) - or - exists(SummaryComponent head, SummaryComponentStack tail | - interpretSpec(spec, n, head, tail) and - stack = SummaryComponentStack::push(head, tail) - ) - } - - /** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */ - private predicate interpretSpec( - AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail - ) { - interpretSpec(spec, n - 1, tail) and - head = interpretComponent(spec.getToken(n - 1)) - } - - private class MkStack extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - interpretSpec(_, _, head, tail) - } - } - - private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } - - private predicate relevantSummaryElementGenerated( - AccessPath inSpec, AccessPath outSpec, string kind - ) { - exists(Provenance provenance | - provenance.isGenerated() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) and - not this.applyManualModel() - } - - private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { - exists(Provenance provenance | - provenance.isManual() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) - or - this.relevantSummaryElementGenerated(inSpec, outSpec, kind) - } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(AccessPath inSpec, AccessPath outSpec, string kind | - this.relevantSummaryElement(inSpec, outSpec, kind) and - interpretSpec(inSpec, input) and - interpretSpec(outSpec, output) - | - kind = "value" and preservesValue = true - or - kind = "taint" and preservesValue = false - ) - } - - override predicate 
hasProvenance(Provenance provenance) { - summaryElement(this, _, _, _, provenance) - } - } - - /** Holds if component `c` of specification `spec` cannot be parsed. */ - predicate invalidSpecComponent(AccessPath spec, string c) { - c = spec.getToken(_) and - not exists(interpretComponent(c)) - } - - /** Holds if `provenance` is not a valid provenance value. */ - bindingset[provenance] - predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } - - /** - * Holds if token `part` of specification `spec` has an invalid index. - * E.g., `Argument[-1]`. - */ - predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { - part = spec.getToken(_) and - part.getName() = ["Parameter", "Argument"] and - AccessPath::parseInt(part.getArgumentList()) < 0 - } - - private predicate inputNeedsReference(AccessPathToken c) { - c.getName() = "Argument" or - inputNeedsReferenceSpecific(c) - } - - private predicate outputNeedsReference(AccessPathToken c) { - c.getName() = ["Argument", "ReturnValue"] or - outputNeedsReferenceSpecific(c) - } - - private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { - exists(SourceOrSinkElement e | - sourceElement(e, output, kind, _) and - if outputNeedsReference(output.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { - exists(SourceOrSinkElement e | - sinkElement(e, input, kind, _) and - if inputNeedsReference(input.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - /** Holds if the first `n` tokens of `output` resolve to the given interpretation. 
*/ - private predicate interpretOutput( - AccessPath output, int n, InterpretNode ref, InterpretNode node - ) { - sourceElementRef(ref, output, _) and - n = 0 and - ( - if output = "" - then - // Allow language-specific interpretation of the empty access path - interpretOutputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretOutput(output, n - 1, ref, mid) and - c = output.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) and - parameterMatch(ppos, apos) - | - c = "Parameter" or parseParam(c, apos) - ) - or - c = "ReturnValue" and - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) - or - interpretOutputSpecific(c, mid, node) - ) - } - - /** Holds if the first `n` tokens of `input` resolve to the given interpretation. 
*/ - private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) { - sinkElementRef(ref, input, _) and - n = 0 and - ( - if input = "" - then - // Allow language-specific interpretation of the empty access path - interpretInputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretInput(input, n - 1, ref, mid) and - c = input.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ReturnNodeExt ret | - c = "ReturnValue" and - ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - interpretInputSpecific(c, mid, node) - ) - } - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate isSourceNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath output | - sourceElementRef(ref, output, kind) and - interpretOutput(output, output.getNumToken(), ref, node) - ) - } - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate isSinkNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath input | - sinkElementRef(ref, input, kind) and - interpretInput(input, input.getNumToken(), ref, node) - ) - } - } - - /** Provides a query predicate for outputting a set of relevant flow summaries. */ - module TestOutput { - /** A flow summary to include in the `summary/1` query predicate. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - /** Gets the string representation of this callable used by `summary/1`. */ - abstract string getCallableCsv(); - - /** Holds if flow is propagated between `input` and `output`. 
*/ - predicate relevantSummary( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - super.propagatesFlow(input, output, preservesValue) - } - - string toString() { result = super.toString() } - } - - /** A model to include in the `neutral/1` query predicate. */ - abstract class RelevantNeutralCallable instanceof NeutralCallable { - /** Gets the string representation of this callable used by `neutral/1`. */ - abstract string getCallableCsv(); - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = super.getKind() } - - string toString() { result = super.toString() } - } - - /** Render the kind in the format used in flow summaries. */ - private string renderKind(boolean preservesValue) { - preservesValue = true and result = "value" - or - preservesValue = false and result = "taint" - } - - private string renderProvenance(SummarizedCallable c) { - if c.applyManualModel() then result = "manual" else c.hasProvenance(result) - } - - private string renderProvenanceNeutral(NeutralCallable c) { - if c.hasManualModel() then result = "manual" else c.hasProvenance(result) - } - - /** - * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. - * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", - * ext is hardcoded to empty. - */ - query predicate summary(string csv) { - exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - | - c.relevantSummary(input, output, preservesValue) and - csv = - c.getCallableCsv() // Callable information - + input.getMadRepresentation() + ";" // input - + output.getMadRepresentation() + ";" // output - + renderKind(preservesValue) + ";" // kind - + renderProvenance(c) // provenance - ) - } - - /** - * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. 
- * The syntax is: "namespace;type;name;signature;kind;provenance"", - */ - query predicate neutral(string csv) { - exists(RelevantNeutralCallable c | - csv = - c.getCallableCsv() // Callable information - + c.getKind() + ";" // kind - + renderProvenanceNeutral(c) // provenance - ) - } - } - - /** - * Provides query predicates for rendering the generated data flow graph for - * a summarized callable. - * - * Import this module into a `.ql` file of `@kind graph` to render the graph. - * The graph is restricted to callables from `RelevantSummarizedCallable`. - */ - module RenderSummarizedCallable { - /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - string toString() { result = super.toString() } - } - - private newtype TNodeOrCall = - MkNode(SummaryNode n) { - exists(RelevantSummarizedCallable c | - n = TSummaryInternalNode(c, _) - or - n = TSummaryParameterNode(c, _) - ) - } or - MkCall(DataFlowCall call) { - call = summaryDataFlowCall(_) and - call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) - } - - private class NodeOrCall extends TNodeOrCall { - SummaryNode asNode() { this = MkNode(result) } - - DataFlowCall asCall() { this = MkCall(result) } - - string toString() { - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
- */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - } - } - - query predicate nodes(NodeOrCall n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { - exists(boolean preservesValue | - Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and - if preservesValue = true then value = "value" else value = "taint" - ) - or - exists(ContentSet c | - Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and - value = "read (" + c + ")" - or - Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and - value = "store (" + c + ")" - or - Private::Steps::summaryClearsContent(a.asNode(), c) and - b = a and - value = "clear (" + c + ")" - or - Private::Steps::summaryExpectsContent(a.asNode(), c) and - b = a and - value = "expect (" + c + ")" - ) - or - summaryPostUpdateNode(b.asNode(), a.asNode()) and - value = "post-update" - or - b.asCall() = summaryDataFlowCall(a.asNode()) and - value = "receiver" - or - exists(ArgumentPosition pos | - summaryArgumentNode(b.asCall(), a.asNode(), pos) and - value = "argument (" + pos + ")" - ) - } - - query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { - key = "semmle.label" and - value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") - } + predicate isParsedElementLowerBoundPosition(string c, boolean includeUnknown, int lower) { + isElementBody(c) and + lower = AccessPath::parseLowerBound(adjustElementArgument(c, includeUnknown)) } } diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll deleted file mode 100644 index 9db4f01dfb6..00000000000 --- 
a/ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll +++ /dev/null @@ -1,430 +0,0 @@ -/** - * Provides Ruby specific classes and predicates for defining flow summaries. - */ - -private import codeql.ruby.AST -private import DataFlowDispatch -private import DataFlowPrivate -private import DataFlowPublic -private import DataFlowImplCommon -private import FlowSummaryImpl::Private -private import FlowSummaryImpl::Public -private import codeql.ruby.dataflow.FlowSummary as FlowSummary - -/** - * A class of callables that are candidates for flow summary modeling. - */ -class SummarizedCallableBase = string; - -/** - * A class of callables that are candidates for neutral modeling. - */ -class NeutralCallableBase = string; - -DataFlowCallable inject(SummarizedCallable c) { result.asLibraryCallable() = c } - -/** Gets the parameter position representing a callback itself, if any. */ -ArgumentPosition callbackSelfParameterPosition() { result.isLambdaSelf() } - -/** Gets the synthesized data-flow call for `receiver`. */ -SummaryCall summaryDataFlowCall(SummaryNode receiver) { receiver = result.getReceiver() } - -/** Gets the type of content `c`. */ -DataFlowType getContentType(ContentSet c) { any() } - -/** Gets the type of the parameter at the given position. */ -DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { any() } - -/** Gets the return type of kind `rk` for callable `c`. */ -bindingset[c, rk] -DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() } - -/** - * Gets the type of the `i`th parameter in a synthesized call that targets a - * callback of type `t`. - */ -bindingset[t, pos] -DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() } - -/** - * Gets the return type of kind `rk` in a synthesized call that targets a - * callback of type `t`. - */ -DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() } - -/** Gets the type of synthetic global `sg`. 
*/ -DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() } - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and provenance `provenance`. - */ -predicate summaryElement( - FlowSummary::SummarizedCallable c, string input, string output, string kind, string provenance -) { - exists(boolean preservesValue | - c.propagatesFlowExt(input, output, preservesValue) and - (if preservesValue = true then kind = "value" else kind = "taint") and - provenance = "manual" - ) -} - -/** - * Holds if a neutral model exists for `c` of kind `kind` - * and with provenance `provenance`. - * Note. Neutral models have not been implemented for Ruby. - */ -predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() } - -bindingset[arg] -private SummaryComponent interpretElementArg(string arg) { - arg = "?" and - result = FlowSummary::SummaryComponent::elementUnknown() - or - arg = "any" and - result = FlowSummary::SummaryComponent::elementAny() - or - exists(int lower, boolean includeUnknown | - ParsePositions::isParsedElementLowerBoundPosition(arg, includeUnknown, lower) - | - includeUnknown = false and - result = FlowSummary::SummaryComponent::elementLowerBound(lower) - or - includeUnknown = true and - result = FlowSummary::SummaryComponent::elementLowerBoundOrUnknown(lower) - ) - or - exists(ConstantValue cv, string argAdjusted, boolean includeUnknown | - argAdjusted = ParsePositions::adjustElementArgument(arg, includeUnknown) and - ( - includeUnknown = false and - result = FlowSummary::SummaryComponent::elementKnown(cv) - or - includeUnknown = true and - result = FlowSummary::SummaryComponent::elementKnownOrUnknown(cv) - ) - | - cv.isInt(AccessPath::parseInt(argAdjusted)) - or - not exists(AccessPath::parseInt(argAdjusted)) and - cv.serialize() = argAdjusted - ) -} - -/** - * Gets the summary component for specification component `c`, if 
any. - * - * This covers all the Ruby-specific components of a flow summary. - */ -SummaryComponent interpretComponentSpecific(AccessPathToken c) { - exists(string arg, ParameterPosition ppos | - arg = c.getAnArgument("Argument") and - result = FlowSummary::SummaryComponent::argument(ppos) - | - arg = "any" and - ppos.isAny() - or - ppos.isPositionalLowerBound(AccessPath::parseLowerBound(arg)) - or - arg = "hash-splat" and - ppos.isHashSplat() - or - arg = "splat" and - ppos.isSplat(0) - ) - or - result = interpretElementArg(c.getAnArgument("Element")) - or - result = - FlowSummary::SummaryComponent::content(TSingletonContent(TFieldContent(c.getAnArgument("Field")))) - or - exists(ContentSet cs | - FlowSummary::SummaryComponent::content(cs) = interpretElementArg(c.getAnArgument("WithElement")) and - result = FlowSummary::SummaryComponent::withContent(cs) - ) - or - exists(ContentSet cs | - FlowSummary::SummaryComponent::content(cs) = - interpretElementArg(c.getAnArgument("WithoutElement")) and - result = FlowSummary::SummaryComponent::withoutContent(cs) - ) -} - -private string getContentSpecific(Content c) { - exists(string name | c = TFieldContent(name) and result = "Field[" + name + "]") - or - exists(ConstantValue cv | - c = TKnownElementContent(cv) and result = "Element[" + cv.serialize() + "!]" - ) - or - c = TUnknownElementContent() and result = "Element[?]" -} - -private string getContentSetSpecific(ContentSet cs) { - exists(Content c | cs = TSingletonContent(c) and result = getContentSpecific(c)) - or - cs = TAnyElementContent() and result = "Element[any]" - or - exists(Content::KnownElementContent kec | - cs = TKnownOrUnknownElementContent(kec) and - result = "Element[" + kec.getIndex().serialize() + "]" - ) - or - exists(int lower, boolean includeUnknown, string unknown | - cs = TElementLowerBoundContent(lower, includeUnknown) and - (if includeUnknown = true then unknown = "" else unknown = "!") and - result = "Element[" + lower + ".." 
+ unknown + "]" - ) -} - -/** Gets the textual representation of a summary component in the format used for MaD models. */ -string getMadRepresentationSpecific(SummaryComponent sc) { - exists(ContentSet cs | sc = TContentSummaryComponent(cs) and result = getContentSetSpecific(cs)) - or - exists(ContentSet cs | - sc = TWithoutContentSummaryComponent(cs) and - result = "WithoutElement[" + getContentSetSpecific(cs) + "]" - ) - or - exists(ContentSet cs | - sc = TWithContentSummaryComponent(cs) and - result = "WithElement[" + getContentSetSpecific(cs) + "]" - ) - or - exists(ReturnKind rk | - sc = TReturnSummaryComponent(rk) and - not rk = getReturnValueKind() and - result = "ReturnValue[" + rk + "]" - ) -} - -/** Gets the textual representation of a parameter position in the format used for flow summaries. */ -string getParameterPosition(ParameterPosition pos) { - exists(int i | - pos.isPositional(i) and - result = i.toString() - ) - or - exists(int i | - pos.isPositionalLowerBound(i) and - result = i + ".." - ) - or - exists(string name | - pos.isKeyword(name) and - result = name + ":" - ) - or - pos.isSelf() and - result = "self" - or - pos.isLambdaSelf() and - result = "lambda-self" - or - pos.isBlock() and - result = "block" - or - pos.isAny() and - result = "any" - or - pos.isAnyNamed() and - result = "any-named" - or - pos.isHashSplat() and - result = "hash-splat" - or - pos.isSplat(0) and - result = "splat" -} - -/** Gets the textual representation of an argument position in the format used for flow summaries. */ -string getArgumentPosition(ArgumentPosition pos) { - pos.isSelf() and result = "self" - or - pos.isLambdaSelf() and result = "lambda-self" - or - pos.isBlock() and result = "block" - or - exists(int i | - pos.isPositional(i) and - result = i.toString() - ) - or - exists(string name | - pos.isKeyword(name) and - result = name + ":" - ) -} - -/** Holds if input specification component `c` needs a reference. 
*/ -predicate inputNeedsReferenceSpecific(string c) { none() } - -/** Holds if output specification component `c` needs a reference. */ -predicate outputNeedsReferenceSpecific(string c) { none() } - -/** Gets the return kind corresponding to specification `"ReturnValue"`. */ -NormalReturnKind getReturnValueKind() { any() } - -/** - * All definitions in this module are required by the shared implementation - * (for source/sink interpretation), but they are unused for Ruby, where - * we rely on API graphs instead. - */ -private module UnusedSourceSinkInterpretation { - /** - * Holds if an external source specification exists for `n` with output specification - * `output`, kind `kind`, and provenance `provenance`. - */ - predicate sourceElement(AstNode n, string output, string kind, string provenance) { none() } - - /** - * Holds if an external sink specification exists for `n` with input specification - * `input`, kind `kind` and provenance `provenance`. - */ - predicate sinkElement(AstNode n, string input, string kind, string provenance) { none() } - - class SourceOrSinkElement = AstNode; - - /** An entity used to interpret a source/sink specification. */ - class InterpretNode extends AstNode { - /** Gets the element that this node corresponds to, if any. */ - SourceOrSinkElement asElement() { none() } - - /** Gets the data-flow node that this node corresponds to, if any. */ - Node asNode() { none() } - - /** Gets the call that this node corresponds to, if any. */ - DataFlowCall asCall() { none() } - - /** Gets the callable that this node corresponds to, if any. */ - DataFlowCallable asCallable() { none() } - - /** Gets the target of this call, if any. */ - Callable getCallTarget() { none() } - } - - /** Provides additional sink specification logic. */ - predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { none() } - - /** Provides additional source specification logic. 
*/ - predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { none() } -} - -import UnusedSourceSinkInterpretation - -module ParsePositions { - private import FlowSummaryImpl - - private predicate isParamBody(string body) { - body = any(AccessPathToken tok).getAnArgument("Parameter") - } - - private predicate isArgBody(string body) { - body = any(AccessPathToken tok).getAnArgument("Argument") - } - - private predicate isElementBody(string body) { - body = any(AccessPathToken tok).getAnArgument(["Element", "WithElement", "WithoutElement"]) - } - - predicate isParsedParameterPosition(string c, int i) { - isParamBody(c) and - i = AccessPath::parseInt(c) - } - - predicate isParsedArgumentPosition(string c, int i) { - isArgBody(c) and - i = AccessPath::parseInt(c) - } - - predicate isParsedArgumentLowerBoundPosition(string c, int i) { - isArgBody(c) and - i = AccessPath::parseLowerBound(c) - } - - predicate isParsedKeywordParameterPosition(string c, string paramName) { - isParamBody(c) and - c = paramName + ":" - } - - predicate isParsedKeywordArgumentPosition(string c, string paramName) { - isArgBody(c) and - c = paramName + ":" - } - - bindingset[arg] - string adjustElementArgument(string arg, boolean includeUnknown) { - result = arg.regexpCapture("(.*)!", 1) and - includeUnknown = false - or - result = arg and - not arg.matches("%!") and - includeUnknown = true - } - - predicate isParsedElementLowerBoundPosition(string c, boolean includeUnknown, int lower) { - isElementBody(c) and - lower = AccessPath::parseLowerBound(adjustElementArgument(c, includeUnknown)) - } -} - -/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. 
*/ -ArgumentPosition parseParamBody(string s) { - exists(int i | - ParsePositions::isParsedParameterPosition(s, i) and - result.isPositional(i) - ) - or - exists(string name | - ParsePositions::isParsedKeywordParameterPosition(s, name) and - result.isKeyword(name) - ) - or - s = "self" and - result.isSelf() - or - s = "lambda-self" and - result.isLambdaSelf() - or - s = "block" and - result.isBlock() - or - s = "any" and - result.isAny() - or - s = "any-named" and - result.isAnyNamed() -} - -/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */ -ParameterPosition parseArgBody(string s) { - exists(int i | - ParsePositions::isParsedArgumentPosition(s, i) and - result.isPositional(i) - ) - or - exists(int i | - ParsePositions::isParsedArgumentLowerBoundPosition(s, i) and - result.isPositionalLowerBound(i) - ) - or - exists(string name | - ParsePositions::isParsedKeywordArgumentPosition(s, name) and - result.isKeyword(name) - ) - or - s = "self" and - result.isSelf() - or - s = "lambda-self" and - result.isLambdaSelf() - or - s = "block" and - result.isBlock() - or - s = "any" and - result.isAny() - or - s = "any-named" and - result.isAnyNamed() -} diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll index a687837f8fd..06e1400d799 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll @@ -541,7 +541,7 @@ private module ParamsSummaries { result = paramsInstance().getAMethodCall(methodReturnsTaintFromSelf()).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "ReturnValue" and preservesValue = false @@ -564,7 +564,7 @@ private module ParamsSummaries { [result.getReceiver(), result.getArgument(0)] } - override 
predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = ["Argument[self]", "Argument[0]"] and output = "ReturnValue" and preservesValue = false @@ -588,7 +588,7 @@ private module ParamsSummaries { [result.getReceiver(), result.getArgument(0)] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = ["Argument[self]", "Argument[0]"] and output = ["ReturnValue", "Argument[self]"] and preservesValue = false diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveSupport.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveSupport.qll index 441c75a81f4..880690a1a4b 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveSupport.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveSupport.qll @@ -61,7 +61,7 @@ module ActiveSupport { ] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "ReturnValue" and preservesValue = false } } @@ -75,7 +75,7 @@ module ActiveSupport { private class IdentitySummary extends SimpleSummarizedCallable { IdentitySummary() { this = ["presence", "deep_dup"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "ReturnValue" and preservesValue = true @@ -109,7 +109,7 @@ module ActiveSupport { private class ToJsonSummary extends SimpleSummarizedCallable { ToJsonSummary() { this = "to_json" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean 
preservesValue) { input = ["Argument[self]", "Argument[self].Element[any]"] and output = "ReturnValue" and preservesValue = false @@ -124,7 +124,7 @@ module ActiveSupport { private class WithIndifferentAccessSummary extends SimpleSummarizedCallable { WithIndifferentAccessSummary() { this = "with_indifferent_access" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[any]" and preservesValue = true @@ -137,7 +137,7 @@ module ActiveSupport { private class ReverseMergeSummary extends SimpleSummarizedCallable { ReverseMergeSummary() { this = ["reverse_merge", "with_defaults"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self,0].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -150,7 +150,7 @@ module ActiveSupport { private class ReverseMergeBangSummary extends SimpleSummarizedCallable { ReverseMergeBangSummary() { this = ["reverse_merge!", "with_defaults!", "reverse_update"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self,0].WithElement[any]" and output = ["ReturnValue", "Argument[self]"] and preservesValue = true @@ -166,7 +166,7 @@ module ActiveSupport { ] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -209,7 +209,7 @@ module ActiveSupport { final override MethodCall getACall() { 
result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( exists(string s | s = getExtractComponent(mc, _) | input = "Argument[self].Element[" + s + "!]" and @@ -244,7 +244,7 @@ module ActiveSupport { private class CompactBlankSummary extends SimpleSummarizedCallable { CompactBlankSummary() { this = "compact_blank" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -254,7 +254,7 @@ module ActiveSupport { private class ExcludingSummary extends SimpleSummarizedCallable { ExcludingSummary() { this = ["excluding", "without"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -264,7 +264,7 @@ module ActiveSupport { private class InOrderOfSummary extends SimpleSummarizedCallable { InOrderOfSummary() { this = "in_order_of" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -277,7 +277,7 @@ module ActiveSupport { private class IncludingSummary extends SimpleSummarizedCallable { IncludingSummary() { this = "including" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( exists(ArrayIndex i | 
input = "Argument[self].Element[" + i + "]" and @@ -299,7 +299,7 @@ module ActiveSupport { private class IndexBySummary extends SimpleSummarizedCallable { IndexBySummary() { this = "index_by" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue.Element[?]"] and preservesValue = true @@ -309,7 +309,7 @@ module ActiveSupport { private class IndexWithSummary extends SimpleSummarizedCallable { IndexWithSummary() { this = "index_with" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -338,7 +338,7 @@ module ActiveSupport { override MethodCall getACall() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[0].Element[" + key + "]" and output = "ReturnValue" and preservesValue = true @@ -369,7 +369,7 @@ module ActiveSupport { override MethodCall getACall() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string s, int i | s = getKeyArgument(mc, i) and input = "Argument[self].Element[0].Element[" + s + "]" and @@ -392,7 +392,7 @@ module ActiveSupport { override MethodCall getACall() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean 
preservesValue) { input = "Argument[self].Element[any].Element[" + key + "]" and output = "ReturnValue.Element[any]" and preservesValue = true @@ -423,7 +423,7 @@ module ActiveSupport { override MethodCall getACall() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string s, int i | s = getKeyArgument(mc, i) and input = "Argument[self].Element[any].Element[" + s + "]" and @@ -436,7 +436,7 @@ module ActiveSupport { private class SoleSummary extends SimpleSummarizedCallable { SoleSummary() { this = "sole" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[0]" and output = "ReturnValue" and preservesValue = true @@ -470,7 +470,7 @@ module ActiveSupport { private class JsonEscapeSummary extends SimpleSummarizedCallable { JsonEscapeSummary() { this = "json_escape" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Arel.qll b/ruby/ql/lib/codeql/ruby/frameworks/Arel.qll index f57fa41c740..92fcb9ac5b4 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Arel.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Arel.qll @@ -25,7 +25,7 @@ module Arel { result = API::getTopLevelMember("Arel").getAMethodCall("sql").asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } diff --git 
a/ruby/ql/lib/codeql/ruby/frameworks/Core.qll b/ruby/ql/lib/codeql/ruby/frameworks/Core.qll index 9835894b82b..7711b1f774f 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Core.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Core.qll @@ -63,7 +63,7 @@ private class SplatSummary extends SummarizedCallable { override SplatExpr getACallSimple() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // *1 = [1] input = "Argument[self].WithoutElement[any]" and @@ -82,7 +82,7 @@ private class HashSplatSummary extends SummarizedCallable { override HashSplatExpr getACallSimple() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithElement[any]" and output = "ReturnValue" and preservesValue = true diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Erb.qll b/ruby/ql/lib/codeql/ruby/frameworks/Erb.qll index 2d080091b2b..d29eda88e40 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Erb.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Erb.qll @@ -18,7 +18,7 @@ module Erb { override MethodCall getACall() { result = any(ErbTemplateNewCall c).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Files.qll b/ruby/ql/lib/codeql/ruby/frameworks/Files.qll index a23bf3f2ed3..b908e3da8f9 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Files.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Files.qll @@ -115,7 +115,7 @@ module File { result = API::getTopLevelMember("File").getAMethodCall(methodName).asExpr().getExpr() } - 
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -133,7 +133,7 @@ module File { result = API::getTopLevelMember("File").getAMethodCall("join").asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0,1..]" and output = "ReturnValue" and preservesValue = false diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Ldap.qll b/ruby/ql/lib/codeql/ruby/frameworks/Ldap.qll index 71186c717fd..f3f12bbd55b 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Ldap.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Ldap.qll @@ -19,7 +19,7 @@ module NetLdap { override MethodCall getACall() { result = any(NetLdapConnection l).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } @@ -32,7 +32,7 @@ module NetLdap { override MethodCall getACall() { result = any(NetLdapFilter l).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = ["Argument[0]", "Argument[1]"] and output = "ReturnValue" and preservesValue = false } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Mysql2.qll b/ruby/ql/lib/codeql/ruby/frameworks/Mysql2.qll index 1b7c1cde61e..efd295c44e7 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Mysql2.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Mysql2.qll @@ -18,7 +18,7 @@ module Mysql2 { override MethodCall getACall() { result = 
any(Mysql2Connection c).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } @@ -66,7 +66,7 @@ module Mysql2 { override MethodCall getACall() { result = any(Mysql2EscapeSanitization c).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Pg.qll b/ruby/ql/lib/codeql/ruby/frameworks/Pg.qll index e0f60730721..2c3007cd20b 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Pg.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Pg.qll @@ -18,7 +18,7 @@ module Pg { override MethodCall getACall() { result = any(PgConnection c).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll b/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll index 42d038a303d..a4de348af32 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Rails.qll @@ -314,7 +314,7 @@ private predicate isPotentialRenderCall(MethodCall renderCall, Location loc, Erb // TODO: initialization hooks, e.g. before_configuration, after_initialize... // TODO: initializers /** A synthetic global to represent the value passed to the `locals` argument of a render call for a specific ERB file. 
*/ -private class LocalAssignsHashSyntheticGlobal extends SummaryComponent::SyntheticGlobal { +private class LocalAssignsHashSyntheticGlobal extends string { private ErbFile erbFile; private string id; // Note that we can't use an actual `Rails::RenderCall` here due to problems with non-monotonic recursion @@ -346,7 +346,7 @@ private class RenderLocalsSummary extends SummarizedCallable { override Rails::RenderCall getACall() { result = glob.getARenderCall() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[locals:]" and output = "SyntheticGlobal[" + glob + "]" and preservesValue = true @@ -364,7 +364,7 @@ private class AccessLocalsSummary extends SummarizedCallable { result.getMethodName() = "local_assigns" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "SyntheticGlobal[" + glob + "]" and output = "ReturnValue" and preservesValue = true @@ -394,7 +394,7 @@ private class AccessLocalsKeySummary extends SummarizedCallable { result.getReceiver() instanceof SelfVariableReadAccess } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "SyntheticGlobal[" + glob + "].Element[:" + methodName + "]" and output = "ReturnValue" and preservesValue = true diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Sequel.qll b/ruby/ql/lib/codeql/ruby/frameworks/Sequel.qll index b9488a92016..65d091e8229 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Sequel.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Sequel.qll @@ -19,7 +19,7 @@ module Sequel { override MethodCall getACall() { result = any(SequelConnection c).asExpr().getExpr() } - override predicate 
propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Sinatra.qll b/ruby/ql/lib/codeql/ruby/frameworks/Sinatra.qll index 01795386a30..8c7162eeec8 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Sinatra.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Sinatra.qll @@ -133,7 +133,7 @@ module Sinatra { /** * A synthetic global representing the hash of local variables passed to an ERB template. */ - class ErbLocalsHashSyntheticGlobal extends SummaryComponent::SyntheticGlobal { + class ErbLocalsHashSyntheticGlobal extends string { private string id; private MethodCall erbCall; private ErbFile erbFile; @@ -172,7 +172,7 @@ module Sinatra { override MethodCall getACall() { result = any(ErbCall c).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[locals:]" and output = "SyntheticGlobal[" + any(ErbLocalsHashSyntheticGlobal global) + "]" and preservesValue = true @@ -207,7 +207,7 @@ module Sinatra { result.getReceiver() instanceof SelfVariableReadAccess } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "SyntheticGlobal[" + global + "].Element[:" + local + "]" and output = "ReturnValue" and preservesValue = true diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll b/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll index 981ace2e7da..abb7eec297d 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll @@ -94,7 +94,7 @@ module Sqlite3 { override MethodCall getACall() { result = 
any(SQLite3QuoteSanitization c).asExpr().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Array.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Array.qll index 301b9ba6bf0..b2a30beafc3 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Array.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Array.qll @@ -46,7 +46,7 @@ module Array { override MethodCall getACallSimple() { result = getAStaticArrayCall("[]") } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // we make use of the special `splat` argument kind, which contains all positional // arguments wrapped in an implicit array, as well as explicit splat arguments input = "Argument[splat]" and @@ -60,7 +60,7 @@ module Array { override MethodCall getACallSimple() { result = getAStaticArrayCall("new") } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[1]" and output = "ReturnValue.Element[?]" @@ -80,7 +80,7 @@ module Array { override MethodCall getACallSimple() { result = getAStaticArrayCall("try_convert") } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -92,7 +92,7 @@ module Array { override BitwiseAndExpr getACallSimple() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { 
+ override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = ["Argument[self].Element[any]", "Argument[0].Element[any]"] and output = "ReturnValue.Element[?]" and preservesValue = true @@ -104,7 +104,7 @@ module Array { override BitwiseOrExpr getACallSimple() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = ["Argument[self].Element[any]", "Argument[0].Element[any]"] and output = "ReturnValue.Element[?]" and preservesValue = true @@ -116,7 +116,7 @@ module Array { override MulExpr getACallSimple() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -128,7 +128,7 @@ module Array { override AddExpr getACallSimple() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].WithElement[any]" and output = "ReturnValue" @@ -144,7 +144,7 @@ module Array { bindingset[this] DifferenceSummaryShared() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -163,7 +163,7 @@ module Array { override LShiftExpr getACallSimple() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = 
"Argument[self].WithElement[any]" and output = "ReturnValue" @@ -203,7 +203,7 @@ module Array { if methodName = "slice" then index.isInt(_) else any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[" + index.serialize() + "]" and output = "ReturnValue" and preservesValue = true @@ -240,7 +240,7 @@ module Array { isUnknownElementIndex(mc.getArgument(0)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue" and preservesValue = true @@ -265,7 +265,7 @@ module Array { this = methodName + "(" + start + ".." + end + ")" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { preservesValue = true and ( input = "Argument[self].WithElement[?]" and @@ -298,7 +298,7 @@ module Array { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[0..]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -325,7 +325,7 @@ module Array { this = "[" + index.serialize() + "]=" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[self].Element[" + index.serialize() + "]" and preservesValue = true @@ -344,7 +344,7 @@ module Array { this = "[]=" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + 
override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[self].Element[?]" and preservesValue = true @@ -363,7 +363,7 @@ module Array { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // We model this imprecisely, saying that there's flow from any element of // the argument or the receiver to any element of the receiver. This could // be made more precise when the range is known, similar to the way it's @@ -384,7 +384,7 @@ module Array { private class AssocSummary extends SimpleSummarizedCallable { AssocSummary() { this = ["assoc", "rassoc"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -409,7 +409,7 @@ module Array { index = DataFlow::Content::getKnownElementIndex(mc.getArgument(0)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[" + index.serialize() + "]" and output = "ReturnValue" and preservesValue = true @@ -423,7 +423,7 @@ module Array { isUnknownElementIndex(mc.getArgument(0)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue" and preservesValue = true @@ -433,7 +433,7 @@ module Array { private class BSearchSummary extends SimpleSummarizedCallable { BSearchSummary() { this = "bsearch" } - override predicate propagatesFlowExt(string 
input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue"] and preservesValue = true @@ -443,7 +443,7 @@ module Array { private class BSearchIndexSummary extends SimpleSummarizedCallable { BSearchIndexSummary() { this = "bsearch_index" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -453,7 +453,7 @@ module Array { private class ClearSummary extends SimpleSummarizedCallable { ClearSummary() { this = "clear" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" and preservesValue = true @@ -464,7 +464,7 @@ module Array { // `map!` is an alias of `collect!`. 
CollectBangSummary() { this = ["collect!", "map!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -478,7 +478,7 @@ module Array { private class CombinationSummary extends SimpleSummarizedCallable { CombinationSummary() { this = "combination" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0].Element[?]" @@ -492,7 +492,7 @@ module Array { private class CompactBangSummary extends SimpleSummarizedCallable { CompactBangSummary() { this = "compact!" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[0..]" and output = ["ReturnValue.Element[?]", "Argument[self].Element[?]"] and preservesValue = true @@ -502,7 +502,7 @@ module Array { private class ConcatSummary extends SimpleSummarizedCallable { ConcatSummary() { this = "concat" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0..].Element[any]" and output = "Argument[self].Element[?]" and preservesValue = true @@ -512,7 +512,7 @@ module Array { private class DeconstructSummary extends SimpleSummarizedCallable { DeconstructSummary() { this = "deconstruct" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // 
The documentation of `deconstruct` is blank, but the implementation // shows that it just returns the receiver, unchanged: // https://github.com/ruby/ruby/blob/71bc99900914ef3bc3800a22d9221f5acf528082/array.c#L7810-L7814. @@ -530,7 +530,7 @@ module Array { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" @@ -553,8 +553,8 @@ module Array { mc.getArgument(0).getConstantValue() = index } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or ( ( @@ -586,8 +586,8 @@ module Array { not exists(DataFlow::Content::getKnownElementIndex(mc.getArgument(0))) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or ( // array indices may get shifted @@ -610,7 +610,7 @@ module Array { bindingset[this] DeleteAtSummary() { mc.getMethodName() = "delete_at" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" and preservesValue = true @@ -628,8 +628,8 @@ module Array { i >= 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + 
override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or ( input = "Argument[self].Element[?]" and @@ -658,8 +658,8 @@ module Array { not mc.getArgument(0).getConstantValue().isInt(_) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[self].Element[any]" and output = ["ReturnValue", "Argument[self].Element[?]"] and @@ -675,7 +675,7 @@ module Array { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[" + lastBlockParam + "]" @@ -743,7 +743,7 @@ module Array { override MethodCall getACallSimple() { result = dig } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" + buildDigInputSpec(dig) and output = "ReturnValue" and preservesValue = true @@ -764,7 +764,7 @@ module Array { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[" + lastBlockParam + "]" @@ -779,7 +779,7 @@ module Array { private class EachIndexSummary extends SimpleSummarizedCallable { EachIndexSummary() { this = ["each_index", "each_key"] } - override predicate 
propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -804,7 +804,7 @@ module Array { not index.isInt(any(int i | i < 0)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[" + index.serialize() + "]" and output = "ReturnValue" @@ -827,7 +827,7 @@ module Array { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = ["Argument[self].Element[any]", "Argument[1]"] and output = "ReturnValue" @@ -847,7 +847,7 @@ module Array { override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = ["Argument[0]", "Argument[block].ReturnValue"] and output = "Argument[self].Element[?]" and preservesValue = true @@ -860,8 +860,8 @@ module Array { if exists(mc.getBlock()) then mc.getNumberOfArguments() = 0 else mc.getNumberOfArguments() = 1 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" and @@ -885,7 +885,7 @@ module Array { private class FlattenSummary extends SimpleSummarizedCallable { FlattenSummary() { this = "flatten" } - override predicate 
propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = [ @@ -901,7 +901,7 @@ module Array { private class FlattenBangSummary extends SimpleSummarizedCallable { FlattenBangSummary() { this = "flatten!" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = [ @@ -920,7 +920,7 @@ module Array { private class IndexSummary extends SimpleSummarizedCallable { IndexSummary() { this = ["index", "rindex"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -944,7 +944,7 @@ module Array { mc.getArgument(0).getConstantValue().isInt(i) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(int numValues, string r | numValues = mc.getNumberOfArguments() - 1 and r = ["ReturnValue", "Argument[self]"] and @@ -984,7 +984,7 @@ module Array { not mc.getArgument(0).getConstantValue().isInt(_) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" or @@ -1000,7 +1000,7 @@ module Array { IntersectionSummary() { this = "intersection" and mc.getMethodName() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" 
or @@ -1023,7 +1023,7 @@ module Array { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" @@ -1054,7 +1054,7 @@ module Array { private class LastNoArgSummary extends LastSummary { LastNoArgSummary() { this = "last(no_arg)" and mc.getNumberOfArguments() = 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue" and preservesValue = true @@ -1064,7 +1064,7 @@ module Array { private class LastArgSummary extends LastSummary { LastArgSummary() { this = "last(arg)" and mc.getNumberOfArguments() > 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -1074,7 +1074,7 @@ module Array { private class PackSummary extends SimpleSummarizedCallable { PackSummary() { this = "pack" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue" and preservesValue = false @@ -1084,7 +1084,7 @@ module Array { private class PermutationSummary extends SimpleSummarizedCallable { PermutationSummary() { this = ["permutation", "repeated_combination", "repeated_permutation"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string 
input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0].Element[?]" @@ -1111,7 +1111,7 @@ module Array { // We don't track the length of the array, so we can't model that this // clears the last element of the receiver, and we can't be precise about // which particular element flows to the return value. - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue" and preservesValue = true @@ -1124,7 +1124,7 @@ module Array { // We don't track the length of the array, so we can't model that this // clears elements from the end of the receiver, and we can't be precise // about which particular elements flow to the return value. - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -1147,7 +1147,7 @@ module Array { not result.getReceiver().(SelfVariableAccess).getCfgScope() instanceof ModuleBase } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(int num | num = mc.getNumberOfArguments() and preservesValue = true | exists(ArrayIndex i | input = "Argument[self].Element[" + i + "!]" and @@ -1172,7 +1172,7 @@ module Array { private class ProductSummary extends SimpleSummarizedCallable { ProductSummary() { this = "product" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = 
"Argument[self].Element[any]" or @@ -1188,7 +1188,7 @@ module Array { private class JoinSummary extends SimpleSummarizedCallable { JoinSummary() { this = ["join"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue" and preservesValue = false @@ -1199,7 +1199,7 @@ module Array { // `append` is an alias for `push` PushSummary() { this = ["push", "append"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].WithElement[any]" and output = "ReturnValue" @@ -1221,7 +1221,7 @@ module Array { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // array indices may get shifted input = "Argument[self].Element[0..!]" and @@ -1240,7 +1240,7 @@ module Array { private class ReplaceSummary extends SimpleSummarizedCallable { ReplaceSummary() { this = "replace" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].WithElement[any]" and output = ["ReturnValue", "Argument[self]"] and preservesValue = true @@ -1254,7 +1254,7 @@ module Array { private class ReverseSummary extends SimpleSummarizedCallable { ReverseSummary() { this = "reverse" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = 
"ReturnValue.Element[?]" and preservesValue = true @@ -1264,7 +1264,7 @@ module Array { private class ReverseBangSummary extends SimpleSummarizedCallable { ReverseBangSummary() { this = "reverse!" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[self]", "ReturnValue"] + ".Element[?]" and preservesValue = true @@ -1290,7 +1290,7 @@ module Array { not exists(mc.getArgument(0)) and c = 1 and this = "rotate" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { preservesValue = true and ( input = "Argument[self].Element[?]" and @@ -1315,7 +1315,7 @@ module Array { not DataFlow::Content::getKnownElementIndex(mc.getArgument(0)).isInt(_) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -1330,7 +1330,7 @@ module Array { override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" and preservesValue = true @@ -1347,8 +1347,8 @@ module Array { not exists(mc.getArgument(0)) and c = 1 and this = "rotate!" 
} - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or exists(string r | r = ["Argument[self]", "ReturnValue"] and preservesValue = true | input = "Argument[self].Element[?]" and @@ -1373,8 +1373,8 @@ module Array { not mc.getArgument(0).getConstantValue().isInt(_) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[self].Element[any]" and output = ["Argument[self].Element[?]", "ReturnValue.Element[?]"] and @@ -1395,7 +1395,7 @@ module Array { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[" + lastBlockParam + "]" @@ -1422,7 +1422,7 @@ module Array { override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" and preservesValue = true @@ -1432,8 +1432,8 @@ module Array { private class ShiftNoArgSummary extends ShiftSummary { ShiftNoArgSummary() { this = "shift" and not exists(mc.getArgument(0)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, 
preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or preservesValue = true and ( @@ -1467,8 +1467,8 @@ module Array { this = "shift(" + n + ")" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or preservesValue = true and ( @@ -1495,7 +1495,7 @@ module Array { not mc.getArgument(0).getConstantValue().isInt(_) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[self].Element[?]", "ReturnValue.Element[?]"] and preservesValue = true @@ -1505,7 +1505,7 @@ module Array { private class ShuffleSummary extends SimpleSummarizedCallable { ShuffleSummary() { this = "shuffle" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -1515,7 +1515,7 @@ module Array { private class ShuffleBangSummary extends SimpleSummarizedCallable { ShuffleBangSummary() { this = "shuffle!" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["ReturnValue.Element[?]", "Argument[self].Element[?]"] and preservesValue = true @@ -1528,7 +1528,7 @@ module Array { bindingset[this] SliceBangSummary() { mc.getMethodName() = "slice!" 
} - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithoutElement[any]" and output = "Argument[self]" and preservesValue = true @@ -1547,8 +1547,8 @@ module Array { n = DataFlow::Content::getKnownElementIndex(mc.getArgument(0)).getInt() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or preservesValue = true and ( @@ -1581,8 +1581,8 @@ module Array { isUnknownElementIndex(mc.getArgument(0)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[self].Element[any]" and output = @@ -1625,8 +1625,8 @@ module Array { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or preservesValue = true and ( @@ -1675,8 +1675,8 @@ module Array { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[self].Element[any]" and output = ["Argument[self].Element[?]", "ReturnValue.Element[?]"] and @@ -1687,7 +1687,7 @@ 
module Array { private class SortBangSummary extends SimpleSummarizedCallable { SortBangSummary() { this = "sort!" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = [ @@ -1705,7 +1705,7 @@ module Array { private class SortByBangSummary extends SimpleSummarizedCallable { SortByBangSummary() { this = "sort_by!" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "Argument[self].Element[?]", "ReturnValue.Element[?]"] and @@ -1720,7 +1720,7 @@ module Array { private class TransposeSummary extends SimpleSummarizedCallable { TransposeSummary() { this = "transpose" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { preservesValue = true and ( input = "Argument[self].Element[?].Element[?]" and @@ -1745,7 +1745,7 @@ module Array { private class UniqBangSummary extends SimpleSummarizedCallable { UniqBangSummary() { this = "uniq!" 
} - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[self].Element[?]", "ReturnValue.Element[?]", "Argument[block].Parameter[0]"] and @@ -1760,7 +1760,7 @@ module Array { private class UnionSummary extends SimpleSummarizedCallable { UnionSummary() { this = "union" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" or @@ -1798,9 +1798,7 @@ module Array { ) + ")" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) - or + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string s, int i | s = getValuesAtComponent(mc, i) and input = "Argument[self].Element[" + s + "]" and @@ -1816,9 +1814,7 @@ module Array { this = "values_at(?)" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) - or + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -1836,7 +1832,7 @@ module Enumerable { private class ChunkSummary extends SimpleSummarizedCallable { ChunkSummary() { this = "chunk" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -1846,7 +1842,7 @@ module Enumerable { private class 
ChunkWhileSummary extends SimpleSummarizedCallable { ChunkWhileSummary() { this = "chunk_while" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "Argument[block].Parameter[1]"] and preservesValue = true @@ -1857,7 +1853,7 @@ module Enumerable { // `map` is an alias of `collect`. CollectSummary() { this = ["collect", "map"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -1872,7 +1868,7 @@ module Enumerable { // `flat_map` is an alias of `collect_concat`. CollectConcatSummary() { this = ["collect_concat", "flat_map"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -1886,7 +1882,7 @@ module Enumerable { private class CompactSummary extends SimpleSummarizedCallable { CompactSummary() { this = "compact" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[0..]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -1903,7 +1899,7 @@ module Enumerable { private class CountSummary extends SimpleSummarizedCallable { CountSummary() { this = "count" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string 
input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -1913,7 +1909,7 @@ module Enumerable { private class CycleSummary extends SimpleSummarizedCallable { CycleSummary() { this = "cycle" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -1924,7 +1920,7 @@ module Enumerable { // `find` is an alias of `detect`. DetectSummary() { this = ["detect", "find"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue"] @@ -1953,14 +1949,15 @@ module Enumerable { mc.getArgument(0).getConstantValue().isInt(i) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[?]" and output = "ReturnValue.Element[?]" or - exists(ArrayIndex j | + exists(ArrayIndex j, ArrayIndex h | + h = j - i and input = "Argument[self].Element[" + j + "!]" and - output = "ReturnValue.Element[" + (j - i) + "]" + output = "ReturnValue.Element[" + h + "]" ) ) and preservesValue = true @@ -1973,7 +1970,7 @@ module Enumerable { not mc.getArgument(0).getConstantValue().isInt(_) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -1983,7 
+1980,7 @@ module Enumerable { private class DropWhileSummary extends SimpleSummarizedCallable { DropWhileSummary() { this = "drop_while" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["ReturnValue.Element[?]", "Argument[block].Parameter[0]"] and preservesValue = true @@ -1993,7 +1990,7 @@ module Enumerable { private class EachConsSummary extends SimpleSummarizedCallable { EachConsSummary() { this = "each_cons" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0].Element[?]" and preservesValue = true @@ -2003,7 +2000,7 @@ module Enumerable { private class EachEntrySummary extends SimpleSummarizedCallable { EachEntrySummary() { this = "each_entry" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" @@ -2018,7 +2015,7 @@ module Enumerable { private class EachSliceSummary extends SimpleSummarizedCallable { EachSliceSummary() { this = "each_slice" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0].Element[?]" @@ -2033,7 +2030,7 @@ module Enumerable { private class EachWithIndexSummary extends SimpleSummarizedCallable { EachWithIndexSummary() { this = "each_with_index" } - override predicate propagatesFlowExt(string input, string output, 
boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" @@ -2048,7 +2045,7 @@ module Enumerable { private class EachWithObjectSummary extends SimpleSummarizedCallable { EachWithObjectSummary() { this = "each_with_object" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" @@ -2063,7 +2060,7 @@ module Enumerable { private class FilterMapSummary extends SimpleSummarizedCallable { FilterMapSummary() { this = "filter_map" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -2077,7 +2074,7 @@ module Enumerable { private class FindIndexSummary extends SimpleSummarizedCallable { FindIndexSummary() { this = "find_index" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -2096,7 +2093,7 @@ module Enumerable { private class FirstNoArgSummary extends FirstSummary { FirstNoArgSummary() { this = "first(no_arg)" and mc.getNumberOfArguments() = 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[0]" and output = "ReturnValue" and preservesValue = true @@ -2110,7 +2107,7 @@ 
module Enumerable { this = "first(" + n + ")" and mc.getArgument(0).getConstantValue().isInt(n) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( exists(ArrayIndex i | i < n and @@ -2132,7 +2129,7 @@ module Enumerable { not mc.getArgument(0).getConstantValue().isInt(_) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -2156,7 +2153,7 @@ module Enumerable { private class GrepBlockSummary extends GrepSummary { GrepBlockSummary() { this = methodName + "(block)" and exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" @@ -2171,7 +2168,7 @@ module Enumerable { private class GrepNoBlockSummary extends GrepSummary { GrepNoBlockSummary() { this = methodName + "(no_block)" and not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -2181,7 +2178,7 @@ module Enumerable { private class GroupBySummary extends SimpleSummarizedCallable { GroupBySummary() { this = "group_by" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // TODO: Add flow to return value once we have flow through 
hashes input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and @@ -2207,7 +2204,7 @@ module Enumerable { private class InjectNoArgSummary extends InjectSummary { InjectNoArgSummary() { this = methodName + "_no_arg" and mc.getNumberOfArguments() = 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // The no-argument variant of inject passes element 0 to the first block // parameter (first iteration only). All other elements are passed to the // second block parameter. @@ -2227,7 +2224,7 @@ module Enumerable { private class InjectArgSummary extends InjectSummary { InjectArgSummary() { this = methodName + "_arg" and mc.getNumberOfArguments() > 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // The first argument of the call is passed to the first block parameter. 
input = "Argument[0]" and @@ -2263,7 +2260,7 @@ module Enumerable { mc.getNumberOfArguments() = 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue"] and preservesValue = true @@ -2276,7 +2273,7 @@ module Enumerable { mc.getNumberOfArguments() > 0 } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue.Element[?]"] and preservesValue = true @@ -2304,7 +2301,7 @@ module Enumerable { not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue" and preservesValue = true @@ -2318,7 +2315,7 @@ module Enumerable { not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -2332,7 +2329,7 @@ module Enumerable { exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "Argument[block].Parameter[1]", "ReturnValue"] and preservesValue = true @@ -2346,7 +2343,7 @@ module Enumerable { exists(mc.getBlock()) } - override 
predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "Argument[block].Parameter[1]", "ReturnValue.Element[?]"] and @@ -2369,7 +2366,7 @@ module Enumerable { not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -2382,7 +2379,7 @@ module Enumerable { exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "Argument[block].Parameter[1]", "ReturnValue.Element[?]"] and @@ -2393,7 +2390,7 @@ module Enumerable { private class MinmaxBySummary extends SimpleSummarizedCallable { MinmaxBySummary() { this = "minmax_by" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue.Element[?]"] and preservesValue = true @@ -2403,7 +2400,7 @@ module Enumerable { private class PartitionSummary extends SimpleSummarizedCallable { PartitionSummary() { this = "partition" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue.Element[?].Element[?]"] 
and preservesValue = true @@ -2423,7 +2420,7 @@ module Enumerable { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[" + lastBlockParam + "]" and preservesValue = true @@ -2438,7 +2435,7 @@ module Enumerable { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // array indices may get shifted input = "Argument[self].Element[0..!]" and @@ -2467,7 +2464,7 @@ module Enumerable { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // array indices may get shifted input = "Argument[self].Element[0..!]" and @@ -2486,7 +2483,7 @@ module Enumerable { private class SliceBeforeAfterSummary extends SimpleSummarizedCallable { SliceBeforeAfterSummary() { this = ["slice_before", "slice_after"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -2496,7 +2493,7 @@ module Enumerable { private class SliceWhenSummary extends SimpleSummarizedCallable { SliceWhenSummary() { this = "slice_when" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = 
"Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "Argument[block].Parameter[1]"] and preservesValue = true @@ -2506,7 +2503,7 @@ module Enumerable { private class SortSummary extends SimpleSummarizedCallable { SortSummary() { this = "sort" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "Argument[block].Parameter[1]", "ReturnValue.Element[?]"] and @@ -2517,7 +2514,7 @@ module Enumerable { private class SortBySummary extends SimpleSummarizedCallable { SortBySummary() { this = "sort_by" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["Argument[block].Parameter[0]", "ReturnValue.Element[?]"] and preservesValue = true @@ -2527,7 +2524,7 @@ module Enumerable { private class SumSummary extends SimpleSummarizedCallable { SumSummary() { this = "sum" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -2551,7 +2548,7 @@ module Enumerable { mc.getArgument(0).getConstantValue().isInt(i) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].WithElement[?]" and output = "ReturnValue" @@ -2571,7 +2568,7 @@ module Enumerable { not mc.getArgument(0).getConstantValue().isInt(_) } - override predicate propagatesFlowExt(string input, string output, 
boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // When the index is unknown, we can't know the size of the result, but we // know that indices are preserved, so, as an approximation, we just treat // it like the array is copied. @@ -2584,7 +2581,7 @@ module Enumerable { private class TakeWhileSummary extends SimpleSummarizedCallable { TakeWhileSummary() { this = "take_while" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -2603,7 +2600,7 @@ module Enumerable { // `to_ary` works a bit like `to_a` (close enough for our purposes). ToASummary() { this = ["to_a", "entries", "to_ary"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithElement[0..]" and output = "ReturnValue" and preservesValue = true @@ -2613,7 +2610,7 @@ module Enumerable { private class UniqSummary extends SimpleSummarizedCallable { UniqSummary() { this = "uniq" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = ["ReturnValue.Element[?]", "Argument[block].Parameter[0]"] and preservesValue = true @@ -2632,7 +2629,7 @@ module Enumerable { private class ZipBlockSummary extends ZipSummary { ZipBlockSummary() { this = "zip(block)" and exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = 
"Argument[self].Element[any]" and output = "Argument[block].Parameter[0].Element[0]" @@ -2649,7 +2646,7 @@ module Enumerable { private class ZipNoBlockSummary extends ZipSummary { ZipNoBlockSummary() { this = "zip(no_block)" and not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // receiver[i] -> return_value[i][0] exists(ArrayIndex i | diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Base64.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Base64.qll index de5bc984597..fbf7470847e 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Base64.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Base64.qll @@ -17,7 +17,7 @@ private class Base64Decode extends SummarizedCallable { .getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll index 8811d21c918..4871d8d9924 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Hash.qll @@ -31,7 +31,7 @@ module Hash { final override MethodCall getACallSimple() { result = getAStaticHashCall("[]") } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // we make use of the special `hash-splat` argument kind, which contains all keyword // arguments wrapped in an implicit hash, as well as explicit hash splat arguments input = "Argument[hash-splat]" and @@ -62,7 +62,7 @@ module Hash { result.getNumberOfArguments() = 1 } - override predicate propagatesFlowExt(string 
input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // Hash[{symbol: x}] input = "Argument[0].WithElement[any]" and @@ -102,7 +102,7 @@ module Hash { exists(result.getArgument(i)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // Hash[:symbol, x] input = "Argument[" + i + "]" and output = "ReturnValue.Element[" + key.serialize() + "]" and @@ -115,7 +115,7 @@ module Hash { override MethodCall getACallSimple() { result = getAStaticHashCall("try_convert") } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -130,7 +130,7 @@ module Hash { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "ReturnValue" and preservesValue = true @@ -145,8 +145,8 @@ module Hash { this = "store(" + key.serialize() + ")" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[1]" and output = "Argument[self].Element[" + key.serialize() + "]" and @@ -164,8 +164,8 @@ module Hash { this = "store" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override 
predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[1]" and output = "Argument[self].Element[?]" and @@ -192,7 +192,7 @@ module Hash { key = DataFlow::Content::getKnownElementIndex(mc.getArgument(0)) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[" + key.serialize() + "]" and output = "ReturnValue.Element[1]" and preservesValue = true @@ -208,7 +208,7 @@ module Hash { not exists(DataFlow::Content::getKnownElementIndex(result.getArgument(0))) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any].WithoutElement[any]" and output = "ReturnValue.Element[1]" and preservesValue = true @@ -218,7 +218,7 @@ module Hash { private class EachPairSummary extends SimpleSummarizedCallable { EachPairSummary() { this = "each_pair" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[1]" @@ -233,7 +233,7 @@ module Hash { private class EachValueSummary extends SimpleSummarizedCallable { EachValueSummary() { this = "each_value" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" @@ -264,7 +264,7 @@ module Hash { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, 
string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" + concat(int i, string s | @@ -290,7 +290,7 @@ abstract private class FetchValuesSummary extends SummarizedCallable { final override MethodCall getACallSimple() { result = mc } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].WithElement[?]" and output = "ReturnValue" @@ -314,8 +314,8 @@ private class FetchValuesKnownSummary extends FetchValuesSummary { this = "fetch_values(" + key.serialize() + ")" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[self].Element[" + key.serialize() + "]" and output = "ReturnValue.Element[?]" and @@ -329,8 +329,8 @@ private class FetchValuesUnknownSummary extends FetchValuesSummary { this = "fetch_values(?)" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { - super.propagatesFlowExt(input, output, preservesValue) + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + super.propagatesFlow(input, output, preservesValue) or input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and @@ -345,7 +345,7 @@ private class MergeSummary extends SimpleSummarizedCallable { this = ["merge", "deep_merge"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self,any].WithElement[any]" and output = "ReturnValue" @@ 
-364,7 +364,7 @@ private class MergeBangSummary extends SimpleSummarizedCallable { this = ["merge!", "deep_merge!", "update"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self,any].WithElement[any]" and output = ["ReturnValue", "Argument[self]"] @@ -379,7 +379,7 @@ private class MergeBangSummary extends SimpleSummarizedCallable { private class RassocSummary extends SimpleSummarizedCallable { RassocSummary() { this = "rassoc" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any].WithoutElement[any]" and output = "ReturnValue.Element[1]" and preservesValue = true @@ -404,7 +404,7 @@ private class SliceKnownSummary extends SliceSummary { not key.isInt(_) // covered in `Array.qll` } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithElement[" + key.serialize() + "]" and output = "ReturnValue" and preservesValue = true @@ -417,7 +417,7 @@ private class SliceUnknownSummary extends SliceSummary { this = "slice(?)" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithoutElement[0..!].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -427,7 +427,7 @@ private class SliceUnknownSummary extends SliceSummary { private class ToASummary extends SimpleSummarizedCallable { ToASummary() { this = "to_a" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override 
predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithoutElement[0..!].Element[any]" and output = "ReturnValue.Element[?].Element[1]" and preservesValue = true @@ -437,7 +437,7 @@ private class ToASummary extends SimpleSummarizedCallable { private class ToHWithoutBlockSummary extends SimpleSummarizedCallable { ToHWithoutBlockSummary() { this = ["to_h", "to_hash"] and not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].WithElement[any]" and output = "ReturnValue" and preservesValue = true @@ -447,7 +447,7 @@ private class ToHWithoutBlockSummary extends SimpleSummarizedCallable { private class ToHWithBlockSummary extends SimpleSummarizedCallable { ToHWithBlockSummary() { this = "to_h" and exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[1]" @@ -462,7 +462,7 @@ private class ToHWithBlockSummary extends SimpleSummarizedCallable { private class TransformKeysSummary extends SimpleSummarizedCallable { TransformKeysSummary() { this = "transform_keys" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true @@ -472,7 +472,7 @@ private class TransformKeysSummary extends SimpleSummarizedCallable { private class TransformKeysBangSummary extends SimpleSummarizedCallable { TransformKeysBangSummary() { this = "transform_keys!" 
} - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[self].Element[?]" @@ -484,7 +484,7 @@ private class TransformKeysBangSummary extends SimpleSummarizedCallable { private class TransformValuesSummary extends SimpleSummarizedCallable { TransformValuesSummary() { this = "transform_values" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" @@ -499,7 +499,7 @@ private class TransformValuesSummary extends SimpleSummarizedCallable { private class TransformValuesBangSummary extends SimpleSummarizedCallable { TransformValuesBangSummary() { this = "transform_values!" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( input = "Argument[self].Element[any]" and output = "Argument[block].Parameter[0]" @@ -517,7 +517,7 @@ private class TransformValuesBangSummary extends SimpleSummarizedCallable { private class ValuesSummary extends SimpleSummarizedCallable { ValuesSummary() { this = "values" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self].Element[any]" and output = "ReturnValue.Element[?]" and preservesValue = true diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll index ad87ee37ecd..a17bbf91237 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll +++ 
b/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll @@ -177,7 +177,7 @@ module Kernel { private class TapSummary extends SimpleSummarizedCallable { TapSummary() { this = "tap" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = ["ReturnValue", "Argument[block].Parameter[0]"] and preservesValue = true @@ -219,7 +219,7 @@ module Kernel { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { ( // already an array input = "Argument[0].WithElement[0..]" and diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Object.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Object.qll index 355a65d0c72..5fbb1b6eff7 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Object.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Object.qll @@ -36,7 +36,7 @@ module Object { private class DupSummary extends SimpleSummarizedCallable { DupSummary() { this = "dup" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "ReturnValue" and preservesValue = true diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/String.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/String.qll index 8e88eb033e8..86246ba80a2 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/String.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/String.qll @@ -127,7 +127,7 @@ module String { result = API::getTopLevelMember("String").getAnInstantiation().getExprNode().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) 
{ input = "Argument[0]" and output = "ReturnValue" and preservesValue = true @@ -142,7 +142,7 @@ module String { API::getTopLevelMember("String").getAMethodCall("try_convert").getExprNode().getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false @@ -155,7 +155,7 @@ module String { private class FormatSummary extends SimpleSummarizedCallable { FormatSummary() { this = "%" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = ["Argument[self]", "Argument[0]", "Argument[0].Element[any]"] and output = "ReturnValue" and preservesValue = false @@ -169,7 +169,7 @@ module String { private class BSummary extends SimpleSummarizedCallable { BSummary() { this = "b" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -180,7 +180,7 @@ module String { private class BytesliceSummary extends SimpleSummarizedCallable { BytesliceSummary() { this = "byteslice" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -191,7 +191,7 @@ module String { private class CapitalizeSummary extends SimpleSummarizedCallable { CapitalizeSummary() { this = ["capitalize", "capitalize!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = 
"Argument[self]" and preservesValue = false and output = "ReturnValue" @@ -204,7 +204,7 @@ module String { private class CenterSummary extends SimpleSummarizedCallable { CenterSummary() { this = ["center", "ljust", "rjust"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) or input = "Argument[1]" and @@ -219,7 +219,7 @@ module String { private class ChompSummary extends SimpleSummarizedCallable { ChompSummary() { this = ["chomp", "chomp!", "chop", "chop!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) or this = ["chomp!", "chop!"] and @@ -236,6 +236,10 @@ module String { */ private class ClearSummary extends SimpleSummarizedCallable { ClearSummary() { none() } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + none() + } } /** @@ -249,7 +253,7 @@ module String { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self,0..]" and output = ["ReturnValue", "Argument[self]"] and preservesValue = false @@ -262,7 +266,7 @@ module String { private class DeleteSummary extends SimpleSummarizedCallable { DeleteSummary() { this = ["delete", "delete_prefix", "delete_suffix"] + ["", "!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -273,7 +277,7 @@ module String { private class DowncaseSummary extends 
SimpleSummarizedCallable { DowncaseSummary() { this = ["downcase", "upcase", "swapcase"] + ["", "!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -284,7 +288,7 @@ module String { private class DumpSummary extends SimpleSummarizedCallable { DumpSummary() { this = ["dump", "undump"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -308,7 +312,7 @@ module String { private class EachLineBlockSummary extends EachLineSummary { EachLineBlockSummary() { this = "each_line_with_block" and exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { preservesValue = false and input = "Argument[self]" and output = ["Argument[block].Parameter[0]", "ReturnValue"] @@ -321,7 +325,7 @@ module String { private class EachLineNoBlockSummary extends EachLineSummary { EachLineNoBlockSummary() { this = "each_line_without_block" and not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { preservesValue = false and input = "Argument[self]" and output = "ReturnValue.Element[?]" @@ -334,7 +338,7 @@ module String { private class EncodeSummary extends SimpleSummarizedCallable { EncodeSummary() { this = ["encode", "unicode_normalize"] + ["", "!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, 
boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -345,7 +349,7 @@ module String { private class ForceEncodingSummary extends SimpleSummarizedCallable { ForceEncodingSummary() { this = "force_encoding" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -356,7 +360,7 @@ module String { private class FreezeSummary extends SimpleSummarizedCallable { FreezeSummary() { this = "freeze" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -370,7 +374,7 @@ module String { // str.gsub(pattern, replacement) -> new_str // str.gsub(pattern) {|match| block } -> new_str // str.gsub(pattern) -> enumerator of matches - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // receiver -> return value // replacement -> return value // block return -> return value @@ -390,7 +394,7 @@ module String { none() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) or input = "Argument[1]" and output = "ReturnValue" and preservesValue = false @@ -403,7 +407,7 @@ module String { private class InspectSummary extends SimpleSummarizedCallable { InspectSummary() { this = "inspect" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { 
taintIdentityFlow(input, output, preservesValue) } } @@ -414,7 +418,7 @@ module String { private class StripSummary extends SimpleSummarizedCallable { StripSummary() { this = ["strip", "lstrip", "rstrip"] + ["", "!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -425,7 +429,7 @@ module String { private class NextSummary extends SimpleSummarizedCallable { NextSummary() { this = ["next", "succ"] + ["", "!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -436,7 +440,7 @@ module String { private class PartitionSummary extends SimpleSummarizedCallable { PartitionSummary() { this = ["partition", "rpartition"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "ReturnValue.Element[0,1,2]" and preservesValue = false @@ -449,7 +453,7 @@ module String { private class ReplaceSummary extends SimpleSummarizedCallable { ReplaceSummary() { this = "replace" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = ["ReturnValue", "Argument[self]"] and preservesValue = false @@ -463,7 +467,7 @@ module String { private class ReverseSummary extends SimpleSummarizedCallable { ReverseSummary() { this = ["reverse", "reverse!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, 
string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -483,7 +487,7 @@ module String { private class ScanBlockSummary extends ScanSummary { ScanBlockSummary() { this = "scan_with_block" and exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and preservesValue = false and output = @@ -500,7 +504,7 @@ module String { private class ScanNoBlockSummary extends ScanSummary { ScanNoBlockSummary() { this = "scan_no_block" and not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { // scan(pattern) -> array input = "Argument[self]" and output = "ReturnValue.Element[?]" and @@ -523,7 +527,7 @@ module String { private class ScrubBlockSummary extends ScrubSummary { ScrubBlockSummary() { this = "scrub_block" and exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) or preservesValue = false and @@ -542,7 +546,7 @@ module String { private class ScrubNoBlockSummary extends ScrubSummary { ScrubNoBlockSummary() { this = "scrub_no_block" and not exists(mc.getBlock()) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) or preservesValue = false and @@ -557,7 +561,7 @@ module String { private class ShellescapeSummary extends SimpleSummarizedCallable { ShellescapeSummary() { this = "shellescape" } - override predicate 
propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -568,7 +572,7 @@ module String { private class ShellSplitSummary extends SimpleSummarizedCallable { ShellSplitSummary() { this = "shellsplit" } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "ReturnValue.Element[?]" and preservesValue = false @@ -581,7 +585,7 @@ module String { private class SliceSummary extends SimpleSummarizedCallable { SliceSummary() { this = ["slice", "slice!", "split", "[]"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -592,7 +596,7 @@ module String { private class SqueezeSummary extends SimpleSummarizedCallable { SqueezeSummary() { this = ["squeeze", "squeeze!"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -603,7 +607,7 @@ module String { private class ToStrSummary extends SimpleSummarizedCallable { ToStrSummary() { this = ["to_str", "to_s"] } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) } } @@ -614,7 +618,7 @@ module String { private class TrSummary extends SimpleSummarizedCallable { TrSummary() { this = ["tr", "tr_s"] + ["", "!"] } - override predicate propagatesFlowExt(string 
input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) or input = "Argument[1]" and output = "ReturnValue" and preservesValue = false @@ -646,7 +650,7 @@ module String { } // TODO: if second arg ('exclusive') is true, the first arg is excluded - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { taintIdentityFlow(input, output, preservesValue) or input = ["Argument[self]", "Argument[0]"] and @@ -668,7 +672,7 @@ module String { mc.getArgument(1).getConstantValue().isBoolean(true) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[self]" and output = "Argument[block].Parameter[0]" and preservesValue = false diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll index 21bc5f69dcb..5e43dc5249a 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll @@ -48,7 +48,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable { ) } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | kind = "value" and preservesValue = true diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index 1cb4e189339..dd433152751 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll 
+++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -70,8 +70,8 @@ private module API = Specific::API; private module DataFlow = Specific::DataFlow; -private import Specific::AccessPathSyntax private import ApiGraphModelsExtensions as Extensions +private import codeql.dataflow.internal.AccessPathSyntax /** Module containing hooks for providing input data to be interpreted as a model. */ module ModelInput { @@ -327,29 +327,29 @@ predicate isRelevantFullPath(string type, string path) { } /** A string from a CSV row that should be parsed as an access path. */ -private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { - isRelevantFullPath(_, this) - or - exists(string type | isRelevantType(type) | - summaryModel(type, _, this, _, _) or - summaryModel(type, _, _, this, _) - ) - or - typeVariableModel(_, this) - } +private predicate accessPathRange(string s) { + isRelevantFullPath(_, s) + or + exists(string type | isRelevantType(type) | + summaryModel(type, _, s, _, _) or + summaryModel(type, _, _, s, _) + ) + or + typeVariableModel(_, s) } +import AccessPath + /** * Gets a successor of `node` in the API graph. */ bindingset[token] -API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) { // API graphs use the same label for arguments and parameters. An edge originating from a // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and - result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) + result = node.getParameter(parseIntUnbounded(token.getAnArgument())) or token.getName() = "ReturnValue" and result = node.getReturn() @@ -362,11 +362,9 @@ API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets an API-graph successor for the given invocation. 
*/ bindingset[token] -API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) { +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "Argument" and - result = - invoke - .getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) or token.getName() = "ReturnValue" and result = invoke.getReturn() @@ -378,10 +376,12 @@ API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken to /** * Holds if `invoke` invokes a call-site filter given by `token`. */ -pragma[inline] -private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) { +bindingset[token] +private predicate invocationMatchesCallSiteFilter( + Specific::InvokeNode invoke, AccessPathTokenBase token +) { token.getName() = "WithArity" and - invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument()) + invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument()) or Specific::invocationMatchesExtraCallSiteFilter(invoke, token) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll index eec603ad78b..e4359f6d4ca 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -4,14 +4,13 @@ * It must export the following members: * ```ql * class Unit // a unit type - * module AccessPathSyntax // a re-export of the AccessPathSyntax module * class InvokeNode // a type representing an invocation connected to the API graph * module API // the API graph module * predicate isPackageUsed(string package) * API::Node getExtraNodeFromPath(string package, string type, string path, int n) - * API::Node 
getExtraSuccessorFromNode(API::Node node, AccessPathToken token) - * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) - * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) + * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) + * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) * InvokeNode getAnInvocationOf(API::Node node) * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) @@ -21,13 +20,12 @@ private import codeql.ruby.AST private import ApiGraphModels +private import codeql.ruby.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax // Re-export libraries needed by ApiGraphModels.qll import codeql.ruby.ApiGraphs -import codeql.ruby.dataflow.internal.AccessPathSyntax as AccessPathSyntax import codeql.ruby.DataFlow::DataFlow as DataFlow -private import AccessPathSyntax -private import codeql.ruby.dataflow.internal.FlowSummaryImplSpecific as FlowSummaryImplSpecific -private import codeql.ruby.dataflow.internal.FlowSummaryImpl::Public +private import FlowSummaryImpl::Public private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch pragma[nomagic] @@ -140,7 +138,7 @@ private predicate methodMatchedByName(AccessPath path, string methodName) { * Gets a Ruby-specific API graph successor of `node` reachable by resolving `token`. 
*/ bindingset[token] -API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) { token.getName() = "Member" and result = node.getMember(token.getAnArgument()) or @@ -152,13 +150,13 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { or token.getName() = "Parameter" and exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | - argPos = FlowSummaryImplSpecific::parseParamBody(token.getAnArgument()) and + token.getAnArgument() = FlowSummaryImpl::Input::encodeArgumentPosition(argPos) and DataFlowDispatch::parameterMatch(paramPos, argPos) and result = node.getParameterAtPosition(paramPos) ) or exists(DataFlow::ContentSet contents | - SummaryComponent::content(contents) = FlowSummaryImplSpecific::interpretComponentSpecific(token) and + token.getName() = FlowSummaryImpl::Input::encodeContent(contents, token.getAnArgument()) and result = node.getContents(contents) ) } @@ -167,10 +165,10 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets a Ruby-specific API graph successor of `node` reachable by resolving `token`. */ bindingset[token] -API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) { +API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) { token.getName() = "Argument" and exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | - paramPos = FlowSummaryImplSpecific::parseArgBody(token.getAnArgument()) and + token.getAnArgument() = FlowSummaryImpl::Input::encodeParameterPosition(paramPos) and DataFlowDispatch::parameterMatch(paramPos, argPos) and result = node.getArgumentAtPosition(argPos) ) @@ -199,7 +197,7 @@ API::Node getAFuzzySuccessor(API::Node node) { * Holds if `invoke` matches the Ruby-specific call site filter in `token`. 
*/ bindingset[token] -predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token) { +predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "WithBlock" and exists(invoke.getBlock()) or diff --git a/ruby/ql/lib/codeql/ruby/frameworks/rack/internal/Utils.qll b/ruby/ql/lib/codeql/ruby/frameworks/rack/internal/Utils.qll index 4d0b948d650..c41c97b006f 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/rack/internal/Utils.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/rack/internal/Utils.qll @@ -22,7 +22,7 @@ module Utils { .getExpr() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = false } } diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll index 8d0c6d6b543..a9ae8fbf60e 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll @@ -5,6 +5,7 @@ */ private import TypeTrackerSpecific +private import codeql.util.Boolean cached private module Cached { diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll index 39b188753d1..df92128b608 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll @@ -1,16 +1,7 @@ -private import codeql.ruby.AST as Ast -private import codeql.ruby.CFG as Cfg -private import Cfg::CfgNodes -private import codeql.ruby.dataflow.FlowSummary -private import codeql.ruby.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlowPublic private import codeql.ruby.dataflow.internal.DataFlowPrivate as 
DataFlowPrivate -private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch -private import codeql.ruby.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import codeql.ruby.dataflow.internal.FlowSummaryImplSpecific as FlowSummaryImplSpecific -private import codeql.ruby.dataflow.internal.AccessPathSyntax private import internal.TypeTrackingImpl as TypeTrackingImpl -import codeql.util.Boolean +deprecated import codeql.util.Boolean deprecated class Node = DataFlowPublic::Node; diff --git a/ruby/ql/lib/codeql/ruby/typetracking/internal/TypeTrackingImpl.qll b/ruby/ql/lib/codeql/ruby/typetracking/internal/TypeTrackingImpl.qll index b50140a7b58..13f6c1de149 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/internal/TypeTrackingImpl.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/internal/TypeTrackingImpl.qll @@ -11,8 +11,6 @@ private import codeql.ruby.dataflow.internal.DataFlowPublic as DataFlowPublic private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch private import codeql.ruby.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import codeql.ruby.dataflow.internal.FlowSummaryImplSpecific as FlowSummaryImplSpecific -private import codeql.ruby.dataflow.internal.AccessPathSyntax /** Holds if there is direct flow from `param` to a return. 
*/ pragma[nomagic] @@ -170,30 +168,30 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { } // Summaries and their stacks - class SummaryComponent = FlowSummary::SummaryComponent; + class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent; - class SummaryComponentStack = FlowSummary::SummaryComponentStack; + class SummaryComponentStack = FlowSummaryImpl::Private::SummaryComponentStack; - predicate singleton = FlowSummary::SummaryComponentStack::singleton/1; + predicate singleton = FlowSummaryImpl::Private::SummaryComponentStack::singleton/1; - predicate push = FlowSummary::SummaryComponentStack::push/2; + predicate push = FlowSummaryImpl::Private::SummaryComponentStack::push/2; // Relating content to summaries - predicate content = FlowSummary::SummaryComponent::content/1; + predicate content = FlowSummaryImpl::Private::SummaryComponent::content/1; - predicate withoutContent = FlowSummary::SummaryComponent::withoutContent/1; + predicate withoutContent = FlowSummaryImpl::Private::SummaryComponent::withoutContent/1; - predicate withContent = FlowSummary::SummaryComponent::withContent/1; + predicate withContent = FlowSummaryImpl::Private::SummaryComponent::withContent/1; - predicate return = FlowSummary::SummaryComponent::return/0; + predicate return = FlowSummaryImpl::Private::SummaryComponent::return/0; // Callables - class SummarizedCallable = FlowSummary::SummarizedCallable; + class SummarizedCallable = FlowSummaryImpl::Private::SummarizedCallableImpl; // Relating nodes to summaries Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) { exists(DataFlowDispatch::ParameterPosition pos, DataFlowPrivate::ArgumentNode n | - arg = FlowSummary::SummaryComponent::argument(pos) and + arg = FlowSummaryImpl::Private::SummaryComponent::argument(pos) and argumentPositionMatch(call.asExpr(), n, pos) | isPostUpdate = false and result = n @@ -204,7 +202,7 @@ private module SummaryTypeTrackerInput implements 
SummaryTypeTracker::Input { Node parameterOf(Node callable, SummaryComponent param) { exists(DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos | - param = FlowSummary::SummaryComponent::parameter(apos) and + param = FlowSummaryImpl::Private::SummaryComponent::parameter(apos) and DataFlowDispatch::parameterMatch(ppos, apos) and result .(DataFlowPrivate::ParameterNodeImpl) @@ -213,13 +211,15 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { } Node returnOf(Node callable, SummaryComponent return) { - return = FlowSummary::SummaryComponent::return() and + return = FlowSummaryImpl::Private::SummaryComponent::return() and result.(DataFlowPrivate::ReturnNode).(DataFlowPrivate::NodeImpl).getCfgScope() = callable.asExpr().getExpr() } // Relating callables to nodes - Node callTo(SummarizedCallable callable) { result.asExpr().getExpr() = callable.getACallSimple() } + Node callTo(SummarizedCallable callable) { + result.asExpr().getExpr() = callable.(FlowSummary::SummarizedCallable).getACallSimple() + } } private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; diff --git a/ruby/ql/src/utils/modeleditor/ModelEditor.qll b/ruby/ql/src/utils/modeleditor/ModelEditor.qll index 77b3ea95e17..e8d7ab0b713 100644 --- a/ruby/ql/src/utils/modeleditor/ModelEditor.qll +++ b/ruby/ql/src/utils/modeleditor/ModelEditor.qll @@ -4,7 +4,6 @@ private import ruby private import codeql.ruby.dataflow.FlowSummary private import codeql.ruby.dataflow.internal.DataFlowPrivate private import codeql.ruby.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import codeql.ruby.dataflow.internal.FlowSummaryImplSpecific private import codeql.ruby.frameworks.core.Gem private import codeql.ruby.frameworks.data.ModelsAsData private import codeql.ruby.frameworks.data.internal.ApiGraphModelsExtensions diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql 
b/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql index 93b5aaf745e..1fd815c260b 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql @@ -1,16 +1,15 @@ import ruby +import codeql.dataflow.internal.AccessPathSyntax import codeql.ruby.ast.internal.TreeSitter -import codeql.ruby.dataflow.internal.AccessPathSyntax -import codeql.ruby.frameworks.data.internal.ApiGraphModels +import codeql.ruby.frameworks.data.internal.ApiGraphModels as ApiGraphModels import codeql.ruby.ApiGraphs import TestUtilities.InlineExpectationsTest -class AccessPathFromExpectation extends AccessPath::Range { - AccessPathFromExpectation() { hasExpectationWithValue(_, this) } -} +private predicate accessPathRange(string s) { hasExpectationWithValue(_, s) } + +import AccessPath API::Node evaluatePath(AccessPath path, int n) { - path instanceof AccessPathFromExpectation and n = 1 and exists(AccessPathToken token | token = path.getToken(0) | token.getName() = "Member" and @@ -23,9 +22,9 @@ API::Node evaluatePath(AccessPath path, int n) { result = token.getAnArgument().(API::EntryPoint).getANode() ) or - result = getSuccessorFromNode(evaluatePath(path, n - 1), path.getToken(n - 1)) + result = ApiGraphModels::getSuccessorFromNode(evaluatePath(path, n - 1), path.getToken(n - 1)) or - result = getSuccessorFromInvoke(evaluatePath(path, n - 1), path.getToken(n - 1)) + result = ApiGraphModels::getSuccessorFromInvoke(evaluatePath(path, n - 1), path.getToken(n - 1)) or // TODO this is a workaround, support parsing of Method['[]'] instead path.getToken(n - 1).getName() = "MethodBracket" and diff --git a/ruby/ql/test/library-tests/dataflow/flow-summaries/semantics.ql b/ruby/ql/test/library-tests/dataflow/flow-summaries/semantics.ql index 158b544c6f7..455ed970538 100644 --- a/ruby/ql/test/library-tests/dataflow/flow-summaries/semantics.ql +++ 
b/ruby/ql/test/library-tests/dataflow/flow-summaries/semantics.ql @@ -16,7 +16,7 @@ abstract private class Summary extends SimpleSummarizedCallable { bindingset[this] Summary() { any() } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { this.propagates(input, output) and preservesValue = true } diff --git a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql index 89dce373b32..c65482999e2 100644 --- a/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql +++ b/ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql @@ -7,13 +7,12 @@ import codeql.ruby.ApiGraphs import codeql.ruby.dataflow.FlowSummary import codeql.ruby.TaintTracking import codeql.ruby.dataflow.internal.FlowSummaryImpl -import codeql.ruby.dataflow.internal.AccessPathSyntax import codeql.ruby.frameworks.data.ModelsAsData import TestUtilities.InlineFlowTest import PathGraph query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) { - (sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and + (sc.propagatesFlow(s, _, _) or sc.propagatesFlow(_, s, _)) and Private::External::invalidSpecComponent(s, c) } @@ -24,7 +23,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable { override MethodCall getACall() { result.getMethodName() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "ReturnValue" and preservesValue = true @@ -36,7 +35,7 @@ private class SummarizedCallableApplyBlock extends SummarizedCallable { override MethodCall getACall() { result.getMethodName() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override 
predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[0]" and output = "Argument[block].Parameter[0]" and preservesValue = true @@ -52,7 +51,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable { override MethodCall getACall() { result.getMethodName() = this } - override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + override predicate propagatesFlow(string input, string output, boolean preservesValue) { input = "Argument[1]" and output = "Argument[0].Parameter[0]" and preservesValue = true diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index e9d96445fa8..9f8556902eb 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -140,6 +140,9 @@ signature module InputSig { * stored into (`getAStoreContent`) or read from (`getAReadContent`). */ class ContentSet { + /** Gets a textual representation of this element. */ + string toString(); + /** Gets a content that may be stored into when storing into this set. */ Content getAStoreContent(); diff --git a/shared/dataflow/codeql/dataflow/internal/AccessPathSyntax.qll b/shared/dataflow/codeql/dataflow/internal/AccessPathSyntax.qll new file mode 100644 index 00000000000..17b979e42a6 --- /dev/null +++ b/shared/dataflow/codeql/dataflow/internal/AccessPathSyntax.qll @@ -0,0 +1,220 @@ +/** + * Module for parsing access paths from MaD models, both the identifying access path used + * by dynamic languages, and the input/output specifications for summary steps. + * + * This file is used by the shared data flow library and by the JavaScript libraries + * (which does not use the shared data flow libraries). + */ + +/** + * Convenience-predicate for extracting two capture groups at once. 
+ */ +bindingset[input, regexp] +private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { + capture1 = input.regexpCapture(regexp, 1) and + capture2 = input.regexpCapture(regexp, 2) +} + +/** + * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value + * of the constant or any value contained in the interval. + */ +bindingset[arg] +int parseInt(string arg) { + result = arg.toInt() + or + // Match "n1..n2" + exists(string lo, string hi | + regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and + result = [lo.toInt() .. hi.toInt()] + ) +} + +/** + * Parses a lower-bounded interval `n..` and gets the lower bound. + */ +bindingset[arg] +int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } + +/** + * An access path token such as `Argument[1]` or `ReturnValue`. + */ +class AccessPathTokenBase extends string { + bindingset[this] + AccessPathTokenBase() { exists(this) } + + bindingset[this] + private string getPart(int part) { + result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) + } + + /** Gets the name of the token, such as `Member` from `Member[x]` */ + bindingset[this] + string getName() { result = this.getPart(1) } + + /** + * Gets the argument list, such as `1,2` from `Member[1,2]`, + * or has no result if there are no arguments. + */ + bindingset[this] + string getArgumentList() { result = this.getPart(2) } + + /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ + bindingset[this] + string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } + + /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ + bindingset[this] + string getAnArgument() { result = this.getArgument(_) } + + /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. 
*/ + bindingset[this] + int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } +} + +final private class AccessPathTokenBaseFinal = AccessPathTokenBase; + +signature predicate accessPathRangeSig(string s); + +/** Companion module to the `AccessPath` class. */ +module AccessPath { + /** + * Parses an integer constant or interval (bounded or unbounded) that explicitly + * references the arity, such as `N-1` or `N-3..N-1`. + * + * Note that expressions of form `N-x` will never resolve to a negative index, + * even if `N` is zero (it will have no result in that case). + */ + bindingset[arg, arity] + private int parseIntWithExplicitArity(string arg, int arity) { + result >= 0 and // do not allow N-1 to resolve to a negative index + exists(string lo | + // N-x + lo = arg.regexpCapture("N-(\\d+)", 1) and + result = arity - lo.toInt() + or + // N-x.. (every index from N-x through N-1, not just the two endpoints) + lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and + result = [arity - lo.toInt() .. arity - 1] + ) + or + exists(string lo, string hi | + // x..N-y + regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and + result = [lo.toInt() .. arity - hi.toInt()] + or + // N-x..N-y + regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and + result = [arity - lo.toInt() .. arity - hi.toInt()] and + result >= 0 + or + // N-x..y + regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and + result = [arity - lo.toInt() .. hi.toInt()] and + result >= 0 + ) + } + + /** + * Parses an integer constant or interval (bounded or unbounded) and gets any + * of the integers contained within (of which there may be infinitely many). + * + * Has no result for arguments involving an explicit arity, such as `N-1`. + */ + bindingset[arg, result] + int parseIntUnbounded(string arg) { + result = parseInt(arg) + or + result >= parseLowerBound(arg) + } + + /** + * Parses an integer constant or interval (bounded or unbounded) that + * may reference the arity of a call, such as `N-1` or `N-3..N-1`. 
+ * + * Note that expressions of form `N-x` will never resolve to a negative index, + * even if `N` is zero (it will have no result in that case). + */ + bindingset[arg, arity] + int parseIntWithArity(string arg, int arity) { + result = parseInt(arg) + or + result in [parseLowerBound(arg) .. arity - 1] + or + result = parseIntWithExplicitArity(arg, arity) + } + + /** Gets the `n`th token on the access path as a string. */ + private string getRawToken(AccessPath path, int n) { + // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. + // Instead use regexpFind to match valid tokens, and supplement with a final length + // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. + result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) + } + + /** + * A string that occurs as an access path (either identifying or input/output spec) + * which might be relevant for this database. + */ + final class AccessPath extends string { + AccessPath() { accessPathRange(this) } + + /** Holds if this string is not a syntactically valid access path. */ + predicate hasSyntaxError() { + // If the lengths match, all characters must have been included in a token + // or seen by the `.` lookahead pattern. + this != "" and + not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 + } + + /** Gets the `n`th token on the access path (if there are no syntax errors). */ + AccessPathToken getToken(int n) { + result = getRawToken(this, n) and + not this.hasSyntaxError() + } + + /** Gets the number of tokens on the path (if there are no syntax errors). */ + int getNumToken() { + result = count(int n | exists(getRawToken(this, n))) and + not this.hasSyntaxError() + } + } + + /** + * An access path token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. 
+ */ + class AccessPathToken extends AccessPathTokenBaseFinal { + AccessPathToken() { this = getRawToken(_, _) } + + /** Gets the name of the token, such as `Member` from `Member[x]` */ + pragma[nomagic] + string getName() { result = super.getName() } + + /** + * Gets the argument list, such as `1,2` from `Member[1,2]`, + * or has no result if there are no arguments. + */ + pragma[nomagic] + string getArgumentList() { result = super.getArgumentList() } + + /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ + pragma[nomagic] + string getArgument(int n) { result = super.getArgument(n) } + + /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ + pragma[nomagic] + string getArgument(string name, int n) { + name = this.getName() and result = this.getArgument(n) + } + + /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ + string getAnArgument() { result = this.getArgument(_) } + + /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ + string getAnArgument(string name) { result = this.getArgument(name, _) } + + /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. 
*/ + pragma[nomagic] + int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } + } +} diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 97f7597e640..8ad413b96a7 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -2725,7 +2725,7 @@ module MakeImpl { pragma[noinline] ApHeadContent getHeadContent(Ap ap) { result = ap.getHead() } - predicate projectToHeadContent = getContentApprox/1; + predicate projectToHeadContent = getContentApproxCached/1; class ApOption = ApproxAccessPathFrontOption; diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index 3d3b629670e..ea1378ce05d 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -975,6 +975,9 @@ module MakeImplCommon { cached predicate paramMustFlow(ParamNode p, ArgNode arg) { localMustFlowStep+(p, arg) } + cached + ContentApprox getContentApproxCached(Content c) { result = getContentApprox(c) } + cached newtype TCallContext = TAnyCallContext() or @@ -1885,7 +1888,7 @@ module MakeImplCommon { Content getAHead() { exists(ContentApprox cont | this = TApproxFrontHead(cont) and - cont = getContentApprox(result) + cont = getContentApproxCached(result) ) } } diff --git a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll new file mode 100644 index 00000000000..25a276d41a7 --- /dev/null +++ b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll @@ -0,0 +1,1886 @@ +/** + * Provides classes and predicates for defining flow summaries. 
+ */ + +private import codeql.dataflow.DataFlow as DF +private import codeql.util.Location +private import DataFlowImpl +private import AccessPathSyntax as AccessPathSyntax + +/** + * Provides language-specific parameters. + */ +signature module InputSig { + /** + * A base class of callables that are candidates for flow summary modeling. + */ + bindingset[this] + class SummarizedCallableBase { + bindingset[this] + string toString(); + } + + /** Gets the parameter position representing a callback itself, if any. */ + default Lang::ArgumentPosition callbackSelfParameterPosition() { none() } + + /** Gets the return kind corresponding to specification `"ReturnValue"`. */ + Lang::ReturnKind getStandardReturnValueKind(); + + /** Gets the textual representation of parameter position `pos` used in MaD. */ + string encodeParameterPosition(Lang::ParameterPosition pos); + + /** Gets the textual representation of argument position `pos` used in MaD. */ + string encodeArgumentPosition(Lang::ArgumentPosition pos); + + /** + * Gets the textual representation of content `c` used in MaD. + * + * `arg` will be printed in square brackets (`[]`) after the result, unless + * `arg` is the empty string. + */ + default string encodeContent(Lang::ContentSet c, string arg) { none() } + + /** + * Gets the textual representation of return kind `rk` used in MaD. + * + * `arg` will be printed in square brackets (`[]`) after the result, unless + * `arg` is the empty string. + */ + default string encodeReturn(Lang::ReturnKind rk, string arg) { none() } + + /** + * Gets the textual representation of without-content `c` used in MaD. + * + * `arg` will be printed in square brackets (`[]`) after the result, unless + * `arg` is the empty string. + */ + default string encodeWithoutContent(Lang::ContentSet c, string arg) { none() } + + /** + * Gets the textual representation of with-content `c` used in MaD. 
+ * + * `arg` will be printed in square brackets (`[]`) after the result, unless + * `arg` is the empty string. + */ + default string encodeWithContent(Lang::ContentSet c, string arg) { none() } + + /** + * Gets a parameter position corresponding to the unknown token `token`. + * + * The token is unknown because it could not be reverse-encoded using the + * `encodeParameterPosition` predicate. This is useful for example when a + * single token gives rise to multiple parameter positions, such as ranges + * `0..n`. + */ + bindingset[token] + default Lang::ParameterPosition decodeUnknownParameterPosition( + AccessPathSyntax::AccessPathTokenBase token + ) { + none() + } + + /** + * Gets an argument position corresponding to the unknown token `token`. + * + * The token is unknown because it could not be reverse-encoded using the + * `encodeArgumentPosition` predicate. This is useful for example when a + * single token gives rise to multiple argument positions, such as ranges + * `0..n`. + */ + bindingset[token] + default Lang::ArgumentPosition decodeUnknownArgumentPosition( + AccessPathSyntax::AccessPathTokenBase token + ) { + none() + } + + /** + * Gets a content corresponding to the unknown token `token`. + * + * The token is unknown because it could not be reverse-encoded using the + * `encodeContent` predicate. + */ + bindingset[token] + default Lang::ContentSet decodeUnknownContent(AccessPathSyntax::AccessPathTokenBase token) { + none() + } + + /** + * Gets a return kind corresponding to the unknown token `token`. + * + * The token is unknown because it could not be reverse-encoded using the + * `encodeReturn` predicate. + */ + bindingset[token] + default Lang::ReturnKind decodeUnknownReturn(AccessPathSyntax::AccessPathTokenBase token) { + none() + } + + /** + * Gets a without-content corresponding to the unknown token `token`. + * + * The token is unknown because it could not be reverse-encoded using the + * `encodeWithoutContent` predicate. 
+ */ + bindingset[token] + default Lang::ContentSet decodeUnknownWithoutContent(AccessPathSyntax::AccessPathTokenBase token) { + none() + } + + /** + * Gets a with-content corresponding to the unknown token `token`. + * + * The token is unknown because it could not be reverse-encoded using the + * `encodeWithContent` predicate. + */ + bindingset[token] + default Lang::ContentSet decodeUnknownWithContent(AccessPathSyntax::AccessPathTokenBase token) { + none() + } +} + +module Make Input> { + private import DataFlowLang + private import Input + private import codeql.dataflow.internal.DataFlowImplCommon::MakeImplCommon + private import codeql.util.Unit + + final private class SummarizedCallableBaseFinal = SummarizedCallableBase; + + /** Provides classes and predicates for defining flow summaries. */ + module Public { + private import Private + + /** + * Gets the valid model origin values. + */ + private string getValidModelOrigin() { + result = + [ + "ai", // AI (machine learning) + "df", // Dataflow (model generator) + "tb", // Type based (model generator) + "hq", // Heuristic query + ] + } + + /** + * A class used to represent provenance values for MaD models. + * + * The provenance value is a string of the form `origin-verification` + * (or just `manual`), where `origin` is a value indicating the + * origin of the model, and `verification` is a value indicating, how + * the model was verified. + * + * Examples could be: + * - `df-generated`: A model produced by the model generator, but not verified by a human. + * - `ai-manual`: A model produced by AI, but verified by a human. + */ + class Provenance extends string { + private string verification; + + Provenance() { + exists(string origin | origin = getValidModelOrigin() | + this = origin + "-" + verification and + verification = ["manual", "generated"] + ) + or + this = verification and verification = "manual" + } + + /** + * Holds if this is a valid generated provenance value. 
+ */ + predicate isGenerated() { verification = "generated" } + + /** + * Holds if this is a valid manual provenance value. + */ + predicate isManual() { verification = "manual" } + } + + /** A callable with a flow summary. */ + abstract class SummarizedCallable extends SummarizedCallableBaseFinal { + bindingset[this] + SummarizedCallable() { any() } + + /** + * Holds if data may flow from `input` to `output` through this callable. + * + * `preservesValue` indicates whether this is a value-preserving step or a taint-step. + */ + pragma[nomagic] + abstract predicate propagatesFlow(string input, string output, boolean preservesValue); + + /** + * Holds if there exists a generated summary that applies to this callable. + */ + final predicate hasGeneratedModel() { + exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) + } + + /** + * Holds if all the summaries that apply to this callable are auto generated and not manually created. + * That is, only apply generated models, when there are no manual models. + */ + final predicate applyGeneratedModel() { + this.hasGeneratedModel() and + not this.hasManualModel() + } + + /** + * Holds if there exists a manual summary that applies to this callable. + */ + final predicate hasManualModel() { + exists(Provenance p | p.isManual() and this.hasProvenance(p)) + } + + /** + * Holds if there exists a manual summary that applies to this callable. + * Always apply manual models if they exist. + */ + final predicate applyManualModel() { this.hasManualModel() } + + /** + * Holds if there exists a summary that applies to this callable + * that has provenance `provenance`. + */ + predicate hasProvenance(Provenance provenance) { provenance = "manual" } + } + + final private class NeutralCallableFinal = NeutralCallable; + + /** + * A callable where there is no flow via the callable. 
+ */ + class NeutralSummaryCallable extends NeutralCallableFinal { + NeutralSummaryCallable() { this.getKind() = "summary" } + } + + /** + * A callable that has a neutral model. + */ + abstract class NeutralCallable extends SummarizedCallableBaseFinal { + bindingset[this] + NeutralCallable() { exists(this) } + + /** + * Holds if the neutral is auto generated. + */ + final predicate hasGeneratedModel() { + any(Provenance p | this.hasProvenance(p)).isGenerated() + } + + /** + * Holds if there exists a manual neutral that applies to this callable. + */ + final predicate hasManualModel() { any(Provenance p | this.hasProvenance(p)).isManual() } + + /** + * Holds if the neutral has provenance `p`. + */ + abstract predicate hasProvenance(Provenance p); + + /** + * Gets the kind of the neutral. + */ + abstract string getKind(); + } + } + + /** + * Provides predicates for compiling flow summaries down to atomic local steps, + * read steps, and store steps. + */ + module Private { + private import Public + + /** + * A synthetic global. This represents some form of global state, which + * summaries can read and write individually. + */ + abstract class SyntheticGlobal extends string { + bindingset[this] + SyntheticGlobal() { any() } + } + + private newtype TSummaryComponent = + TContentSummaryComponent(ContentSet c) or + TParameterSummaryComponent(ArgumentPosition pos) or + TArgumentSummaryComponent(ParameterPosition pos) or + TReturnSummaryComponent(ReturnKind rk) or + TSyntheticGlobalSummaryComponent(SyntheticGlobal sg) or + TWithoutContentSummaryComponent(ContentSet c) or + TWithContentSummaryComponent(ContentSet c) + + bindingset[name, arg] + private string encodeArg(string name, string arg) { + if arg = "" then result = name else result = name + "[" + arg + "]" + } + + /** + * A component used in a flow summary. + * + * Either a parameter or an argument at a given position, a specific + * content type, or a return kind. 
+ */ + class SummaryComponent instanceof TSummaryComponent { + /** Gets a textual representation of this component used for MaD models. */ + string getMadRepresentation() { + exists(ContentSet c, string arg | + this = TContentSummaryComponent(c) and + result = encodeArg(encodeContent(c, arg), arg) + ) + or + exists(ArgumentPosition pos | + this = TParameterSummaryComponent(pos) and + result = "Parameter[" + encodeArgumentPosition(pos) + "]" + ) + or + exists(ParameterPosition pos | + this = TArgumentSummaryComponent(pos) and + result = "Argument[" + encodeParameterPosition(pos) + "]" + ) + or + exists(string synthetic | + this = TSyntheticGlobalSummaryComponent(synthetic) and + result = "SyntheticGlobal[" + synthetic + "]" + ) + or + exists(ReturnKind rk | this = TReturnSummaryComponent(rk) | + rk = getStandardReturnValueKind() and result = "ReturnValue" + or + exists(string arg | result = encodeArg(encodeReturn(rk, arg), arg)) + ) + or + exists(ContentSet c, string arg | + this = TWithoutContentSummaryComponent(c) and + result = encodeArg(encodeWithoutContent(c, arg), arg) + ) + or + exists(ContentSet c, string arg | + this = TWithContentSummaryComponent(c) and + result = encodeArg(encodeWithContent(c, arg), arg) + ) + } + + /** Gets a textual representation of this summary component. */ + string toString() { result = this.getMadRepresentation() } + } + + /** Provides predicates for constructing summary components. */ + module SummaryComponent { + /** Gets a summary component for content `c`. */ + SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } + + /** Gets a summary component where data is not allowed to be stored in `c`. */ + SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } + + /** Gets a summary component where data must be stored in `c`. 
*/ + SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } + + /** Gets a summary component for a parameter at position `pos`. */ + SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } + + /** Gets a summary component for an argument at position `pos`. */ + SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } + + /** Gets a summary component for a return of kind `rk`. */ + SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } + + /** Gets a summary component for synthetic global `sg`. */ + SummaryComponent syntheticGlobal(SyntheticGlobal sg) { + result = TSyntheticGlobalSummaryComponent(sg) + } + } + + private predicate summarySpec(string spec) { + exists(SummarizedCallable c | + c.propagatesFlow(spec, _, _) + or + c.propagatesFlow(_, spec, _) + ) + } + + import AccessPathSyntax::AccessPath + + /** Holds if specification component `token` parses as parameter `pos`. */ + predicate parseParam(AccessPathToken token, ArgumentPosition pos) { + token.getName() = "Parameter" and + token.getAnArgument() = encodeArgumentPosition(pos) + or + pos = decodeUnknownArgumentPosition(token) + } + + /** Holds if specification component `token` parses as argument `pos`. */ + predicate parseArg(AccessPathToken token, ParameterPosition pos) { + token.getName() = "Argument" and + token.getAnArgument() = encodeParameterPosition(pos) + or + pos = decodeUnknownParameterPosition(token) + } + + /** Holds if specification component `token` parses as synthetic global `sg`. 
*/ + predicate parseSynthGlobal(AccessPathToken token, string sg) { + token.getName() = "SyntheticGlobal" and + sg = token.getAnArgument() + } + + private class SyntheticGlobalFromAccessPath extends SyntheticGlobal { + SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } + } + + private TParameterSummaryComponent callbackSelfParam() { + result = TParameterSummaryComponent(callbackSelfParameterPosition()) + } + + newtype TSummaryComponentStack = + TSingletonSummaryComponentStack(SummaryComponent c) or + TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { + any(RequiredSummaryComponentStack x).required(head, tail) + or + any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and + head = callbackSelfParam() + or + derivedFluentFlowPush(_, _, _, head, tail, _) + } + + /** + * A (non-empty) stack of summary components. + * + * A stack is used to represent where data is read from (input) or where it + * is written to (output). For example, an input stack `[Field f, Argument 0]` + * means that data is read from field `f` from the `0`th argument, while an + * output stack `[Field g, Return]` means that data is written to the field + * `g` of the returned object. + */ + class SummaryComponentStack extends TSummaryComponentStack { + /** Gets the head of this stack. */ + SummaryComponent head() { + this = TSingletonSummaryComponentStack(result) or + this = TConsSummaryComponentStack(result, _) + } + + /** Gets the tail of this stack, if any. */ + SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } + + /** Gets the length of this stack. */ + int length() { + this = TSingletonSummaryComponentStack(_) and result = 1 + or + result = 1 + this.tail().length() + } + + /** Gets the stack obtained by dropping the first `i` elements, if any. 
*/ + SummaryComponentStack drop(int i) { + i = 0 and result = this + or + result = this.tail().drop(i - 1) + } + + /** Holds if this stack contains summary component `c`. */ + predicate contains(SummaryComponent c) { c = this.drop(_).head() } + + /** Gets the bottom element of this stack. */ + SummaryComponent bottom() { + this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() + } + + /** Gets a textual representation of this stack used for MaD models. */ + string getMadRepresentation() { + exists(SummaryComponent head, SummaryComponentStack tail | + head = this.head() and + tail = this.tail() and + result = tail.getMadRepresentation() + "." + head.getMadRepresentation() + ) + or + exists(SummaryComponent c | + this = TSingletonSummaryComponentStack(c) and + result = c.getMadRepresentation() + ) + } + + /** Gets a textual representation of this stack. */ + string toString() { result = this.getMadRepresentation() } + } + + /** Provides predicates for constructing stacks of summary components. */ + module SummaryComponentStack { + /** Gets a singleton stack containing `c`. */ + SummaryComponentStack singleton(SummaryComponent c) { + result = TSingletonSummaryComponentStack(c) + } + + /** + * Gets the stack obtained by pushing `head` onto `tail`. + * + * Make sure to override `RequiredSummaryComponentStack::required()` in order + * to ensure that the constructed stack exists. + */ + SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { + result = TConsSummaryComponentStack(head, tail) + } + + /** Gets a singleton stack for an argument at position `pos`. */ + SummaryComponentStack argument(ParameterPosition pos) { + result = singleton(SummaryComponent::argument(pos)) + } + + /** Gets a singleton stack representing a return of kind `rk`. 
*/ + SummaryComponentStack return(ReturnKind rk) { + result = singleton(SummaryComponent::return(rk)) + } + } + + /** + * A class that exists for QL technical reasons only (the IPA type used + * to represent component stacks needs to be bounded). + */ + class RequiredSummaryComponentStack extends Unit { + /** + * Holds if the stack obtained by pushing `head` onto `tail` is required. + */ + abstract predicate required(SummaryComponent head, SummaryComponentStack tail); + } + + /** + * A callable with a flow summary. + * + * This interface is not meant to be used directly; instead use the public + * `SummarizedCallable` interface. However, _if_ you need to use this, make + * sure that all classes `C` that extend `SummarizedCallableImpl` also + * extend `SummarizedCallable`, using the following adapter pattern: + * + * ```ql + * private class CAdapter extends SummarizedCallable instanceof C { + * override predicate propagatesFlow(string input, string output, boolean preservesValue) { + * none() + * } + * + * override predicate hasProvenance(Provenance provenance) { + * C.super.hasProvenance(provenance) + * } + * } + * ``` + */ + abstract class SummarizedCallableImpl extends SummarizedCallableBaseFinal { + bindingset[this] + SummarizedCallableImpl() { any() } + + /** + * Holds if data may flow from `input` to `output` through this callable. + * + * `preservesValue` indicates whether this is a value-preserving step + * or a taint-step. + * + * Input specifications are restricted to stacks that end with + * `SummaryComponent::argument(_)`, preceded by zero or more + * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. + * + * Output specifications are restricted to stacks that end with + * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. + * + * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero + * or more `SummaryComponent::content(_)` components. 
+ * + * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an + * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded + * by zero or more `SummaryComponent::content(_)` components. + */ + pragma[nomagic] + abstract predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ); + + /** + * Holds if there exists a summary that applies to this callable + * that has provenance `provenance`. + */ + abstract predicate hasProvenance(Provenance provenance); + } + + pragma[nomagic] + private predicate summary( + SummarizedCallableImpl c, SummaryComponentStack input, SummaryComponentStack output, + boolean preservesValue + ) { + c.propagatesFlow(input, output, preservesValue) + or + // observe side effects of callbacks on input arguments + c.propagatesFlow(output, input, preservesValue) and + preservesValue = true and + isCallbackParameter(input) and + isContentOfArgument(output, _) + or + // flow from the receiver of a callback into the instance-parameter + exists(SummaryComponentStack s, SummaryComponentStack callbackRef | + c.propagatesFlow(s, _, _) or c.propagatesFlow(_, s, _) + | + callbackRef = s.drop(_) and + (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and + input = callbackRef.tail() and + output = TConsSummaryComponentStack(callbackSelfParam(), input) and + preservesValue = true + ) + or + exists(SummaryComponentStack arg, SummaryComponentStack return | + derivedFluentFlow(c, input, arg, return, preservesValue) + | + arg.length() = 1 and + output = return + or + exists(SummaryComponent head, SummaryComponentStack tail | + derivedFluentFlowPush(c, input, arg, head, tail, 0) and + output = SummaryComponentStack::push(head, tail) + ) + ) + or + // Chain together summaries where values get passed into callbacks along the way + exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | + 
c.propagatesFlow(input, mid, preservesValue1) and + c.propagatesFlow(mid, output, preservesValue2) and + mid.drop(mid.length() - 2) = + SummaryComponentStack::push(TParameterSummaryComponent(_), + SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and + preservesValue = preservesValue1.booleanAnd(preservesValue2) + ) + } + + /** + * Holds if `c` has a flow summary from `input` to `arg`, where `arg` + * writes to (contents of) arguments at (some) position `pos`, and `c` has a + * value-preserving flow summary from the arguments at position `pos` + * to a return value (`return`). + * + * In such a case, we derive flow from `input` to (contents of) the return + * value. + * + * As an example, this simplifies modeling of fluent methods: + * for `StringBuilder.append(x)` with a specified value flow from qualifier to + * return value and taint flow from argument 0 to the qualifier, then this + * allows us to infer taint flow from argument 0 to the return value. + */ + pragma[nomagic] + private predicate derivedFluentFlow( + SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, + SummaryComponentStack return, boolean preservesValue + ) { + exists(ParameterPosition pos | + summary(c, input, arg, preservesValue) and + isContentOfArgument(arg, pos) and + summary(c, SummaryComponentStack::argument(pos), return, true) and + return.bottom() = TReturnSummaryComponent(_) + ) + } + + pragma[nomagic] + private predicate derivedFluentFlowPush( + SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, + SummaryComponent head, SummaryComponentStack tail, int i + ) { + derivedFluentFlow(c, input, arg, tail, _) and + head = arg.drop(i).head() and + i = arg.length() - 2 + or + exists(SummaryComponent head0, SummaryComponentStack tail0 | + derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and + head = arg.drop(i).head() and + tail = SummaryComponentStack::push(head0, tail0) + ) + } + + private predicate 
isCallbackParameter(SummaryComponentStack s) { + s.head() = TParameterSummaryComponent(_) and exists(s.tail()) + } + + private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { + s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) + or + s = SummaryComponentStack::argument(pos) + } + + private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { + summary(c, _, s, _) + or + exists(SummaryComponentStack out | + outputState(c, out) and + out.head() = TContentSummaryComponent(_) and + s = out.tail() + ) + or + // Add the argument node corresponding to the requested post-update node + inputState(c, s) and isCallbackParameter(s) + } + + private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { + summary(c, s, _, _) + or + exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) + or + exists(SummaryComponentStack out | + outputState(c, out) and + out.head() = TParameterSummaryComponent(_) and + s = out.tail() + ) + or + // Add the post-update node corresponding to the requested argument node + outputState(c, s) and isCallbackParameter(s) + or + // Add the parameter node for parameter side-effects + outputState(c, s) and s = SummaryComponentStack::argument(_) + } + + private newtype TSummaryNodeState = + TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or + TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } + + /** + * A state used to break up (complex) flow summaries into atomic flow steps. + * For a flow summary + * + * ```ql + * propagatesFlow( + * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + * ) + * ``` + * + * the following states are used: + * + * - `TSummaryNodeInputState(SummaryComponentStack s)`: + * this state represents that the components in `s` _have been read_ from the + * input. 
+ * - `TSummaryNodeOutputState(SummaryComponentStack s)`: + * this state represents that the components in `s` _remain to be written_ to + * the output. + */ + private class SummaryNodeState extends TSummaryNodeState { + /** Holds if this state is a valid input state for `c`. */ + pragma[nomagic] + predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { + this = TSummaryNodeInputState(s) and + inputState(c, s) + } + + /** Holds if this state is a valid output state for `c`. */ + pragma[nomagic] + predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { + this = TSummaryNodeOutputState(s) and + outputState(c, s) + } + + /** Gets a textual representation of this state. */ + string toString() { + exists(SummaryComponentStack s | + this = TSummaryNodeInputState(s) and + result = "read: " + s + ) + or + exists(SummaryComponentStack s | + this = TSummaryNodeOutputState(s) and + result = "to write: " + s + ) + } + } + + /** + * The underlying type of `SummaryNode`: an internal node for each callable/state pair in + * `summaryNodeRange`, and a parameter node for each callable/position pair in + * `summaryParameterNodeRange`. + */ + private newtype TSummaryNode = + TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { + summaryNodeRange(c, state) + } or + TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { + summaryParameterNodeRange(c, pos) + } + + /** A node synthesized for a summarized callable. */ + abstract class SummaryNode extends TSummaryNode { + /** Gets a textual representation of this node. */ + abstract string toString(); + + /** Gets the summarized callable that this node belongs to. */ + abstract SummarizedCallable getSummarizedCallable(); + } + + /** A summary node that represents the summary state `state` in callable `c`. */ + private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { + private SummarizedCallable c; + private SummaryNodeState state; + + SummaryInternalNode() { this = TSummaryInternalNode(c, state) } + + override string toString() { result = "[summary] " + state + " in " + c } + + override SummarizedCallable getSummarizedCallable() { result = c } + } + + /** A summary node that represents the parameter at position `pos` of callable `c`. */ + private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { + private SummarizedCallable c; + private ParameterPosition pos; + + SummaryParamNode() { this = TSummaryParameterNode(c, pos) } + + override string toString() { result = "[summary param] " + pos +
" in " + c } + + override SummarizedCallable getSummarizedCallable() { result = c } + } + + /** + * Holds if `state` represents having read from a parameter at position + * `pos` in `c`. In this case we are not synthesizing a data-flow node, + * but instead assume that a relevant parameter node already exists. + */ + private predicate parameterReadState( + SummarizedCallable c, SummaryNodeState state, ParameterPosition pos + ) { + state.isInputState(c, SummaryComponentStack::argument(pos)) + } + + /** + * Holds if a synthesized summary node is needed for the state `state` in summarized + * callable `c`. + */ + private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { + state.isInputState(c, _) and + not parameterReadState(c, state, _) + or + state.isOutputState(c, _) + } + + /** + * Gets the summary node that represents the input state for stack `s` in `c`: the + * internal node for that state or, when the state is a parameter read state, the + * corresponding parameter node. + */ + pragma[noinline] + private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { + exists(SummaryNodeState state | state.isInputState(c, s) | + result = TSummaryInternalNode(c, state) + or + exists(ParameterPosition pos | + parameterReadState(c, state, pos) and + result = TSummaryParameterNode(c, pos) + ) + ) + } + + /** Gets the internal summary node that represents the output state for stack `s` in `c`. */ + pragma[noinline] + private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { + exists(SummaryNodeState state | + state.isOutputState(c, s) and + result = TSummaryInternalNode(c, state) + ) + } + + /** + * Holds if a write targets `post`, which is a post-update node for a + * parameter at position `pos` in `c`. + */ + private predicate isParameterPostUpdate( + SummaryNode post, SummarizedCallable c, ParameterPosition pos + ) { + post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) + } + + /** Holds if a parameter node at position `pos` is required for `c`.
*/ + private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { + parameterReadState(c, _, pos) + or + // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context + any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) + } + + /** + * Holds if `s` is an input stack of `c` whose head is a return component of kind `rk`, + * and `receiver` is the node for the input state of `s.tail()`. + */ + private predicate callbackOutput( + SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk + ) { + any(SummaryNodeState state).isInputState(c, s) and + s.head() = TReturnSummaryComponent(rk) and + receiver = summaryNodeInputState(c, s.tail()) + } + + /** + * Holds if `s` is an output stack of `c` whose head is a parameter component at + * position `pos`, and `receiver` is the node for the input state of `s.tail()`. + */ + private predicate callbackInput( + SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos + ) { + any(SummaryNodeState state).isOutputState(c, s) and + s.head() = TParameterSummaryComponent(pos) and + receiver = summaryNodeInputState(c, s.tail()) + } + + /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ + predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { + callbackOutput(c, _, receiver, _) + or + callbackInput(c, _, receiver, _) + } + + /** Holds if summary node `p` is a parameter with position `pos`. */ + predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { + p = TSummaryParameterNode(_, pos) + } + + /** Holds if summary node `out` contains output of kind `rk` from a call targeting `receiver`. */ + predicate summaryOutNode(SummaryNode receiver, SummaryNode out, ReturnKind rk) { + exists(SummarizedCallable callable, SummaryComponentStack s | + callbackOutput(callable, s, receiver, rk) and + out = summaryNodeInputState(callable, s) + ) + } + + /** Holds if summary node `arg` is at position `pos` in a call targeting `receiver`.
*/ + predicate summaryArgumentNode(SummaryNode receiver, SummaryNode arg, ArgumentPosition pos) { + exists(SummarizedCallable callable, SummaryComponentStack s | + callbackInput(callable, s, receiver, pos) and + arg = summaryNodeOutputState(callable, s) + ) + } + + /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ + predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { + exists(SummarizedCallable c, ParameterPosition pos | + isParameterPostUpdate(post, c, pos) and + pre = TSummaryParameterNode(c, pos) + ) + or + exists(SummarizedCallable callable, SummaryComponentStack s | + callbackInput(callable, s, _, _) and + pre = summaryNodeOutputState(callable, s) and + post = summaryNodeInputState(callable, s) + ) + } + + /** Holds if summary node `ret` is a return node of kind `rk`. */ + predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { + exists(SummaryComponentStack s | + ret = summaryNodeOutputState(_, s) and + s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) + ) + } + + /** + * Holds if flow is allowed to pass from the parameter at position `ppos` of `c`, + * to a return node, and back out to the parameter. + * + * This is the case when `c` has a summary whose input and output stacks both have + * the argument at position `ppos` as their bottom component. + */ + predicate summaryAllowParameterReturnInSelf(SummarizedCallable c, ParameterPosition ppos) { + exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | + summary(c, inputContents, outputContents, _) and + inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and + outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) + ) + } + + /** Signature for the type-related predicates that the `Types` module imports. */ + signature module TypesInputSig { + /** Gets the type of content `c`. */ + DataFlowType getContentType(ContentSet c); + + /** Gets the type of the parameter at the given position. */ + bindingset[c, pos] + DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos); + + /** Gets the return type of kind `rk` for callable `c`.
*/ + bindingset[c, rk] + DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk); + + /** + * Gets the type of the parameter at position `pos` in a synthesized call that + * targets a callback of type `t`. + */ + bindingset[t, pos] + DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos); + + /** + * Gets the return type of kind `rk` in a synthesized call that targets a + * callback of type `t`. + */ + bindingset[t, rk] + DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk); + + /** Gets the type of synthetic global `sg`. */ + DataFlowType getSyntheticGlobalType(SyntheticGlobal sg); + } + + /** + * Provides the predicate `summaryNodeType` for associating types with summary nodes. + * + * Only relevant for typed languages. + */ + module Types<TypesInputSig TypesInput> { + private import TypesInput + + /** + * Gets the type of synthesized summary node `n`. + * + * The type is computed based on the language-specific predicates + * `getContentType()`, `getParameterType()`, `getReturnType()`, + * `getCallbackParameterType()`, `getCallbackReturnType()`, and + * `getSyntheticGlobalType()`. + */ + DataFlowType summaryNodeType(SummaryNode n) { + exists(SummaryNode pre | + summaryPostUpdateNode(n, pre) and + result = summaryNodeType(pre) + ) + or + exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | + head = s.head() + | + n = summaryNodeInputState(c, s) and + ( + exists(ContentSet cont | result = getContentType(cont) | + head = TContentSummaryComponent(cont) or + head = TWithContentSummaryComponent(cont) + ) + or + head = TWithoutContentSummaryComponent(_) and + result = summaryNodeType(summaryNodeInputState(c, s.tail())) + or + exists(ReturnKind rk | + head = TReturnSummaryComponent(rk) and + result = + getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), + s.tail())), rk) + ) + or + exists(SyntheticGlobal sg | + head = TSyntheticGlobalSummaryComponent(sg) and + result = getSyntheticGlobalType(sg) + ) + or + exists(ParameterPosition pos | + head = TArgumentSummaryComponent(pos) and + result = getParameterType(c,
pos) + ) + ) + or + n = summaryNodeOutputState(c, s) and + ( + exists(ContentSet cont | + head = TContentSummaryComponent(cont) and result = getContentType(cont) + ) + or + s.length() = 1 and + exists(ReturnKind rk | + head = TReturnSummaryComponent(rk) and + result = getReturnType(c, rk) + ) + or + exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | + result = + getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), + s.tail())), pos) + ) + or + exists(SyntheticGlobal sg | + head = TSyntheticGlobalSummaryComponent(sg) and + result = getSyntheticGlobalType(sg) + ) + ) + ) + } + } + + /** Signature for the input to the `Steps` module. */ + signature module StepsInputSig { + /** Gets a call that targets summarized callable `sc`. */ + DataFlowCall getACall(SummarizedCallable sc); + } + + /** Provides a compilation of flow summaries to atomic data-flow steps. */ + module Steps<StepsInputSig StepsInput> { + /** + * Holds if there is a local step from `pred` to `succ`, which is synthesized + * from a flow summary. + * + * A summary that is declared both as value-preserving and as taint only yields + * the value-preserving step. + */ + predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { + exists( + SummarizedCallable c, SummaryComponentStack inputContents, + SummaryComponentStack outputContents + | + summary(c, inputContents, outputContents, preservesValue) and + pred = summaryNodeInputState(c, inputContents) and + succ = summaryNodeOutputState(c, outputContents) + | + preservesValue = true + or + preservesValue = false and not summary(c, inputContents, outputContents, true) + ) + or + exists(SummarizedCallable c, SummaryComponentStack s | + pred = summaryNodeInputState(c, s.tail()) and + succ = summaryNodeInputState(c, s) and + s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and + preservesValue = true + ) + } + + /** + * Holds if there is a read step of content `c` from `pred` to `succ`, which + * is synthesized from a flow summary.
+ */ + predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { + exists(SummarizedCallable sc, SummaryComponentStack s | + pred = summaryNodeInputState(sc, s.tail()) and + succ = summaryNodeInputState(sc, s) and + SummaryComponent::content(c) = s.head() + ) + } + + /** + * Holds if there is a store step of content `c` from `pred` to `succ`, which + * is synthesized from a flow summary. + */ + predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { + exists(SummarizedCallable sc, SummaryComponentStack s | + pred = summaryNodeOutputState(sc, s) and + succ = summaryNodeOutputState(sc, s.tail()) and + SummaryComponent::content(c) = s.head() + ) + } + + /** + * Holds if there is a jump step from `pred` to `succ`, which is synthesized + * from a flow summary. + * + * The step goes from a node that writes a synthetic global to a node that reads + * the same synthetic global; the two nodes need not belong to the same + * summarized callable. + */ + predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { + exists(SummaryComponentStack s | + s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and + pred = summaryNodeOutputState(_, s) and + succ = summaryNodeInputState(_, s) + ) + } + + /** + * Holds if values stored inside content `c` are cleared at `n`. `n` is a + * synthesized summary node, so in order for values to be cleared at calls + * to the relevant method, it is important that flow does not pass over + * the argument, either via use-use flow or def-use flow. + * + * Example: + * + * ``` + * a.b = taint; + * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier + * sink(a.b); + * ``` + * + * In the above, flow should not pass from `a` on the first line (or the second + * line) to `a` on the third line. Instead, there will be synthesized flow from + * `a` on line 2 to the post-update node for `a` on that line (via an intermediate + * node where field `b` is cleared).
+ */ + predicate summaryClearsContent(SummaryNode n, ContentSet c) { + exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | + n = TSummaryInternalNode(sc, state) and + state.isInputState(sc, stack) and + stack.head() = SummaryComponent::withoutContent(c) + ) + } + + /** + * Holds if the value that is being tracked is expected to be stored inside + * content `c` at `n`. + */ + predicate summaryExpectsContent(SummaryNode n, ContentSet c) { + exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | + n = TSummaryInternalNode(sc, state) and + state.isInputState(sc, stack) and + stack.head() = SummaryComponent::withContent(c) + ) + } + + /** + * Holds if `p` is the summary parameter node at position `ppos` of `sc`, and + * `call` is a call that targets `sc`. + */ + pragma[noinline] + private predicate viableParam( + DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p + ) { + p = TSummaryParameterNode(sc, ppos) and + call = StepsInput::getACall(sc) + } + + /** + * Gets the summary parameter node of `sc` whose position matches that of argument + * `arg` in `call` (as given by `argumentPositionMatch`), where `call` targets `sc`. + */ + pragma[nomagic] + private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { + exists(ParameterPosition ppos | + argumentPositionMatch(call, arg, ppos) and + viableParam(call, sc, ppos, result) + ) + } + + /** + * Holds if `p` can reach `n` in a summarized callable, using only value-preserving + * local steps. `clearsOrExpects` records whether any node on the path from `p` to + * `n` either clears or expects contents. + */ + private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { + viableParam(_, _, _, p) and + n = p and + clearsOrExpects = false + or + exists(SummaryNode mid, boolean clearsOrExpectsMid | + paramReachesLocal(p, mid, clearsOrExpectsMid) and + summaryLocalStep(mid, n, true) and + if + summaryClearsContent(n, _) or + summaryExpectsContent(n, _) + then clearsOrExpects = true + else clearsOrExpects = clearsOrExpectsMid + ) + } + + /** + * Holds if use-use flow starting from `arg` should be prohibited.
+ * + * This is the case when `arg` is the argument of a call that targets a + * flow summary where the corresponding parameter either clears contents + * or expects contents. + */ + pragma[nomagic] + predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { + exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | + paramReachesLocal(p, ret, true) and + p = summaryArgParam(_, arg, sc) and + p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and + isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) + ) + } + + /** + * Holds if `ret` is a return node of extended kind `rk`: either a return node of + * the value kind of `rk`, or a post-update node whose pre-update node is locally + * reachable from a summary parameter node at the position that `rk` updates. + */ + pragma[nomagic] + private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { + summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) + or + exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | + paramReachesLocal(p, pre, _) and + summaryPostUpdateNode(ret, pre) and + p = TSummaryParameterNode(_, pos) and + rk.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + /** + * Gets the summary parameter node of `sc` that corresponds to argument `arg` of some + * call targeting `sc`, where `ret` is a return node of some extended kind and `out` + * is an out node of that same kind for that call. + */ + bindingset[ret] + private SummaryParamNode summaryArgParamRetOut( + ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc + ) { + exists(DataFlowCall call, ReturnKindExt rk | + result = summaryArgParam(call, arg, sc) and + summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and + out = pragma[only_bind_into](rk).getAnOutNode(call) + ) + } + + /** + * Holds if `arg` flows to `out` using a simple value-preserving flow + * summary, that is, a flow summary without reads and stores. + * + * NOTE: This step should not be used in global data-flow/taint-tracking, but may + * be useful to include in the exposed local data-flow/taint-tracking relations.
+ */ + predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { + exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | + summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and + summaryReturnNode(ret, pragma[only_bind_into](rk)) and + out = getAnOutNode(call, pragma[only_bind_into](rk)) + ) + } + + /** + * Holds if `arg` flows to `out` using a simple flow summary involving a taint + * step, that is, a flow summary without reads and stores. + * + * NOTE: This step should not be used in global data-flow/taint-tracking, but may + * be useful to include in the exposed local data-flow/taint-tracking relations. + */ + predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { + exists(SummaryNode ret | + summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) + ) + } + + /** + * Holds if there is a read(+taint) of `c` from `arg` to `out` using a + * flow summary. + * + * NOTE: This step should not be used in global data-flow/taint-tracking, but may + * be useful to include in the exposed local data-flow/taint-tracking relations. + */ + predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { + exists(SummaryNode mid, SummaryNode ret | + summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and + summaryLocalStep(mid, ret, _) + ) + } + + /** + * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a + * flow summary. + * + * NOTE: This step should not be used in global data-flow/taint-tracking, but may + * be useful to include in the exposed local data-flow/taint-tracking relations.
+ */ + predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { + exists(SummaryNode mid, SummaryNode ret | + summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and + summaryStoreStep(mid, c, ret) + ) + } + } + + /** + * Provides a means of translating externally (e.g., MaD) defined flow + * summaries into `SummarizedCallable`s. + */ + module External { + /** Gets the content set that `token` decodes to according to `encodeContent`, if any. */ + private ContentSet decodeContent(AccessPathToken token) { + exists(string name | name = encodeContent(result, token.getAnArgument(name))) + or + token = encodeContent(result, "") + } + + /** Gets the return kind that `token` decodes to according to `encodeReturn`, if any. */ + private ReturnKind decodeReturn(AccessPathToken token) { + exists(string name | name = encodeReturn(result, token.getAnArgument(name))) or + token = encodeReturn(result, "") + } + + /** Gets the content set that `token` decodes to according to `encodeWithoutContent`, if any. */ + private ContentSet decodeWithoutContent(AccessPathToken token) { + exists(string name | name = encodeWithoutContent(result, token.getAnArgument(name))) + or + token = encodeWithoutContent(result, "") + } + + /** Gets the content set that `token` decodes to according to `encodeWithContent`, if any. */ + private ContentSet decodeWithContent(AccessPathToken token) { + exists(string name | name = encodeWithContent(result, token.getAnArgument(name))) or + token = encodeWithContent(result, "") + } + + /** + * Gets the summary component that access path token `token` denotes, if any. + * + * For content, return, without-content and with-content components, the + * corresponding `decodeUnknown*` predicate is only used when the regular + * `decode*` predicate has no result for `token`. + */ + private SummaryComponent interpretComponent(AccessPathToken token) { + exists(ContentSet c | + c = decodeContent(token) + or + not exists(decodeContent(token)) and + c = decodeUnknownContent(token) + | + result = SummaryComponent::content(c) + ) + or + exists(ParameterPosition pos | + parseArg(token, pos) and + result = SummaryComponent::argument(pos) + ) + or + exists(ArgumentPosition pos | + parseParam(token, pos) and + result = SummaryComponent::parameter(pos) + ) + or + token = "ReturnValue" and result = SummaryComponent::return(getStandardReturnValueKind()) + or + exists(ReturnKind rk | + rk = decodeReturn(token) + or + not exists(decodeReturn(token)) and + rk = decodeUnknownReturn(token) + | + result = SummaryComponent::return(rk) + ) + or + exists(string sg | + parseSynthGlobal(token, sg) and
result = SummaryComponent::syntheticGlobal(sg) + ) + or + exists(ContentSet c | + c = decodeWithoutContent(token) + or + not exists(decodeWithoutContent(token)) and + c = decodeUnknownWithoutContent(token) + | + result = SummaryComponent::withoutContent(c) + ) + or + exists(ContentSet c | + c = decodeWithContent(token) + or + not exists(decodeWithContent(token)) and + c = decodeUnknownWithContent(token) + | + result = SummaryComponent::withContent(c) + ) + } + + /** + * Holds if `spec` specifies summary component stack `stack`. + */ + predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { + interpretSpec(spec, spec.getNumToken(), stack) + } + + /** + * Holds if the first `n` tokens of `spec` resolve to `stack`. + * + * The first token becomes the singleton (bottom) stack; each further token is + * pushed on top of the stack for the preceding tokens. + */ + private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { + n = 1 and + stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) + or + exists(SummaryComponent head, SummaryComponentStack tail | + interpretSpec(spec, n, head, tail) and + stack = SummaryComponentStack::push(head, tail) + ) + } + + /** Holds if the first `n` tokens of `spec` resolve to `head` followed by `tail`. */ + private predicate interpretSpec( + AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail + ) { + interpretSpec(spec, n - 1, tail) and + head = interpretComponent(spec.getToken(n - 1)) + } + + /** + * Declares every `head`/`tail` pair produced while interpreting access paths as a + * required summary component stack. + */ + private class MkStack extends RequiredSummaryComponentStack { + override predicate required(SummaryComponent head, SummaryComponentStack tail) { + interpretSpec(_, _, head, tail) + } + } + + // adapter class for converting `SummarizedCallable`s to `SummarizedCallableImpl`s + private class SummarizedCallableImplAdapter extends SummarizedCallableImpl instanceof SummarizedCallable + { + override predicate propagatesFlow( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ) { + exists(AccessPath inSpec, AccessPath outSpec | + SummarizedCallable.super.propagatesFlow(inSpec,
outSpec, preservesValue) and + interpretSpec(inSpec, input) and + interpretSpec(outSpec, output) + ) + } + + override predicate hasProvenance(Provenance provenance) { + SummarizedCallable.super.hasProvenance(provenance) + } + } + + /** Holds if component `c` of specification `spec` cannot be parsed. */ + predicate invalidSpecComponent(AccessPath spec, string c) { + c = spec.getToken(_) and + not exists(interpretComponent(c)) + } + + /** Holds if `provenance` is not a valid provenance value. */ + bindingset[provenance] + predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } + + /** + * Holds if token `part` of specification `spec` has an invalid index. + * E.g., `Argument[-1]`. + */ + predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { + part = spec.getToken(_) and + part.getName() = ["Parameter", "Argument"] and + AccessPathSyntax::parseInt(part.getArgumentList()) < 0 + } + + /** Signature for the input to the `SourceSinkInterpretation` module. */ + signature module SourceSinkInterpretationInputSig { + /** An element that a source or sink specification can be given for. */ + class Element { + /** Gets a textual representation of this element. */ + string toString(); + + /** Gets the location of this element. */ + Location getLocation(); + } + + /** + * Holds if an external source specification exists for `n` with output specification + * `output` and kind `kind`. + */ + predicate sourceElement(Element n, string output, string kind); + + /** + * Holds if an external sink specification exists for `n` with input specification + * `input` and kind `kind`. + */ + predicate sinkElement(Element n, string input, string kind); + + /** The subclass of `Element` that is used when resolving source and sink specifications. */ + class SourceOrSinkElement extends Element; + + /** An entity used to interpret a source/sink specification. */ + class InterpretNode { + /** Gets a textual representation of this node. */ + string toString(); + + /** Gets the location of this node. */ + Location getLocation(); + + /** Gets the element that this node corresponds to, if any. */ + SourceOrSinkElement asElement(); + + /** Gets the data-flow node that this node corresponds to, if any. */ + DataFlowLang::Node asNode(); + + /** Gets the call that this node corresponds to, if any.
*/ + DataFlowLang::DataFlowCall asCall(); + + /** Gets the callable that this node corresponds to, if any. */ + DataFlowLang::DataFlowCallable asCallable(); + + /** Gets the target of this call, if any. */ + Element getCallTarget(); + } + + /** + * Provides additional source specification logic: holds if output specification + * component `c` applied to `mid` resolves to `node`. + */ + bindingset[c] + predicate interpretOutput(string c, InterpretNode mid, InterpretNode node); + + /** + * Provides additional sink specification logic: holds if input specification + * component `c` applied to `mid` resolves to `node`. + */ + bindingset[c] + predicate interpretInput(string c, InterpretNode mid, InterpretNode node); + + /** Holds if output specification component `c` needs a reference. */ + bindingset[c] + default predicate outputNeedsReference(string c) { none() } + + /** Holds if input specification component `c` needs a reference. */ + bindingset[c] + default predicate inputNeedsReference(string c) { none() } + } + + /** + * Legacy interface for interpreting source/sink specifications in static languages. + * + * Should eventually be replaced with API graphs like in dynamic languages.
+ */ + module SourceSinkInterpretation< + LocationSig Location, + SourceSinkInterpretationInputSig SourceSinkInterpretationInput> + { + private import SourceSinkInterpretationInput + + /** Holds if `spec` is the access path of some source or sink specification. */ + private predicate sourceSinkSpec(string spec) { + sourceElement(_, spec, _) or + sinkElement(_, spec, _) + } + + /** Access path parsing restricted to the source and sink specifications. */ + private module AccessPath = AccessPathSyntax::AccessPath<sourceSinkSpec/1>; + + private class SourceSinkAccessPathToken = AccessPath::AccessPathToken; + + private class SourceSinkAccessPath = AccessPath::AccessPath; + + /** + * Holds if `token` is a `Parameter` token with an argument that encodes `pos`, or + * `pos` is the result of `decodeUnknownArgumentPosition(token)`. + */ + private predicate parseParamSourceSink(SourceSinkAccessPathToken token, ArgumentPosition pos) { + token.getName() = "Parameter" and + token.getAnArgument() = encodeArgumentPosition(pos) + or + pos = decodeUnknownArgumentPosition(token) + } + + /** + * Holds if `token` is an `Argument` token with an argument that encodes `pos`, or + * `pos` is the result of `decodeUnknownParameterPosition(token)`. + */ + private predicate parseArgSourceSink(SourceSinkAccessPathToken token, ParameterPosition pos) { + token.getName() = "Argument" and + token.getAnArgument() = encodeParameterPosition(pos) + or + pos = decodeUnknownParameterPosition(token) + } + + /** + * Holds if output token `c` needs a reference: `Argument` and `ReturnValue` tokens + * always do, other tokens do so when `outputNeedsReference` holds. + */ + private predicate outputNeedsReferenceExt(SourceSinkAccessPathToken c) { + c.getName() = ["Argument", "ReturnValue"] or + outputNeedsReference(c) + } + + /** + * Holds if `ref` refers to an element with a source specification `output` of kind + * `kind`: as its call target when the first token of `output` needs a reference, + * and as the element itself otherwise. + */ + private predicate sourceElementRef( + InterpretNode ref, SourceSinkAccessPath output, string kind + ) { + exists(SourceOrSinkElement e | + sourceElement(e, output, kind) and + if outputNeedsReferenceExt(output.getToken(0)) + then e = ref.getCallTarget() + else e = ref.asElement() + ) + } + + /** + * Holds if input token `c` needs a reference: `Argument` tokens always do, other + * tokens do so when `inputNeedsReference` holds. + */ + private predicate inputNeedsReferenceExt(SourceSinkAccessPathToken c) { + c.getName() = "Argument" or + inputNeedsReference(c) + } + + /** + * Holds if `ref` refers to an element with a sink specification `input` of kind + * `kind`: as its call target when the first token of `input` needs a reference, + * and as the element itself otherwise. + */ + private predicate sinkElementRef(InterpretNode ref, SourceSinkAccessPath input, string kind) { + exists(SourceOrSinkElement e | + sinkElement(e, input, kind) and + if inputNeedsReferenceExt(input.getToken(0)) + then e = ref.getCallTarget() + else e = ref.asElement() + ) + } + + /** Holds if the first `n` tokens of `output` resolve to the given interpretation.
*/ + private predicate interpretOutput( + SourceSinkAccessPath output, int n, InterpretNode ref, InterpretNode node + ) { + sourceElementRef(ref, output, _) and + n = 0 and + ( + if output = "" + then + // Allow language-specific interpretation of the empty access path + SourceSinkInterpretationInput::interpretOutput("", ref, node) + else node = ref + ) + or + exists(InterpretNode mid, SourceSinkAccessPathToken c | + interpretOutput(output, n - 1, ref, mid) and + c = output.getToken(n - 1) + | + /* `node` is the post-update node of an argument of the call `mid` */ + exists(ArgumentPosition apos | + node.asNode() + .(PostUpdateNode) + .getPreUpdateNode() + .(ArgNode) + .argumentOf(mid.asCall(), apos) + | + c = "Argument" + or + exists(ParameterPosition ppos | + parameterMatch(ppos, apos) and parseArgSourceSink(c, ppos) + ) + ) + or + /* `node` is a parameter of the callable `mid` */ + exists(ParameterPosition ppos | + node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) + | + c = "Parameter" + or + exists(ArgumentPosition apos | + parameterMatch(ppos, apos) and parseParamSourceSink(c, apos) + ) + ) + or + /* `node` is an out node of the call `mid` for the standard return value kind */ + c = "ReturnValue" and + node.asNode() = + getAnOutNodeExt(mid.asCall(), TValueReturn(getStandardReturnValueKind())) + or + SourceSinkInterpretationInput::interpretOutput(c, mid, node) + ) + } + + /** Holds if the first `n` tokens of `input` resolve to the given interpretation.
*/ + private predicate interpretInput( + SourceSinkAccessPath input, int n, InterpretNode ref, InterpretNode node + ) { + sinkElementRef(ref, input, _) and + n = 0 and + ( + if input = "" + then + // Allow language-specific interpretation of the empty access path + SourceSinkInterpretationInput::interpretInput("", ref, node) + else node = ref + ) + or + exists(InterpretNode mid, SourceSinkAccessPathToken c | + interpretInput(input, n - 1, ref, mid) and + c = input.getToken(n - 1) + | + /* `node` is an argument of the call `mid` */ + exists(ArgumentPosition apos | node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) | + c = "Argument" + or + exists(ParameterPosition ppos | + parameterMatch(ppos, apos) and parseArgSourceSink(c, ppos) + ) + ) + or + /* `node` is a return node of the standard return value kind in the callable `mid` */ + exists(ReturnNodeExt ret | + c = "ReturnValue" and + ret = node.asNode() and + ret.getKind().(ValueReturnKind).getKind() = getStandardReturnValueKind() and + mid.asCallable() = getNodeEnclosingCallable(ret) + ) + or + SourceSinkInterpretationInput::interpretInput(c, mid, node) + ) + } + + /** + * Holds if `node` is specified as a source with the given kind in a MaD flow + * model. + */ + predicate isSourceNode(InterpretNode node, string kind) { + exists(InterpretNode ref, SourceSinkAccessPath output | + sourceElementRef(ref, output, kind) and + interpretOutput(output, output.getNumToken(), ref, node) + ) + } + + /** + * Holds if `node` is specified as a sink with the given kind in a MaD flow + * model. + */ + predicate isSinkNode(InterpretNode node, string kind) { + exists(InterpretNode ref, SourceSinkAccessPath input | + sinkElementRef(ref, input, kind) and + interpretInput(input, input.getNumToken(), ref, node) + ) + } + } + } + + /** Provides a query predicate for outputting a set of relevant flow summaries. */ + module TestOutput { + /** A final alias of `SummarizedCallableImpl`, used as the base type of `RelevantSummarizedCallable`. */ + final private class SummarizedCallableImplFinal = SummarizedCallableImpl; + + /** A flow summary to include in the `summary/1` query predicate.
*/ + abstract class RelevantSummarizedCallable extends SummarizedCallableImplFinal { + /** Gets the string representation of this callable used by `summary/1`. */ + abstract string getCallableCsv(); + + /** Holds if flow is propagated between `input` and `output`. */ + predicate relevantSummary( + SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue + ) { + super.propagatesFlow(input, output, preservesValue) + } + } + + /** A model to include in the `neutral/1` query predicate. */ + abstract class RelevantNeutralCallable instanceof NeutralCallable { + /** Gets the string representation of this callable used by `neutral/1`. */ + abstract string getCallableCsv(); + + /** + * Gets the kind of the neutral. + */ + string getKind() { result = super.getKind() } + + /** Gets a textual representation of this callable. */ + string toString() { result = super.toString() } + } + + /** Render the kind in the format used in flow summaries. */ + private string renderKind(boolean preservesValue) { + preservesValue = true and result = "value" + or + preservesValue = false and result = "taint" + } + + /** + * Gets a provenance to render for `c`: a manual provenance of `c`, or any + * provenance of `c` when `c.applyManualModel()` does not hold. + */ + private string renderProvenance(SummarizedCallable c) { + exists(Provenance p | p.isManual() and c.hasProvenance(p) and result = p.toString()) + or + not c.applyManualModel() and + c.hasProvenance(result) + } + + /** + * Gets a provenance to render for neutral `c`: a manual provenance of `c`, or any + * provenance of `c` when `c.hasManualModel()` does not hold. + */ + private string renderProvenanceNeutral(NeutralCallable c) { + exists(Provenance p | p.isManual() and c.hasProvenance(p) and result = p.toString()) + or + not c.hasManualModel() and + c.hasProvenance(result) + } + + /** + * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. + * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", + * ext is hardcoded to empty.
+ */ + query predicate summary(string csv) { + exists( + RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, + boolean preservesValue + | + c.relevantSummary(input, output, preservesValue) and + csv = + c.getCallableCsv() // Callable information + + input.getMadRepresentation() + ";" // input + + output.getMadRepresentation() + ";" // output + + renderKind(preservesValue) + ";" // kind + + renderProvenance(c) // provenance + ) + } + + /** + * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. + * The syntax is: "namespace;type;name;signature;kind;provenance". + */ + query predicate neutral(string csv) { + exists(RelevantNeutralCallable c | + csv = + c.getCallableCsv() // Callable information + + c.getKind() + ";" // kind + + renderProvenanceNeutral(c) // provenance + ) + } + } + + /** + * Provides query predicates for rendering the generated data flow graph for + * a summarized callable. + * + * Import this module into a `.ql` file of `@kind graph` to render the graph. + * The graph is restricted to callables from `RelevantSummarizedCallable`. + */ + module RenderSummarizedCallable { + private module PrivateSteps = Private::Steps; + + /** A summarized callable to include in the graph. 
*/ + abstract class RelevantSummarizedCallable instanceof SummarizedCallable { + string toString() { result = super.toString() } + } + + private newtype TNodeOrCall = + MkNode(SummaryNode n) { + exists(RelevantSummarizedCallable c | + n = TSummaryInternalNode(c, _) + or + n = TSummaryParameterNode(c, _) + ) + } or + MkCall(SummaryNode receiver) { + receiver.getSummarizedCallable() instanceof RelevantSummarizedCallable and + ( + callbackInput(_, _, receiver, _) or + callbackOutput(_, _, receiver, _) + ) + } + + private class NodeOrCall extends TNodeOrCall { + SummaryNode asNode() { this = MkNode(result) } + + SummaryNode asCallReceiver() { this = MkCall(result) } + + string toString() { + result = this.asNode().toString() + or + result = this.asCallReceiver().toString() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + } + } + + query predicate nodes(NodeOrCall n, string key, string val) { + key = "semmle.label" and val = n.toString() + } + + private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { + exists(boolean preservesValue | + PrivateSteps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and + if preservesValue = true then value = "value" else value = "taint" + ) + or + exists(ContentSet c | + PrivateSteps::summaryReadStep(a.asNode(), c, b.asNode()) and + value = "read (" + c + ")" + or + PrivateSteps::summaryStoreStep(a.asNode(), c, b.asNode()) and + value = "store (" + c + ")" + or + PrivateSteps::summaryClearsContent(a.asNode(), c) and + b = a and + value = "clear (" + c + ")" + or + PrivateSteps::summaryExpectsContent(a.asNode(), c) and + b = a and + value = "expect (" + c + ")" + ) + or + summaryPostUpdateNode(b.asNode(), a.asNode()) and + value = "post-update" + or + b.asCallReceiver() = a.asNode() and + value = "receiver" + or + exists(ArgumentPosition pos | + summaryArgumentNode(b.asCallReceiver(), a.asNode(), pos) and + value = "argument (" + pos + ")" + ) + } + + query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { + key = "semmle.label" and + value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") + } + } + } +} diff --git a/swift/ql/lib/codeql/swift/dataflow/ExternalFlow.qll b/swift/ql/lib/codeql/swift/dataflow/ExternalFlow.qll index 524e5f5720a..35515cb548c 100644 --- a/swift/ql/lib/codeql/swift/dataflow/ExternalFlow.qll +++ b/swift/ql/lib/codeql/swift/dataflow/ExternalFlow.qll @@ -66,13 +66,13 @@ */ import swift -private import internal.AccessPathSyntax private import internal.DataFlowDispatch private import internal.DataFlowPrivate private import internal.DataFlowPublic +private 
import internal.FlowSummaryImpl private import internal.FlowSummaryImpl::Public +private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External -private import internal.FlowSummaryImplSpecific private import FlowSummary as FlowSummary private import codeql.mad.ModelValidation as SharedModelVal @@ -451,7 +451,7 @@ Element interpretElement( ) } -private predicate parseField(AccessPathToken c, Content::FieldContent f) { +deprecated private predicate parseField(AccessPathToken c, Content::FieldContent f) { exists(string fieldRegex, string name | c.getName() = "Field" and fieldRegex = "^([^.]+)$" and @@ -460,12 +460,12 @@ private predicate parseField(AccessPathToken c, Content::FieldContent f) { ) } -private predicate parseTuple(AccessPathToken c, Content::TupleContent t) { +deprecated private predicate parseTuple(AccessPathToken c, Content::TupleContent t) { c.getName() = "TupleElement" and t.getIndex() = c.getAnArgument().toInt() } -private predicate parseEnum(AccessPathToken c, Content::EnumContent e) { +deprecated private predicate parseEnum(AccessPathToken c, Content::EnumContent e) { c.getName() = "EnumElement" and c.getAnArgument() = e.getSignature() or @@ -474,7 +474,7 @@ private predicate parseEnum(AccessPathToken c, Content::EnumContent e) { } /** Holds if the specification component parses as a `Content`. 
*/ -predicate parseContent(AccessPathToken component, Content content) { +deprecated predicate parseContent(AccessPathToken component, Content content) { parseField(component, content) or parseTuple(component, content) @@ -497,7 +497,9 @@ private module Cached { */ cached predicate sourceNode(Node node, string kind) { - exists(InterpretNode n | isSourceNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSourceNode(n, kind) and n.asNode() = node + ) } /** @@ -506,8 +508,73 @@ private module Cached { */ cached predicate sinkNode(Node node, string kind) { - exists(InterpretNode n | isSinkNode(n, kind) and n.asNode() = node) + exists(SourceSinkInterpretationInput::InterpretNode n | + isSinkNode(n, kind) and n.asNode() = node + ) } } import Cached + +private predicate interpretSummary( + Function f, string input, string output, string kind, string provenance +) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext + | + summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and + f = interpretElement(namespace, type, subtypes, name, signature, ext) + ) +} + +// adapter class for converting MaD summaries to `SummarizedCallable`s +private class SummarizedCallableAdapter extends SummarizedCallable { + SummarizedCallableAdapter() { interpretSummary(this, _, _, _, _) } + + private predicate relevantSummaryElementManual(string input, string output, string kind) { + exists(Provenance provenance | + interpretSummary(this, input, output, kind, provenance) and + provenance.isManual() + ) + } + + private predicate relevantSummaryElementGenerated(string input, string output, string kind) { + exists(Provenance provenance | + interpretSummary(this, input, output, kind, provenance) and + provenance.isGenerated() + ) + } + + override predicate propagatesFlow(string input, string output, boolean preservesValue) { + exists(string kind | + 
this.relevantSummaryElementManual(input, output, kind) + or + not this.relevantSummaryElementManual(_, _, _) and + this.relevantSummaryElementGenerated(input, output, kind) + | + if kind = "value" then preservesValue = true else preservesValue = false + ) + } + + override predicate hasProvenance(Provenance provenance) { + interpretSummary(this, _, _, _, provenance) + } +} + +// adapter class for converting MaD neutrals to `NeutralCallable`s +private class NeutralCallableAdapter extends NeutralCallable { + string kind; + string provenance_; + + NeutralCallableAdapter() { + // Neutral models have not been implemented for Swift. + none() and + exists(this) and + exists(kind) and + exists(provenance_) + } + + override string getKind() { result = kind } + + override predicate hasProvenance(Provenance provenance) { provenance = provenance_ } +} diff --git a/swift/ql/lib/codeql/swift/dataflow/FlowSummary.qll b/swift/ql/lib/codeql/swift/dataflow/FlowSummary.qll index c42cd81f9ea..fadee4aee6f 100644 --- a/swift/ql/lib/codeql/swift/dataflow/FlowSummary.qll +++ b/swift/ql/lib/codeql/swift/dataflow/FlowSummary.qll @@ -13,36 +13,14 @@ private module Summaries { private import codeql.swift.frameworks.Frameworks } -class SummaryComponent = Impl::Public::SummaryComponent; +deprecated class SummaryComponent = Impl::Private::SummaryComponent; -/** Provides predicates for constructing summary components. 
*/ -module SummaryComponent { - private import Impl::Public::SummaryComponent as SummaryComponentInternal +deprecated module SummaryComponent = Impl::Private::SummaryComponent; - predicate content = SummaryComponentInternal::content/1; +deprecated class SummaryComponentStack = Impl::Private::SummaryComponentStack; - predicate parameter = SummaryComponentInternal::parameter/1; - - predicate argument = SummaryComponentInternal::argument/1; - - predicate return = SummaryComponentInternal::return/1; -} - -class SummaryComponentStack = Impl::Public::SummaryComponentStack; - -/** Provides predicates for constructing stacks of summary components. */ -module SummaryComponentStack { - private import Impl::Public::SummaryComponentStack as SummaryComponentStackInternal - - predicate singleton = SummaryComponentStackInternal::singleton/1; - - predicate push = SummaryComponentStackInternal::push/2; - - predicate argument = SummaryComponentStackInternal::argument/1; - - predicate return = SummaryComponentStackInternal::return/1; -} +deprecated module SummaryComponentStack = Impl::Private::SummaryComponentStack; class SummarizedCallable = Impl::Public::SummarizedCallable; -class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; +deprecated class RequiredSummaryComponentStack = Impl::Private::RequiredSummaryComponentStack; diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll b/swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. - * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). 
- */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. - lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. 
arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. - result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. 
*/ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. 
*/ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowDispatch.qll b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowDispatch.qll index c8ecc47e0fb..cde008f0a9e 100644 --- a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowDispatch.qll +++ b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowDispatch.qll @@ -5,7 +5,6 @@ private import codeql.swift.controlflow.ControlFlowGraph private import codeql.swift.controlflow.CfgNodes private import codeql.swift.controlflow.internal.Scope private import FlowSummaryImpl as FlowSummaryImpl -private import FlowSummaryImplSpecific as FlowSummaryImplSpecific private import codeql.swift.dataflow.FlowSummary as FlowSummary newtype TReturnKind = diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll index 2fbcaf29374..d981ffee940 100644 --- a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll +++ b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll @@ -625,12 +625,16 @@ private module ArgumentNodes { } class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode { + private SummaryCall call_; + private ArgumentPosition pos_; + SummaryArgumentNode() { - FlowSummaryImpl::Private::summaryArgumentNode(_, this.getSummaryNode(), _) + FlowSummaryImpl::Private::summaryArgumentNode(call_.getReceiver(), this.getSummaryNode(), pos_) } override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - FlowSummaryImpl::Private::summaryArgumentNode(call, this.getSummaryNode(), pos) + call = call_ and + pos = pos_ } } @@ -782,10 +786,16 @@ private module OutNodes { } class SummaryOutNode extends OutNode, FlowSummaryNode { - SummaryOutNode() { 
FlowSummaryImpl::Private::summaryOutNode(_, this.getSummaryNode(), _) } + private SummaryCall call; + private ReturnKind kind_; + + SummaryOutNode() { + FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_) + } override DataFlowCall getCall(ReturnKind kind) { - FlowSummaryImpl::Private::summaryOutNode(result, this.getSummaryNode(), kind) + result = call and + kind = kind_ } } @@ -1398,6 +1408,11 @@ predicate allowParameterReturnInSelf(ParameterNode p) { c = p.(ParameterNodeImpl).getEnclosingCallable().asSourceCallable() and CaptureFlow::heuristicAllowInstanceParameterReturnInSelf(c) ) + or + exists(DataFlowCallable c, ParameterPosition pos | + p.(ParameterNodeImpl).isParameterOf(c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asSummarizedCallable(), pos) + ) } /** An approximated `Content`. */ diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll b/swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll index 0aa17c521b4..1151a7aeec8 100644 --- a/swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll +++ b/swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImpl.qll @@ -1,1491 +1,259 @@ /** * Provides classes and predicates for defining flow summaries. - * - * The definitions in this file are language-independent, and language-specific - * definitions are passed in via the `DataFlowImplSpecific` and - * `FlowSummaryImplSpecific` modules. */ -private import FlowSummaryImplSpecific +private import swift +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import DataFlowImplSpecific as DataFlowImplSpecific private import DataFlowImplSpecific::Private private import DataFlowImplSpecific::Public private import DataFlowImplCommon -private import codeql.util.Unit +private import codeql.swift.dataflow.ExternalFlow -/** Provides classes and predicates for defining flow summaries. 
*/ -module Public { - private import Private +module Input implements InputSig { + class SummarizedCallableBase = Function; - /** - * A component used in a flow summary. - * - * Either a parameter or an argument at a given position, a specific - * content type, or a return kind. - */ - class SummaryComponent extends TSummaryComponent { - /** Gets a textual representation of this component used for MaD models. */ - string getMadRepresentation() { - result = getMadRepresentationSpecific(this) - or - exists(ArgumentPosition pos | - this = TParameterSummaryComponent(pos) and - result = "Parameter[" + getArgumentPosition(pos) + "]" + ArgumentPosition callbackSelfParameterPosition() { result instanceof ThisArgumentPosition } + + ReturnKind getStandardReturnValueKind() { result instanceof NormalReturnKind } + + string encodeParameterPosition(ParameterPosition pos) { result = pos.toString() } + + string encodeArgumentPosition(ArgumentPosition pos) { result = pos.toString() } + + string encodeReturn(ReturnKind rk, string arg) { + rk != getStandardReturnValueKind() and + result = "ReturnValue" and + arg = rk.toString() + } + + string encodeContent(ContentSet cs, string arg) { + exists(Content::FieldContent c | + cs.isSingleton(c) and + result = "Field" and + arg = c.getField().getName() + ) + or + exists(Content::TupleContent c | + cs.isSingleton(c) and + result = "TupleElement" and + arg = c.getIndex().toString() + ) + or + exists(Content::EnumContent c, string sig | + cs.isSingleton(c) and + sig = c.getSignature() + | + if sig = "some:0" + then + result = "OptionalSome" and + arg = "" + else ( + result = "EnumElement" and + arg = sig ) + ) + or + exists(Content::CollectionContent c | + cs.isSingleton(c) and + result = "CollectionElement" and + arg = "" + ) + } + + string encodeWithoutContent(ContentSet c, string arg) { + result = "WithoutContent" + c and arg = "" + } + + string encodeWithContent(ContentSet c, string arg) { result = "WithContent" + c and arg = "" } + + 
bindingset[token] + ContentSet decodeUnknownContent(AccessPath::AccessPathTokenBase token) { + // map legacy "ArrayElement" specification components to `CollectionContent` + token.getName() = "ArrayElement" and + result.isSingleton(any(Content::CollectionContent c)) + or + token.getName() = "CollectionElement" and + result.isSingleton(any(Content::CollectionContent c)) + } + + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges and `Argument[-1]` + token.getName() = "Argument" and + exists(int pos | pos = AccessPath::parseInt(token.getAnArgument()) | + result.(PositionalParameterPosition).getIndex() = pos or - exists(ParameterPosition pos | - this = TArgumentSummaryComponent(pos) and - result = "Argument[" + getParameterPosition(pos) + "]" - ) + pos = -1 and result instanceof ThisParameterPosition + ) + } + + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges and `Parameter[-1]` + token.getName() = "Parameter" and + exists(int pos | pos = AccessPath::parseInt(token.getAnArgument()) | + result.(PositionalArgumentPosition).getIndex() = pos or - exists(string synthetic | - this = TSyntheticGlobalSummaryComponent(synthetic) and - result = "SyntheticGlobal[" + synthetic + "]" - ) - or - this = TReturnSummaryComponent(getReturnValueKind()) and result = "ReturnValue" - } - - /** Gets a textual representation of this summary component. */ - string toString() { result = this.getMadRepresentation() } - } - - /** Provides predicates for constructing summary components. */ - module SummaryComponent { - /** Gets a summary component for content `c`. */ - SummaryComponent content(ContentSet c) { result = TContentSummaryComponent(c) } - - /** Gets a summary component where data is not allowed to be stored in `c`. 
*/ - SummaryComponent withoutContent(ContentSet c) { result = TWithoutContentSummaryComponent(c) } - - /** Gets a summary component where data must be stored in `c`. */ - SummaryComponent withContent(ContentSet c) { result = TWithContentSummaryComponent(c) } - - /** Gets a summary component for a parameter at position `pos`. */ - SummaryComponent parameter(ArgumentPosition pos) { result = TParameterSummaryComponent(pos) } - - /** Gets a summary component for an argument at position `pos`. */ - SummaryComponent argument(ParameterPosition pos) { result = TArgumentSummaryComponent(pos) } - - /** Gets a summary component for a return of kind `rk`. */ - SummaryComponent return(ReturnKind rk) { result = TReturnSummaryComponent(rk) } - - /** Gets a summary component for synthetic global `sg`. */ - SummaryComponent syntheticGlobal(SyntheticGlobal sg) { - result = TSyntheticGlobalSummaryComponent(sg) - } - - /** - * A synthetic global. This represents some form of global state, which - * summaries can read and write individually. - */ - abstract class SyntheticGlobal extends string { - bindingset[this] - SyntheticGlobal() { any() } - } - } - - /** - * A (non-empty) stack of summary components. - * - * A stack is used to represent where data is read from (input) or where it - * is written to (output). For example, an input stack `[Field f, Argument 0]` - * means that data is read from field `f` from the `0`th argument, while an - * output stack `[Field g, Return]` means that data is written to the field - * `g` of the returned object. - */ - class SummaryComponentStack extends TSummaryComponentStack { - /** Gets the head of this stack. */ - SummaryComponent head() { - this = TSingletonSummaryComponentStack(result) or - this = TConsSummaryComponentStack(result, _) - } - - /** Gets the tail of this stack, if any. */ - SummaryComponentStack tail() { this = TConsSummaryComponentStack(_, result) } - - /** Gets the length of this stack. 
*/ - int length() { - this = TSingletonSummaryComponentStack(_) and result = 1 - or - result = 1 + this.tail().length() - } - - /** Gets the stack obtained by dropping the first `i` elements, if any. */ - SummaryComponentStack drop(int i) { - i = 0 and result = this - or - result = this.tail().drop(i - 1) - } - - /** Holds if this stack contains summary component `c`. */ - predicate contains(SummaryComponent c) { c = this.drop(_).head() } - - /** Gets the bottom element of this stack. */ - SummaryComponent bottom() { - this = TSingletonSummaryComponentStack(result) or result = this.tail().bottom() - } - - /** Gets a textual representation of this stack used for MaD models. */ - string getMadRepresentation() { - exists(SummaryComponent head, SummaryComponentStack tail | - head = this.head() and - tail = this.tail() and - result = tail.getMadRepresentation() + "." + head.getMadRepresentation() - ) - or - exists(SummaryComponent c | - this = TSingletonSummaryComponentStack(c) and - result = c.getMadRepresentation() - ) - } - - /** Gets a textual representation of this stack. */ - string toString() { result = this.getMadRepresentation() } - } - - /** Provides predicates for constructing stacks of summary components. */ - module SummaryComponentStack { - /** Gets a singleton stack containing `c`. */ - SummaryComponentStack singleton(SummaryComponent c) { - result = TSingletonSummaryComponentStack(c) - } - - /** - * Gets the stack obtained by pushing `head` onto `tail`. - * - * Make sure to override `RequiredSummaryComponentStack::required()` in order - * to ensure that the constructed stack exists. - */ - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { - result = TConsSummaryComponentStack(head, tail) - } - - /** Gets a singleton stack for an argument at position `pos`. 
*/ - SummaryComponentStack argument(ParameterPosition pos) { - result = singleton(SummaryComponent::argument(pos)) - } - - /** Gets a singleton stack representing a return of kind `rk`. */ - SummaryComponentStack return(ReturnKind rk) { result = singleton(SummaryComponent::return(rk)) } - } - - /** - * A class that exists for QL technical reasons only (the IPA type used - * to represent component stacks needs to be bounded). - */ - class RequiredSummaryComponentStack extends Unit { - /** - * Holds if the stack obtained by pushing `head` onto `tail` is required. - */ - abstract predicate required(SummaryComponent head, SummaryComponentStack tail); - } - - /** - * Gets the valid model origin values. - */ - private string getValidModelOrigin() { - result = - [ - "ai", // AI (machine learning) - "df", // Dataflow (model generator) - "tb", // Type based (model generator) - "hq", // Heuristic query - ] - } - - /** - * A class used to represent provenance values for MaD models. - * - * The provenance value is a string of the form `origin-verification` - * (or just `manual`), where `origin` is a value indicating the - * origin of the model, and `verification` is a value indicating, how - * the model was verified. - * - * Examples could be: - * - `df-generated`: A model produced by the model generator, but not verified by a human. - * - `ai-manual`: A model produced by AI, but verified by a human. - */ - class Provenance extends string { - private string verification; - - Provenance() { - exists(string origin | origin = getValidModelOrigin() | - this = origin + "-" + verification and - verification = ["manual", "generated"] - ) - or - this = verification and verification = "manual" - } - - /** - * Holds if this is a valid generated provenance value. - */ - predicate isGenerated() { verification = "generated" } - - /** - * Holds if this is a valid manual provenance value. - */ - predicate isManual() { verification = "manual" } - } - - /** A callable with a flow summary. 
*/ - abstract class SummarizedCallable extends SummarizedCallableBase { - bindingset[this] - SummarizedCallable() { any() } - - /** - * Holds if data may flow from `input` to `output` through this callable. - * - * `preservesValue` indicates whether this is a value-preserving step - * or a taint-step. - * - * Input specifications are restricted to stacks that end with - * `SummaryComponent::argument(_)`, preceded by zero or more - * `SummaryComponent::return(_)` or `SummaryComponent::content(_)` components. - * - * Output specifications are restricted to stacks that end with - * `SummaryComponent::return(_)` or `SummaryComponent::argument(_)`. - * - * Output stacks ending with `SummaryComponent::return(_)` can be preceded by zero - * or more `SummaryComponent::content(_)` components. - * - * Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an - * optional `SummaryComponent::parameter(_)` component, which in turn can be preceded - * by zero or more `SummaryComponent::content(_)` components. - */ - pragma[nomagic] - predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - none() - } - - /** - * Holds if there exists a generated summary that applies to this callable. - */ - final predicate hasGeneratedModel() { - exists(Provenance p | p.isGenerated() and this.hasProvenance(p)) - } - - /** - * Holds if all the summaries that apply to this callable are auto generated and not manually created. - * That is, only apply generated models, when there are no manual models. - */ - final predicate applyGeneratedModel() { - this.hasGeneratedModel() and - not this.hasManualModel() - } - - /** - * Holds if there exists a manual summary that applies to this callable. - */ - final predicate hasManualModel() { - exists(Provenance p | p.isManual() and this.hasProvenance(p)) - } - - /** - * Holds if there exists a manual summary that applies to this callable. 
- * Always apply manual models if they exist. - */ - final predicate applyManualModel() { this.hasManualModel() } - - /** - * Holds if there exists a summary that applies to this callable - * that has provenance `provenance`. - */ - predicate hasProvenance(Provenance provenance) { provenance = "manual" } - } - - /** - * A callable where there is no flow via the callable. - */ - class NeutralSummaryCallable extends NeutralCallable { - NeutralSummaryCallable() { this.getKind() = "summary" } - } - - /** - * A callable that has a neutral model. - */ - class NeutralCallable extends NeutralCallableBase { - private string kind; - private Provenance provenance; - - NeutralCallable() { neutralElement(this, kind, provenance) } - - /** - * Holds if the neutral is auto generated. - */ - final predicate hasGeneratedModel() { provenance.isGenerated() } - - /** - * Holds if there exists a manual neutral that applies to this callable. - */ - final predicate hasManualModel() { provenance.isManual() } - - /** - * Holds if the neutral has provenance `p`. - */ - predicate hasProvenance(Provenance p) { p = provenance } - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = kind } + pos = -1 and + result instanceof ThisArgumentPosition + ) } } -/** - * Provides predicates for compiling flow summaries down to atomic local steps, - * read steps, and store steps. 
- */ -module Private { - private import Public - import AccessPathSyntax +private import Make as Impl - newtype TSummaryComponent = - TContentSummaryComponent(ContentSet c) or - TParameterSummaryComponent(ArgumentPosition pos) or - TArgumentSummaryComponent(ParameterPosition pos) or - TReturnSummaryComponent(ReturnKind rk) or - TSyntheticGlobalSummaryComponent(SummaryComponent::SyntheticGlobal sg) or - TWithoutContentSummaryComponent(ContentSet c) or - TWithContentSummaryComponent(ContentSet c) +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { result.asCall().getStaticTarget() = sc } +} - private TParameterSummaryComponent callbackSelfParam() { - result = TParameterSummaryComponent(callbackSelfParameterPosition()) - } +module SourceSinkInterpretationInput implements + Impl::Private::External::SourceSinkInterpretationInputSig +{ + class Element = AstNode; - newtype TSummaryComponentStack = - TSingletonSummaryComponentStack(SummaryComponent c) or - TConsSummaryComponentStack(SummaryComponent head, SummaryComponentStack tail) { - any(RequiredSummaryComponentStack x).required(head, tail) - or - any(RequiredSummaryComponentStack x).required(TParameterSummaryComponent(_), tail) and - head = callbackSelfParam() - or - derivedFluentFlowPush(_, _, _, head, tail, _) - } + class SourceOrSinkElement = Element; - pragma[nomagic] - private predicate summary( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - ) { - c.propagatesFlow(input, output, preservesValue) - or - // observe side effects of callbacks on input arguments - c.propagatesFlow(output, input, preservesValue) and - preservesValue = true and - isCallbackParameter(input) and - isContentOfArgument(output, _) - or - // flow from the receiver of a callback into the instance-parameter - exists(SummaryComponentStack s, SummaryComponentStack callbackRef | - c.propagatesFlow(s, _, _) or 
c.propagatesFlow(_, s, _) + /** + * Holds if an external source specification exists for `e` with output specification + * `output`, kind `kind`, and provenance `provenance`. + */ + predicate sourceElement(SourceOrSinkElement e, string output, string kind) { + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext | - callbackRef = s.drop(_) and - (isCallbackParameter(callbackRef) or callbackRef.head() = TReturnSummaryComponent(_)) and - input = callbackRef.tail() and - output = TConsSummaryComponentStack(callbackSelfParam(), input) and - preservesValue = true - ) - or - exists(SummaryComponentStack arg, SummaryComponentStack return | - derivedFluentFlow(c, input, arg, return, preservesValue) - | - arg.length() = 1 and - output = return - or - exists(SummaryComponent head, SummaryComponentStack tail | - derivedFluentFlowPush(c, input, arg, head, tail, 0) and - output = SummaryComponentStack::push(head, tail) - ) - ) - or - // Chain together summaries where values get passed into callbacks along the way - exists(SummaryComponentStack mid, boolean preservesValue1, boolean preservesValue2 | - c.propagatesFlow(input, mid, preservesValue1) and - c.propagatesFlow(mid, output, preservesValue2) and - mid.drop(mid.length() - 2) = - SummaryComponentStack::push(TParameterSummaryComponent(_), - SummaryComponentStack::singleton(TArgumentSummaryComponent(_))) and - preservesValue = preservesValue1.booleanAnd(preservesValue2) + sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, _) and + e = interpretElement(namespace, type, subtypes, name, signature, ext) ) } /** - * Holds if `c` has a flow summary from `input` to `arg`, where `arg` - * writes to (contents of) arguments at position `pos`, and `c` has a - * value-preserving flow summary from the arguments at position `pos` - * to a return value (`return`). - * - * In such a case, we derive flow from `input` to (contents of) the return - * value. 
- * - * As an example, this simplifies modeling of fluent methods: - * for `StringBuilder.append(x)` with a specified value flow from qualifier to - * return value and taint flow from argument 0 to the qualifier, then this - * allows us to infer taint flow from argument 0 to the return value. + * Holds if an external sink specification exists for `e` with input specification + * `input`, kind `kind` and provenance `provenance`. */ - pragma[nomagic] - private predicate derivedFluentFlow( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponentStack return, boolean preservesValue - ) { - exists(ParameterPosition pos | - summary(c, input, arg, preservesValue) and - isContentOfArgument(arg, pos) and - summary(c, SummaryComponentStack::argument(pos), return, true) and - return.bottom() = TReturnSummaryComponent(_) + predicate sinkElement(SourceOrSinkElement e, string input, string kind) { + exists( + string package, string type, boolean subtypes, string name, string signature, string ext + | + sinkModel(package, type, subtypes, name, signature, ext, input, kind, _) and + e = interpretElement(package, type, subtypes, name, signature, ext) ) } - pragma[nomagic] - private predicate derivedFluentFlowPush( - SummarizedCallable c, SummaryComponentStack input, SummaryComponentStack arg, - SummaryComponent head, SummaryComponentStack tail, int i - ) { - derivedFluentFlow(c, input, arg, tail, _) and - head = arg.drop(i).head() and - i = arg.length() - 2 - or - exists(SummaryComponent head0, SummaryComponentStack tail0 | - derivedFluentFlowPush(c, input, arg, head0, tail0, i + 1) and - head = arg.drop(i).head() and - tail = SummaryComponentStack::push(head0, tail0) - ) - } + private newtype TInterpretNode = + TElement_(Element n) or + TNode_(Node n) or + TDataFlowCall_(DataFlowCall c) - private predicate isCallbackParameter(SummaryComponentStack s) { - s.head() = TParameterSummaryComponent(_) and exists(s.tail()) - } + /** An entity used 
to interpret a source/sink specification. */ + class InterpretNode extends TInterpretNode { + /** Gets the element that this node corresponds to, if any. */ + SourceOrSinkElement asElement() { this = TElement_(result) } - private predicate isContentOfArgument(SummaryComponentStack s, ParameterPosition pos) { - s.head() = TContentSummaryComponent(_) and isContentOfArgument(s.tail(), pos) - or - s = SummaryComponentStack::argument(pos) - } + /** Gets the data-flow node that this node corresponds to, if any. */ + Node asNode() { this = TNode_(result) } - private predicate outputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, _, s, _) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TContentSummaryComponent(_) and - s = out.tail() - ) - or - // Add the argument node corresponding to the requested post-update node - inputState(c, s) and isCallbackParameter(s) - } + /** Gets the call that this node corresponds to, if any. */ + DataFlowCall asCall() { this = TDataFlowCall_(result) } - private predicate inputState(SummarizedCallable c, SummaryComponentStack s) { - summary(c, s, _, _) - or - exists(SummaryComponentStack inp | inputState(c, inp) and s = inp.tail()) - or - exists(SummaryComponentStack out | - outputState(c, out) and - out.head() = TParameterSummaryComponent(_) and - s = out.tail() - ) - or - // Add the post-update node corresponding to the requested argument node - outputState(c, s) and isCallbackParameter(s) - or - // Add the parameter node for parameter side-effects - outputState(c, s) and s = SummaryComponentStack::argument(_) - } + /** Gets the callable that this node corresponds to, if any. 
*/ + DataFlowCallable asCallable() { result.getUnderlyingCallable() = this.asElement() } - private newtype TSummaryNodeState = - TSummaryNodeInputState(SummaryComponentStack s) { inputState(_, s) } or - TSummaryNodeOutputState(SummaryComponentStack s) { outputState(_, s) } + /** Gets the target of this call, if any. */ + Element getCallTarget() { result = this.asCall().asCall().getStaticTarget() } - /** - * A state used to break up (complex) flow summaries into atomic flow steps. - * For a flow summary - * - * ```ql - * propagatesFlow( - * SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - * ) - * ``` - * - * the following states are used: - * - * - `TSummaryNodeInputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _have been read_ from the - * input. - * - `TSummaryNodeOutputState(SummaryComponentStack s)`: - * this state represents that the components in `s` _remain to be written_ to - * the output. - */ - private class SummaryNodeState extends TSummaryNodeState { - /** Holds if this state is a valid input state for `c`. */ - pragma[nomagic] - predicate isInputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeInputState(s) and - inputState(c, s) - } - - /** Holds if this state is a valid output state for `c`. */ - pragma[nomagic] - predicate isOutputState(SummarizedCallable c, SummaryComponentStack s) { - this = TSummaryNodeOutputState(s) and - outputState(c, s) - } - - /** Gets a textual representation of this state. */ + /** Gets a textual representation of this node. */ string toString() { - exists(SummaryComponentStack s | - this = TSummaryNodeInputState(s) and - result = "read: " + s - ) + result = this.asElement().toString() or - exists(SummaryComponentStack s | - this = TSummaryNodeOutputState(s) and - result = "to write: " + s - ) + result = this.asNode().toString() + or + result = this.asCall().toString() + } + + /** Gets the location of this node. 
*/ + Location getLocation() { + result = this.asElement().getLocation() + or + result = this.asNode().getLocation() + or + result = this.asCall().getLocation() } } - private newtype TSummaryNode = - TSummaryInternalNode(SummarizedCallable c, SummaryNodeState state) { - summaryNodeRange(c, state) - } or - TSummaryParameterNode(SummarizedCallable c, ParameterPosition pos) { - summaryParameterNodeRange(c, pos) - } - - abstract class SummaryNode extends TSummaryNode { - abstract string toString(); - - abstract SummarizedCallable getSummarizedCallable(); - } - - private class SummaryInternalNode extends SummaryNode, TSummaryInternalNode { - private SummarizedCallable c; - private SummaryNodeState state; - - SummaryInternalNode() { this = TSummaryInternalNode(c, state) } - - override string toString() { result = "[summary] " + state + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - private class SummaryParamNode extends SummaryNode, TSummaryParameterNode { - private SummarizedCallable c; - private ParameterPosition pos; - - SummaryParamNode() { this = TSummaryParameterNode(c, pos) } - - override string toString() { result = "[summary param] " + pos + " in " + c } - - override SummarizedCallable getSummarizedCallable() { result = c } - } - - /** - * Holds if `state` represents having read from a parameter at position - * `pos` in `c`. In this case we are not synthesizing a data-flow node, - * but instead assume that a relevant parameter node already exists. - */ - private predicate parameterReadState( - SummarizedCallable c, SummaryNodeState state, ParameterPosition pos - ) { - state.isInputState(c, SummaryComponentStack::argument(pos)) - } - - /** - * Holds if a synthesized summary node is needed for the state `state` in summarized - * callable `c`. 
- */ - private predicate summaryNodeRange(SummarizedCallable c, SummaryNodeState state) { - state.isInputState(c, _) and - not parameterReadState(c, state, _) - or - state.isOutputState(c, _) - } - - pragma[noinline] - private SummaryNode summaryNodeInputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | state.isInputState(c, s) | - result = TSummaryInternalNode(c, state) - or - exists(ParameterPosition pos | - parameterReadState(c, state, pos) and - result = TSummaryParameterNode(c, pos) - ) - ) - } - - pragma[noinline] - private SummaryNode summaryNodeOutputState(SummarizedCallable c, SummaryComponentStack s) { - exists(SummaryNodeState state | - state.isOutputState(c, s) and - result = TSummaryInternalNode(c, state) - ) - } - - /** - * Holds if a write targets `post`, which is a post-update node for a - * parameter at position `pos` in `c`. - */ - private predicate isParameterPostUpdate( - SummaryNode post, SummarizedCallable c, ParameterPosition pos - ) { - post = summaryNodeOutputState(c, SummaryComponentStack::argument(pos)) - } - - /** Holds if a parameter node at position `pos` is required for `c`. 
*/ - private predicate summaryParameterNodeRange(SummarizedCallable c, ParameterPosition pos) { - parameterReadState(c, _, pos) - or - // Same as `isParameterPostUpdate(_, c, pos)`, but can be used in a negative context - any(SummaryNodeState state).isOutputState(c, SummaryComponentStack::argument(pos)) - } - - private predicate callbackOutput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ReturnKind rk - ) { - any(SummaryNodeState state).isInputState(c, s) and - s.head() = TReturnSummaryComponent(rk) and - receiver = summaryNodeInputState(c, s.tail()) - } - - private predicate callbackInput( - SummarizedCallable c, SummaryComponentStack s, SummaryNode receiver, ArgumentPosition pos - ) { - any(SummaryNodeState state).isOutputState(c, s) and - s.head() = TParameterSummaryComponent(pos) and - receiver = summaryNodeInputState(c, s.tail()) - } - - /** Holds if a call targeting `receiver` should be synthesized inside `c`. */ - predicate summaryCallbackRange(SummarizedCallable c, SummaryNode receiver) { - callbackOutput(c, _, receiver, _) - or - callbackInput(c, _, receiver, _) - } - - /** - * Gets the type of synthesized summary node `n`. - * - * The type is computed based on the language-specific predicates - * `getContentType()`, `getReturnType()`, `getCallbackParameterType()`, and - * `getCallbackReturnType()`. 
- */ - DataFlowType summaryNodeType(SummaryNode n) { - exists(SummaryNode pre | - summaryPostUpdateNode(n, pre) and - result = summaryNodeType(pre) - ) - or - exists(SummarizedCallable c, SummaryComponentStack s, SummaryComponent head | head = s.head() | - n = summaryNodeInputState(c, s) and - ( - exists(ContentSet cont | result = getContentType(cont) | - head = TContentSummaryComponent(cont) or - head = TWithContentSummaryComponent(cont) - ) - or - head = TWithoutContentSummaryComponent(_) and - result = summaryNodeType(summaryNodeInputState(c, s.tail())) - or - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = - getCallbackReturnType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), rk) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - or - exists(ParameterPosition pos | - head = TArgumentSummaryComponent(pos) and - result = getParameterType(c, pos) - ) - ) - or - n = summaryNodeOutputState(c, s) and - ( - exists(ContentSet cont | - head = TContentSummaryComponent(cont) and result = getContentType(cont) - ) - or - s.length() = 1 and - exists(ReturnKind rk | - head = TReturnSummaryComponent(rk) and - result = getReturnType(c, rk) - ) - or - exists(ArgumentPosition pos | head = TParameterSummaryComponent(pos) | - result = - getCallbackParameterType(summaryNodeType(summaryNodeInputState(pragma[only_bind_out](c), - s.tail())), pos) - ) - or - exists(SummaryComponent::SyntheticGlobal sg | - head = TSyntheticGlobalSummaryComponent(sg) and - result = getSyntheticGlobalType(sg) - ) - ) - ) - } - - /** Holds if summary node `p` is a parameter with position `pos`. */ - predicate summaryParameterNode(SummaryNode p, ParameterPosition pos) { - p = TSummaryParameterNode(_, pos) - } - - /** Holds if summary node `out` contains output of kind `rk` from call `c`. 
*/ - predicate summaryOutNode(DataFlowCall c, SummaryNode out, ReturnKind rk) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackOutput(callable, s, receiver, rk) and - out = summaryNodeInputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `arg` is at position `pos` in the call `c`. */ - predicate summaryArgumentNode(DataFlowCall c, SummaryNode arg, ArgumentPosition pos) { - exists(SummarizedCallable callable, SummaryComponentStack s, SummaryNode receiver | - callbackInput(callable, s, receiver, pos) and - arg = summaryNodeOutputState(callable, s) and - c = summaryDataFlowCall(receiver) - ) - } - - /** Holds if summary node `post` is a post-update node with pre-update node `pre`. */ - predicate summaryPostUpdateNode(SummaryNode post, SummaryNode pre) { - exists(SummarizedCallable c, ParameterPosition pos | - isParameterPostUpdate(post, c, pos) and - pre = TSummaryParameterNode(c, pos) - ) - or - exists(SummarizedCallable callable, SummaryComponentStack s | - callbackInput(callable, s, _, _) and - pre = summaryNodeOutputState(callable, s) and - post = summaryNodeInputState(callable, s) - ) - } - - /** Holds if summary node `ret` is a return node of kind `rk`. */ - predicate summaryReturnNode(SummaryNode ret, ReturnKind rk) { - exists(SummaryComponentStack s | - ret = summaryNodeOutputState(_, s) and - s = TSingletonSummaryComponentStack(TReturnSummaryComponent(rk)) - ) - } - - /** - * Holds if flow is allowed to pass from parameter `p`, to a return - * node, and back out to `p`. - */ - predicate summaryAllowParameterReturnInSelf(ParamNode p) { - exists(SummarizedCallable c, ParameterPosition ppos | - p.isParameterOf(inject(c), pragma[only_bind_into](ppos)) + /** Provides additional sink specification logic. */ + bindingset[c] + predicate interpretOutput(string c, InterpretNode mid, InterpretNode node) { + // Allow fields to be picked as output nodes. 
+ exists(Node n, AstNode ast | + n = node.asNode() and + ast = mid.asElement() | - exists(SummaryComponentStack inputContents, SummaryComponentStack outputContents | - summary(c, inputContents, outputContents, _) and - inputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) and - outputContents.bottom() = pragma[only_bind_into](TArgumentSummaryComponent(ppos)) - ) + c = "" and + n.asExpr().(MemberRefExpr).getMember() = ast ) } - /** Provides a compilation of flow summaries to atomic data-flow steps. */ - module Steps { - /** - * Holds if there is a local step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryLocalStep(SummaryNode pred, SummaryNode succ, boolean preservesValue) { - exists( - SummarizedCallable c, SummaryComponentStack inputContents, - SummaryComponentStack outputContents - | - summary(c, inputContents, outputContents, preservesValue) and - pred = summaryNodeInputState(c, inputContents) and - succ = summaryNodeOutputState(c, outputContents) - | - preservesValue = true - or - preservesValue = false and not summary(c, inputContents, outputContents, true) - ) + /** Provides additional source specification logic. */ + bindingset[c] + predicate interpretInput(string c, InterpretNode mid, InterpretNode node) { + exists(Node n, AstNode ast, MemberRefExpr e | + n = node.asNode() and + ast = mid.asElement() and + e.getMember() = ast + | + // Allow fields to be picked as input nodes. + c = "" and + e.getBase() = n.asExpr() or - exists(SummarizedCallable c, SummaryComponentStack s | - pred = summaryNodeInputState(c, s.tail()) and - succ = summaryNodeInputState(c, s) and - s.head() = [SummaryComponent::withContent(_), SummaryComponent::withoutContent(_)] and - preservesValue = true - ) - } - - /** - * Holds if there is a read step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. 
- */ - predicate summaryReadStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeInputState(sc, s.tail()) and - succ = summaryNodeInputState(sc, s) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a store step of content `c` from `pred` to `succ`, which - * is synthesized from a flow summary. - */ - predicate summaryStoreStep(SummaryNode pred, ContentSet c, SummaryNode succ) { - exists(SummarizedCallable sc, SummaryComponentStack s | - pred = summaryNodeOutputState(sc, s) and - succ = summaryNodeOutputState(sc, s.tail()) and - SummaryComponent::content(c) = s.head() - ) - } - - /** - * Holds if there is a jump step from `pred` to `succ`, which is synthesized - * from a flow summary. - */ - predicate summaryJumpStep(SummaryNode pred, SummaryNode succ) { - exists(SummaryComponentStack s | - s = SummaryComponentStack::singleton(SummaryComponent::syntheticGlobal(_)) and - pred = summaryNodeOutputState(_, s) and - succ = summaryNodeInputState(_, s) - ) - } - - /** - * Holds if values stored inside content `c` are cleared at `n`. `n` is a - * synthesized summary node, so in order for values to be cleared at calls - * to the relevant method, it is important that flow does not pass over - * the argument, either via use-use flow or def-use flow. - * - * Example: - * - * ``` - * a.b = taint; - * a.clearB(); // assume we have a flow summary for `clearB` that clears `b` on the qualifier - * sink(a.b); - * ``` - * - * In the above, flow should not pass from `a` on the first line (or the second - * line) to `a` on the third line. Instead, there will be synthesized flow from - * `a` on line 2 to the post-update node for `a` on that line (via an intermediate - * node where field `b` is cleared). 
- */ - predicate summaryClearsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withoutContent(c) - ) - } - - /** - * Holds if the value that is being tracked is expected to be stored inside - * content `c` at `n`. - */ - predicate summaryExpectsContent(SummaryNode n, ContentSet c) { - exists(SummarizedCallable sc, SummaryNodeState state, SummaryComponentStack stack | - n = TSummaryInternalNode(sc, state) and - state.isInputState(sc, stack) and - stack.head() = SummaryComponent::withContent(c) - ) - } - - pragma[noinline] - private predicate viableParam( - DataFlowCall call, SummarizedCallable sc, ParameterPosition ppos, SummaryParamNode p - ) { - exists(DataFlowCallable c | - c = inject(sc) and - p = TSummaryParameterNode(sc, ppos) and - c = viableCallable(call) - ) - } - - pragma[nomagic] - private SummaryParamNode summaryArgParam(DataFlowCall call, ArgNode arg, SummarizedCallable sc) { - exists(ParameterPosition ppos | - argumentPositionMatch(call, arg, ppos) and - viableParam(call, sc, ppos, result) - ) - } - - /** - * Holds if `p` can reach `n` in a summarized callable, using only value-preserving - * local steps. `clearsOrExpects` records whether any node on the path from `p` to - * `n` either clears or expects contents. - */ - private predicate paramReachesLocal(SummaryParamNode p, SummaryNode n, boolean clearsOrExpects) { - viableParam(_, _, _, p) and - n = p and - clearsOrExpects = false - or - exists(SummaryNode mid, boolean clearsOrExpectsMid | - paramReachesLocal(p, mid, clearsOrExpectsMid) and - summaryLocalStep(mid, n, true) and - if - summaryClearsContent(n, _) or - summaryExpectsContent(n, _) - then clearsOrExpects = true - else clearsOrExpects = clearsOrExpectsMid - ) - } - - /** - * Holds if use-use flow starting from `arg` should be prohibited. 
- * - * This is the case when `arg` is the argument of a call that targets a - * flow summary where the corresponding parameter either clears contents - * or expects contents. - */ - pragma[nomagic] - predicate prohibitsUseUseFlow(ArgNode arg, SummarizedCallable sc) { - exists(SummaryParamNode p, ParameterPosition ppos, SummaryNode ret | - paramReachesLocal(p, ret, true) and - p = summaryArgParam(_, arg, sc) and - p = TSummaryParameterNode(_, pragma[only_bind_into](ppos)) and - isParameterPostUpdate(ret, _, pragma[only_bind_into](ppos)) - ) - } - - pragma[nomagic] - private predicate summaryReturnNodeExt(SummaryNode ret, ReturnKindExt rk) { - summaryReturnNode(ret, rk.(ValueReturnKind).getKind()) - or - exists(SummaryParamNode p, SummaryNode pre, ParameterPosition pos | - paramReachesLocal(p, pre, _) and - summaryPostUpdateNode(ret, pre) and - p = TSummaryParameterNode(_, pos) and - rk.(ParamUpdateReturnKind).getPosition() = pos - ) - } - - bindingset[ret] - private SummaryParamNode summaryArgParamRetOut( - ArgNode arg, SummaryNode ret, OutNodeExt out, SummarizedCallable sc - ) { - exists(DataFlowCall call, ReturnKindExt rk | - result = summaryArgParam(call, arg, sc) and - summaryReturnNodeExt(ret, pragma[only_bind_into](rk)) and - out = pragma[only_bind_into](rk).getAnOutNode(call) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple value-preserving flow - * summary, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summaryThroughStepValue(ArgNode arg, Node out, SummarizedCallable sc) { - exists(ReturnKind rk, SummaryNode ret, DataFlowCall call | - summaryLocalStep(summaryArgParam(call, arg, sc), ret, true) and - summaryReturnNode(ret, pragma[only_bind_into](rk)) and - out = getAnOutNode(call, pragma[only_bind_into](rk)) - ) - } - - /** - * Holds if `arg` flows to `out` using a simple flow summary involving taint - * step, that is, a flow summary without reads and stores. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryThroughStepTaint(ArgNode arg, Node out, SummarizedCallable sc) { - exists(SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), ret, false) - ) - } - - /** - * Holds if there is a read(+taint) of `c` from `arg` to `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. - */ - predicate summaryGetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryReadStep(summaryArgParamRetOut(arg, ret, out, sc), c, mid) and - summaryLocalStep(mid, ret, _) - ) - } - - /** - * Holds if there is a (taint+)store of `arg` into content `c` of `out` using a - * flow summary. - * - * NOTE: This step should not be used in global data-flow/taint-tracking, but may - * be useful to include in the exposed local data-flow/taint-tracking relations. 
- */ - predicate summarySetterStep(ArgNode arg, ContentSet c, Node out, SummarizedCallable sc) { - exists(SummaryNode mid, SummaryNode ret | - summaryLocalStep(summaryArgParamRetOut(arg, ret, out, sc), mid, _) and - summaryStoreStep(mid, c, ret) - ) - } - } - - /** - * Provides a means of translating externally (e.g., MaD) defined flow - * summaries into a `SummarizedCallable`s. - */ - module External { - /** Holds if `spec` is a relevant external specification. */ - private predicate relevantSpec(string spec) { - summaryElement(_, spec, _, _, _) or - summaryElement(_, _, spec, _, _) or - sourceElement(_, spec, _, _) or - sinkElement(_, spec, _, _) - } - - private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { relevantSpec(this) } - } - - /** Holds if specification component `token` parses as parameter `pos`. */ - predicate parseParam(AccessPathToken token, ArgumentPosition pos) { - token.getName() = "Parameter" and - pos = parseParamBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as argument `pos`. */ - predicate parseArg(AccessPathToken token, ParameterPosition pos) { - token.getName() = "Argument" and - pos = parseArgBody(token.getAnArgument()) - } - - /** Holds if specification component `token` parses as synthetic global `sg`. 
*/ - predicate parseSynthGlobal(AccessPathToken token, string sg) { - token.getName() = "SyntheticGlobal" and - sg = token.getAnArgument() - } - - private class SyntheticGlobalFromAccessPath extends SummaryComponent::SyntheticGlobal { - SyntheticGlobalFromAccessPath() { parseSynthGlobal(_, this) } - } - - private SummaryComponent interpretComponent(AccessPathToken token) { - exists(ParameterPosition pos | - parseArg(token, pos) and result = SummaryComponent::argument(pos) - ) - or - exists(ArgumentPosition pos | - parseParam(token, pos) and result = SummaryComponent::parameter(pos) - ) - or - token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind()) - or - exists(string sg | - parseSynthGlobal(token, sg) and result = SummaryComponent::syntheticGlobal(sg) - ) - or - result = interpretComponentSpecific(token) - } - - /** - * Holds if `spec` specifies summary component stack `stack`. - */ - predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) { - interpretSpec(spec, spec.getNumToken(), stack) - } - - /** Holds if the first `n` tokens of `spec` resolves to `stack`. 
*/ - private predicate interpretSpec(AccessPath spec, int n, SummaryComponentStack stack) { - n = 1 and - stack = SummaryComponentStack::singleton(interpretComponent(spec.getToken(0))) - or - exists(SummaryComponent head, SummaryComponentStack tail | - interpretSpec(spec, n, head, tail) and - stack = SummaryComponentStack::push(head, tail) - ) - } - - /** Holds if the first `n` tokens of `spec` resolves to `head` followed by `tail` */ - private predicate interpretSpec( - AccessPath spec, int n, SummaryComponent head, SummaryComponentStack tail - ) { - interpretSpec(spec, n - 1, tail) and - head = interpretComponent(spec.getToken(n - 1)) - } - - private class MkStack extends RequiredSummaryComponentStack { - override predicate required(SummaryComponent head, SummaryComponentStack tail) { - interpretSpec(_, _, head, tail) - } - } - - private class SummarizedCallableExternal extends SummarizedCallable { - SummarizedCallableExternal() { summaryElement(this, _, _, _, _) } - - private predicate relevantSummaryElementGenerated( - AccessPath inSpec, AccessPath outSpec, string kind - ) { - exists(Provenance provenance | - provenance.isGenerated() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) and - not this.applyManualModel() - } - - private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) { - exists(Provenance provenance | - provenance.isManual() and - summaryElement(this, inSpec, outSpec, kind, provenance) - ) - or - this.relevantSummaryElementGenerated(inSpec, outSpec, kind) - } - - override predicate propagatesFlow( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - exists(AccessPath inSpec, AccessPath outSpec, string kind | - this.relevantSummaryElement(inSpec, outSpec, kind) and - interpretSpec(inSpec, input) and - interpretSpec(outSpec, output) - | - kind = "value" and preservesValue = true - or - kind = "taint" and preservesValue = false - ) - } - - override predicate 
hasProvenance(Provenance provenance) { - summaryElement(this, _, _, _, provenance) - } - } - - /** Holds if component `c` of specification `spec` cannot be parsed. */ - predicate invalidSpecComponent(AccessPath spec, string c) { - c = spec.getToken(_) and - not exists(interpretComponent(c)) - } - - /** Holds if `provenance` is not a valid provenance value. */ - bindingset[provenance] - predicate invalidProvenance(string provenance) { not provenance instanceof Provenance } - - /** - * Holds if token `part` of specification `spec` has an invalid index. - * E.g., `Argument[-1]`. - */ - predicate invalidIndexComponent(AccessPath spec, AccessPathToken part) { - part = spec.getToken(_) and - part.getName() = ["Parameter", "Argument"] and - AccessPath::parseInt(part.getArgumentList()) < 0 - } - - private predicate inputNeedsReference(AccessPathToken c) { - c.getName() = "Argument" or - inputNeedsReferenceSpecific(c) - } - - private predicate outputNeedsReference(AccessPathToken c) { - c.getName() = ["Argument", "ReturnValue"] or - outputNeedsReferenceSpecific(c) - } - - private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) { - exists(SourceOrSinkElement e | - sourceElement(e, output, kind, _) and - if outputNeedsReference(output.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) { - exists(SourceOrSinkElement e | - sinkElement(e, input, kind, _) and - if inputNeedsReference(input.getToken(0)) - then e = ref.getCallTarget() - else e = ref.asElement() - ) - } - - /** Holds if the first `n` tokens of `output` resolve to the given interpretation. 
*/ - private predicate interpretOutput( - AccessPath output, int n, InterpretNode ref, InterpretNode node - ) { - sourceElementRef(ref, output, _) and - n = 0 and - ( - if output = "" - then - // Allow language-specific interpretation of the empty access path - interpretOutputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretOutput(output, n - 1, ref, mid) and - c = output.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ParamNode).isParameterOf(mid.asCallable(), ppos) and - parameterMatch(ppos, apos) - | - c = "Parameter" or parseParam(c, apos) - ) - or - c = "ReturnValue" and - node.asNode() = getAnOutNodeExt(mid.asCall(), TValueReturn(getReturnValueKind())) - or - interpretOutputSpecific(c, mid, node) - ) - } - - /** Holds if the first `n` tokens of `input` resolve to the given interpretation. 
*/ - private predicate interpretInput(AccessPath input, int n, InterpretNode ref, InterpretNode node) { - sinkElementRef(ref, input, _) and - n = 0 and - ( - if input = "" - then - // Allow language-specific interpretation of the empty access path - interpretInputSpecific("", ref, node) - else node = ref - ) - or - exists(InterpretNode mid, AccessPathToken c | - interpretInput(input, n - 1, ref, mid) and - c = input.getToken(n - 1) - | - exists(ArgumentPosition apos, ParameterPosition ppos | - node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and - parameterMatch(ppos, apos) - | - c = "Argument" or parseArg(c, ppos) - ) - or - exists(ReturnNodeExt ret | - c = "ReturnValue" and - ret = node.asNode() and - ret.getKind().(ValueReturnKind).getKind() = getReturnValueKind() and - mid.asCallable() = getNodeEnclosingCallable(ret) - ) - or - interpretInputSpecific(c, mid, node) - ) - } - - /** - * Holds if `node` is specified as a source with the given kind in a MaD flow - * model. - */ - predicate isSourceNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath output | - sourceElementRef(ref, output, kind) and - interpretOutput(output, output.getNumToken(), ref, node) - ) - } - - /** - * Holds if `node` is specified as a sink with the given kind in a MaD flow - * model. - */ - predicate isSinkNode(InterpretNode node, string kind) { - exists(InterpretNode ref, AccessPath input | - sinkElementRef(ref, input, kind) and - interpretInput(input, input.getNumToken(), ref, node) - ) - } - } - - /** Provides a query predicate for outputting a set of relevant flow summaries. */ - module TestOutput { - /** A flow summary to include in the `summary/1` query predicate. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - /** Gets the string representation of this callable used by `summary/1`. */ - abstract string getCallableCsv(); - - /** Holds if flow is propagated between `input` and `output`. 
*/ - predicate relevantSummary( - SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue - ) { - super.propagatesFlow(input, output, preservesValue) - } - - string toString() { result = super.toString() } - } - - /** A model to include in the `neutral/1` query predicate. */ - abstract class RelevantNeutralCallable instanceof NeutralCallable { - /** Gets the string representation of this callable used by `neutral/1`. */ - abstract string getCallableCsv(); - - /** - * Gets the kind of the neutral. - */ - string getKind() { result = super.getKind() } - - string toString() { result = super.toString() } - } - - /** Render the kind in the format used in flow summaries. */ - private string renderKind(boolean preservesValue) { - preservesValue = true and result = "value" - or - preservesValue = false and result = "taint" - } - - private string renderProvenance(SummarizedCallable c) { - if c.applyManualModel() then result = "manual" else c.hasProvenance(result) - } - - private string renderProvenanceNeutral(NeutralCallable c) { - if c.hasManualModel() then result = "manual" else c.hasProvenance(result) - } - - /** - * A query predicate for outputting flow summaries in semi-colon separated format in QL tests. - * The syntax is: "namespace;type;overrides;name;signature;ext;inputspec;outputspec;kind;provenance", - * ext is hardcoded to empty. - */ - query predicate summary(string csv) { - exists( - RelevantSummarizedCallable c, SummaryComponentStack input, SummaryComponentStack output, - boolean preservesValue - | - c.relevantSummary(input, output, preservesValue) and - csv = - c.getCallableCsv() // Callable information - + input.getMadRepresentation() + ";" // input - + output.getMadRepresentation() + ";" // output - + renderKind(preservesValue) + ";" // kind - + renderProvenance(c) // provenance - ) - } - - /** - * Holds if a neutral model `csv` exists (semi-colon separated format). Used for testing purposes. 
- * The syntax is: "namespace;type;name;signature;kind;provenance"", - */ - query predicate neutral(string csv) { - exists(RelevantNeutralCallable c | - csv = - c.getCallableCsv() // Callable information - + c.getKind() + ";" // kind - + renderProvenanceNeutral(c) // provenance - ) - } - } - - /** - * Provides query predicates for rendering the generated data flow graph for - * a summarized callable. - * - * Import this module into a `.ql` file of `@kind graph` to render the graph. - * The graph is restricted to callables from `RelevantSummarizedCallable`. - */ - module RenderSummarizedCallable { - /** A summarized callable to include in the graph. */ - abstract class RelevantSummarizedCallable instanceof SummarizedCallable { - string toString() { result = super.toString() } - } - - private newtype TNodeOrCall = - MkNode(SummaryNode n) { - exists(RelevantSummarizedCallable c | - n = TSummaryInternalNode(c, _) - or - n = TSummaryParameterNode(c, _) - ) - } or - MkCall(DataFlowCall call) { - call = summaryDataFlowCall(_) and - call.getEnclosingCallable() = inject(any(RelevantSummarizedCallable c)) - } - - private class NodeOrCall extends TNodeOrCall { - SummaryNode asNode() { this = MkNode(result) } - - DataFlowCall asCall() { this = MkCall(result) } - - string toString() { - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
- */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - } - } - - query predicate nodes(NodeOrCall n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - private predicate edgesComponent(NodeOrCall a, NodeOrCall b, string value) { - exists(boolean preservesValue | - Private::Steps::summaryLocalStep(a.asNode(), b.asNode(), preservesValue) and - if preservesValue = true then value = "value" else value = "taint" - ) - or - exists(ContentSet c | - Private::Steps::summaryReadStep(a.asNode(), c, b.asNode()) and - value = "read (" + c + ")" - or - Private::Steps::summaryStoreStep(a.asNode(), c, b.asNode()) and - value = "store (" + c + ")" - or - Private::Steps::summaryClearsContent(a.asNode(), c) and - b = a and - value = "clear (" + c + ")" - or - Private::Steps::summaryExpectsContent(a.asNode(), c) and - b = a and - value = "expect (" + c + ")" - ) - or - summaryPostUpdateNode(b.asNode(), a.asNode()) and - value = "post-update" - or - b.asCall() = summaryDataFlowCall(a.asNode()) and - value = "receiver" - or - exists(ArgumentPosition pos | - summaryArgumentNode(b.asCall(), a.asNode(), pos) and - value = "argument (" + pos + ")" - ) - } - - query predicate edges(NodeOrCall a, NodeOrCall b, string key, string value) { - key = "semmle.label" and - value = strictconcat(string s | edgesComponent(a, b, s) | s, " / ") - } + // Allow post update nodes to be picked as input nodes when the `input` column + // of the row is `PostUpdate`. + c = "PostUpdate" and + e.getBase() = n.(PostUpdateNode).getPreUpdateNode().asExpr() + ) } } + +module Private { + import Impl::Private + + module Steps = Impl::Private::Steps; + + module External { + import Impl::Private::External + import Impl::Private::External::SourceSinkInterpretation + } + + /** + * Provides predicates for constructing summary components. 
+ */ + module SummaryComponent { + private import Impl::Private::SummaryComponent as SC + + predicate parameter = SC::parameter/1; + + predicate argument = SC::argument/1; + + predicate content = SC::content/1; + + predicate withoutContent = SC::withoutContent/1; + + predicate withContent = SC::withContent/1; + } + + /** + * Provides predicates for constructing stacks of summary components. + */ + module SummaryComponentStack { + private import Impl::Private::SummaryComponentStack as SCS + + predicate singleton = SCS::singleton/1; + + predicate push = SCS::push/2; + + predicate argument = SCS::argument/1; + } +} + +module Public = Impl::Public; diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImplSpecific.qll b/swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImplSpecific.qll deleted file mode 100644 index a8c99277f46..00000000000 --- a/swift/ql/lib/codeql/swift/dataflow/internal/FlowSummaryImplSpecific.qll +++ /dev/null @@ -1,265 +0,0 @@ -/** - * Provides Swift specific classes and predicates for defining flow summaries. - */ - -private import swift -private import DataFlowDispatch -private import DataFlowPrivate -private import DataFlowPublic -private import DataFlowImplCommon -private import FlowSummaryImpl::Private -private import FlowSummaryImpl::Public -private import codeql.swift.dataflow.ExternalFlow -private import codeql.swift.dataflow.FlowSummary as FlowSummary -private import codeql.swift.controlflow.CfgNodes - -/** - * A class of callables that are candidates for flow summary modeling. - */ -class SummarizedCallableBase = Function; - -/** - * A class of callables that are candidates for neutral modeling. - */ -class NeutralCallableBase = Function; - -DataFlowCallable inject(SummarizedCallable c) { result.getUnderlyingCallable() = c } - -/** Gets the parameter position of the instance parameter. 
*/ -ArgumentPosition callbackSelfParameterPosition() { result instanceof ThisArgumentPosition } - -/** Gets the synthesized data-flow call for `receiver`. */ -SummaryCall summaryDataFlowCall(SummaryNode receiver) { receiver = result.getReceiver() } - -/** Gets the type of content `c`. */ -DataFlowType getContentType(ContentSet c) { any() } - -/** Gets the type of the parameter at the given position. */ -DataFlowType getParameterType(SummarizedCallable c, ParameterPosition pos) { any() } - -/** Gets the return type of kind `rk` for callable `c`. */ -bindingset[c] -DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() } - -/** - * Gets the type of the parameter matching arguments at position `pos` in a - * synthesized call that targets a callback of type `t`. - */ -DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() } - -/** - * Gets the return type of kind `rk` in a synthesized call that targets a - * callback of type `t`. - */ -DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) { any() } - -/** Gets the type of synthetic global `sg`. */ -DataFlowType getSyntheticGlobalType(SummaryComponent::SyntheticGlobal sg) { any() } - -/** - * Holds if an external flow summary exists for `c` with input specification - * `input`, output specification `output`, kind `kind`, and provenance `provenance`. - */ -predicate summaryElement(Function c, string input, string output, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext - | - summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and - c = interpretElement(namespace, type, subtypes, name, signature, ext) - ) -} - -/** - * Holds if a neutral model exists for `c` of kind `kind` - * and with provenance `provenance`. - * Note. Neutral models have not been implemented for Swift. 
- */ -predicate neutralElement(NeutralCallableBase c, string kind, string provenance) { none() } - -/** - * Holds if an external source specification exists for `e` with output specification - * `output`, kind `kind`, and provenance `provenance`. - */ -predicate sourceElement(Element e, string output, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext - | - sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance) and - e = interpretElement(namespace, type, subtypes, name, signature, ext) - ) -} - -/** - * Holds if an external sink specification exists for `e` with input specification - * `input`, kind `kind` and provenance `provenance`. - */ -predicate sinkElement(Element e, string input, string kind, string provenance) { - exists( - string namespace, string type, boolean subtypes, string name, string signature, string ext - | - sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance) and - e = interpretElement(namespace, type, subtypes, name, signature, ext) - ) -} - -/** Gets the summary component for specification component `c`, if any. */ -bindingset[c] -SummaryComponent interpretComponentSpecific(AccessPathToken c) { - exists(ContentSet cs, Content content | - cs.isSingleton(content) and - parseContent(c, content) and - result = SummaryComponent::content(cs) - ) -} - -/** Gets the textual representation of the content in the format used for MaD models. 
*/ -private string getContentSpecific(ContentSet cs) { - exists(Content::FieldContent c | - cs.isSingleton(c) and - result = "Field[" + c.getField().getName() + "]" - ) - or - exists(Content::TupleContent c | - cs.isSingleton(c) and - result = "TupleElement[" + c.getIndex().toString() + "]" - ) - or - exists(Content::EnumContent c | - cs.isSingleton(c) and - result = "EnumElement[" + c.getSignature() + "]" - ) - or - exists(Content::CollectionContent c | - cs.isSingleton(c) and - result = "CollectionElement" - ) -} - -/** Gets the textual representation of a summary component in the format used for MaD models. */ -string getMadRepresentationSpecific(SummaryComponent sc) { - exists(ContentSet c | sc = TContentSummaryComponent(c) and result = getContentSpecific(c)) - or - exists(ReturnKind rk | - sc = TReturnSummaryComponent(rk) and - not rk = getReturnValueKind() and - result = "ReturnValue" + "[" + rk + "]" - ) - or - exists(ContentSet c | - sc = TWithoutContentSummaryComponent(c) and - result = "WithoutContent" + c.toString() - ) - or - exists(ContentSet c | - sc = TWithContentSummaryComponent(c) and - result = "WithContent" + c.toString() - ) -} - -/** Gets the textual representation of a parameter position in the format used for flow summaries. */ -string getParameterPosition(ParameterPosition pos) { result = pos.toString() } - -/** Gets the textual representation of an argument position in the format used for flow summaries. */ -string getArgumentPosition(ArgumentPosition pos) { result = pos.toString() } - -/** Holds if input specification component `c` needs a reference. */ -predicate inputNeedsReferenceSpecific(string c) { none() } - -/** Holds if output specification component `c` needs a reference. */ -predicate outputNeedsReferenceSpecific(string c) { none() } - -class SourceOrSinkElement = AstNode; - -/** Gets the return kind corresponding to specification `"ReturnValue"`. 
*/ -NormalReturnKind getReturnValueKind() { any() } - -private newtype TInterpretNode = - TElement_(Element n) or - TNode_(Node n) or - TDataFlowCall_(DataFlowCall c) - -/** An entity used to interpret a source/sink specification. */ -class InterpretNode extends TInterpretNode { - /** Gets the element that this node corresponds to, if any. */ - SourceOrSinkElement asElement() { this = TElement_(result) } - - /** Gets the data-flow node that this node corresponds to, if any. */ - Node asNode() { this = TNode_(result) } - - /** Gets the call that this node corresponds to, if any. */ - DataFlowCall asCall() { this = TDataFlowCall_(result) } - - /** Gets the callable that this node corresponds to, if any. */ - DataFlowCallable asCallable() { result.getUnderlyingCallable() = this.asElement() } - - /** Gets the target of this call, if any. */ - Function getCallTarget() { result = this.asCall().asCall().getStaticTarget() } - - /** Gets a textual representation of this node. */ - string toString() { - result = this.asElement().toString() - or - result = this.asNode().toString() - or - result = this.asCall().toString() - } - - /** Gets the location of this node. */ - Location getLocation() { - result = this.asElement().getLocation() - or - result = this.asNode().getLocation() - or - result = this.asCall().getLocation() - } -} - -predicate interpretOutputSpecific(string c, InterpretNode mid, InterpretNode node) { - // Allow fields to be picked as output nodes. - exists(Node n, AstNode ast | - n = node.asNode() and - ast = mid.asElement() - | - c = "" and - n.asExpr().(MemberRefExpr).getMember() = ast - ) -} - -predicate interpretInputSpecific(string c, InterpretNode mid, InterpretNode node) { - exists(Node n, AstNode ast, MemberRefExpr e | - n = node.asNode() and - ast = mid.asElement() and - e.getMember() = ast - | - // Allow fields to be picked as input nodes. 
- c = "" and - e.getBase() = n.asExpr() - or - // Allow post update nodes to be picked as input nodes when the `input` column - // of the row is `PostUpdate`. - c = "PostUpdate" and - e.getBase() = n.(PostUpdateNode).getPreUpdateNode().asExpr() - ) -} - -/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */ -bindingset[s] -ArgumentPosition parseParamBody(string s) { - exists(int index | index = AccessPath::parseInt(s) | - result.(PositionalArgumentPosition).getIndex() = index - or - index = -1 and - result instanceof ThisArgumentPosition - ) -} - -/** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */ -bindingset[s] -ParameterPosition parseArgBody(string s) { - exists(int index | index = AccessPath::parseInt(s) | - result.(PositionalParameterPosition).getIndex() = index - or - index = -1 and - result instanceof ThisParameterPosition - ) -}