From 28373e0fdf79e54523696f72152c950e0f40f0e0 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Thu, 7 Dec 2023 14:08:37 +0100 Subject: [PATCH] JS: Adapt to changes in shared code --- config/identical-files.json | 1 - javascript/ql/lib/qlpack.yml | 1 + .../data/internal/AccessPathSyntax.qll | 182 ------------------ .../data/internal/ApiGraphModels.qll | 42 ++-- .../data/internal/ApiGraphModelsSpecific.qll | 22 +-- .../library-tests/frameworks/data/test.ql | 4 +- .../library-tests/frameworks/data/warnings.ql | 1 - 7 files changed, 33 insertions(+), 220 deletions(-) delete mode 100644 javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll diff --git a/config/identical-files.json b/config/identical-files.json index 6f652af2fa1..7045c97f9e9 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -469,7 +469,6 @@ "csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll", "go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll", "java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll", - "javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll", "python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll", "swift/ql/lib/codeql/swift/dataflow/internal/AccessPathSyntax.qll" ], diff --git a/javascript/ql/lib/qlpack.yml b/javascript/ql/lib/qlpack.yml index a5890b96630..b834321a32a 100644 --- a/javascript/ql/lib/qlpack.yml +++ b/javascript/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ extractor: javascript library: true upgrades: upgrades dependencies: + codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/regex: ${workspace} codeql/tutorial: ${workspace} diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll deleted file mode 100644 index 0c3dc8427b2..00000000000 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Module for parsing access paths from MaD models, both the identifying access path used - * by dynamic languages, and the input/output specifications for summary steps. - * - * This file is used by the shared data flow library and by the JavaScript libraries - * (which does not use the shared data flow libraries). - */ - -/** - * Convenience-predicate for extracting two capture groups at once. - */ -bindingset[input, regexp] -private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) { - capture1 = input.regexpCapture(regexp, 1) and - capture2 = input.regexpCapture(regexp, 2) -} - -/** Companion module to the `AccessPath` class. */ -module AccessPath { - /** A string that should be parsed as an access path. */ - abstract class Range extends string { - bindingset[this] - Range() { any() } - } - - /** - * Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value - * of the constant or any value contained in the interval. - */ - bindingset[arg] - int parseInt(string arg) { - result = arg.toInt() - or - // Match "n1..n2" - exists(string lo, string hi | - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and - result = [lo.toInt() .. hi.toInt()] - ) - } - - /** - * Parses a lower-bounded interval `n..` and gets the lower bound. - */ - bindingset[arg] - int parseLowerBound(string arg) { result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt() } - - /** - * Parses an integer constant or interval (bounded or unbounded) that explicitly - * references the arity, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - private int parseIntWithExplicitArity(string arg, int arity) { - result >= 0 and // do not allow N-1 to resolve to a negative index - exists(string lo | - // N-x - lo = arg.regexpCapture("N-(\\d+)", 1) and - result = arity - lo.toInt() - or - // N-x.. - lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and - result = [arity - lo.toInt(), arity - 1] - ) - or - exists(string lo, string hi | - // x..N-y - regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [lo.toInt() .. arity - hi.toInt()] - or - // N-x..N-y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. arity - hi.toInt()] and - result >= 0 - or - // N-x..y - regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and - result = [arity - lo.toInt() .. hi.toInt()] and - result >= 0 - ) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) and gets any - * of the integers contained within (of which there may be infinitely many). - * - * Has no result for arguments involving an explicit arity, such as `N-1`. - */ - bindingset[arg, result] - int parseIntUnbounded(string arg) { - result = parseInt(arg) - or - result >= parseLowerBound(arg) - } - - /** - * Parses an integer constant or interval (bounded or unbounded) that - * may reference the arity of a call, such as `N-1` or `N-3..N-1`. - * - * Note that expressions of form `N-x` will never resolve to a negative index, - * even if `N` is zero (it will have no result in that case). - */ - bindingset[arg, arity] - int parseIntWithArity(string arg, int arity) { - result = parseInt(arg) - or - result in [parseLowerBound(arg) .. arity - 1] - or - result = parseIntWithExplicitArity(arg, arity) - } -} - -/** Gets the `n`th token on the access path as a string. */ -private string getRawToken(AccessPath path, int n) { - // Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`. - // Instead use regexpFind to match valid tokens, and supplement with a final length - // check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token. - result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _) -} - -/** - * A string that occurs as an access path (either identifying or input/output spec) - * which might be relevant for this database. - */ -class AccessPath extends string instanceof AccessPath::Range { - /** Holds if this string is not a syntactically valid access path. */ - predicate hasSyntaxError() { - // If the lengths match, all characters must haven been included in a token - // or seen by the `.` lookahead pattern. - this != "" and - not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1 - } - - /** Gets the `n`th token on the access path (if there are no syntax errors). */ - AccessPathToken getToken(int n) { - result = getRawToken(this, n) and - not this.hasSyntaxError() - } - - /** Gets the number of tokens on the path (if there are no syntax errors). */ - int getNumToken() { - result = count(int n | exists(getRawToken(this, n))) and - not this.hasSyntaxError() - } -} - -/** - * An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths. - */ -class AccessPathToken extends string { - AccessPathToken() { this = getRawToken(_, _) } - - private string getPart(int part) { - result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part) - } - - /** Gets the name of the token, such as `Member` from `Member[x]` */ - string getName() { result = this.getPart(1) } - - /** - * Gets the argument list, such as `1,2` from `Member[1,2]`, - * or has no result if there are no arguments. - */ - string getArgumentList() { result = this.getPart(2) } - - /** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() } - - /** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - pragma[nomagic] - string getArgument(string name, int n) { name = this.getName() and result = this.getArgument(n) } - - /** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument() { result = this.getArgument(_) } - - /** Gets an argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */ - string getAnArgument(string name) { result = this.getArgument(name, _) } - - /** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */ - int getNumArgument() { result = count(int n | exists(this.getArgument(n))) } -} diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index 1cb4e189339..e76a100263c 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -70,8 +70,8 @@ private module API = Specific::API; private module DataFlow = Specific::DataFlow; -private import Specific::AccessPathSyntax private import ApiGraphModelsExtensions as Extensions +import codeql.dataflow.internal.AccessPathSyntax /** Module containing hooks for providing input data to be interpreted as a model. */ module ModelInput { @@ -327,29 +327,29 @@ predicate isRelevantFullPath(string type, string path) { } /** A string from a CSV row that should be parsed as an access path. */ -private class AccessPathRange extends AccessPath::Range { - AccessPathRange() { - isRelevantFullPath(_, this) - or - exists(string type | isRelevantType(type) | - summaryModel(type, _, this, _, _) or - summaryModel(type, _, _, this, _) - ) - or - typeVariableModel(_, this) - } +private predicate accessPathRange(string s) { + isRelevantFullPath(_, s) + or + exists(string type | isRelevantType(type) | + summaryModel(type, _, s, _, _) or + summaryModel(type, _, _, s, _) + ) + or + typeVariableModel(_, s) } +import AccessPath + /** * Gets a successor of `node` in the API graph. */ bindingset[token] -API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) { // API graphs use the same label for arguments and parameters. An edge originating from a // use-node represents an argument, and an edge originating from a def-node represents a parameter. // We just map both to the same thing. token.getName() = ["Argument", "Parameter"] and - result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument())) + result = node.getParameter(parseIntUnbounded(token.getAnArgument())) or token.getName() = "ReturnValue" and result = node.getReturn() @@ -362,11 +362,9 @@ API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets an API-graph successor for the given invocation. */ bindingset[token] -API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken token) { +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "Argument" and - result = - invoke - .getParameter(AccessPath::parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) or token.getName() = "ReturnValue" and result = invoke.getReturn() @@ -378,10 +376,12 @@ API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathToken to /** * Holds if `invoke` invokes a call-site filter given by `token`. */ -pragma[inline] -private predicate invocationMatchesCallSiteFilter(Specific::InvokeNode invoke, AccessPathToken token) { +bindingset[token] +private predicate invocationMatchesCallSiteFilter( + Specific::InvokeNode invoke, AccessPathTokenBase token +) { token.getName() = "WithArity" and - invoke.getNumArgument() = AccessPath::parseIntUnbounded(token.getAnArgument()) + invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument()) or Specific::invocationMatchesExtraCallSiteFilter(invoke, token) } diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll index 4c9c8e147eb..536d6d5a966 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -4,14 +4,13 @@ * It must export the following members: * ```ql * class Unit // a unit type - * module AccessPathSyntax // a re-export of the AccessPathSyntax module * class InvokeNode // a type representing an invocation connected to the API graph * module API // the API graph module * predicate isPackageUsed(string package) * API::Node getExtraNodeFromPath(string package, string type, string path, int n) - * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) - * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) - * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToken token) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) + * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) + * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) * InvokeNode getAnInvocationOf(API::Node node) * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) @@ -25,9 +24,7 @@ private import ApiGraphModels // Re-export libraries needed by ApiGraphModels.qll module API = JS::API; -import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax import JS::DataFlow as DataFlow -private import AccessPathSyntax /** * Holds if `rawType` represents the JavaScript type `qualifiedName` from the given NPM `package`. @@ -137,7 +134,7 @@ API::Node getExtraNodeFromType(string type) { * Gets a JavaScript-specific API graph successor of `node` reachable by resolving `token`. */ bindingset[token] -API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) { token.getName() = "Member" and result = node.getMember(token.getAnArgument()) or @@ -183,7 +180,7 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { * Gets a JavaScript-specific API graph successor of `node` reachable by resolving `token`. */ bindingset[token] -API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathToken token) { +API::Node getExtraSuccessorFromInvoke(API::InvokeNode node, AccessPathTokenBase token) { token.getName() = "Instance" and result = node.getInstance() or @@ -233,7 +230,7 @@ API::Node getAFuzzySuccessor(API::Node node) { * Holds if `invoke` matches the JS-specific call site filter in `token`. */ bindingset[token] -predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathToken token) { +predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPathTokenBase token) { token.getName() = "NewCall" and invoke instanceof API::NewNode or @@ -246,9 +243,8 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat operand = token.getAnArgument() and argIndex = operand.splitAt("=", 0) and stringValue = operand.splitAt("=", 1) and - invoke - .getArgument(AccessPath::parseIntWithArity(argIndex, invoke.getNumArgument())) - .getStringValue() = stringValue + invoke.getArgument(parseIntWithArity(argIndex, invoke.getNumArgument())).getStringValue() = + stringValue ) } @@ -338,7 +334,7 @@ predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string a or name = "WithStringArgument" and exists(argument.indexOf("=")) and - exists(AccessPath::parseIntWithArity(argument.splitAt("=", 0), 10)) + exists(parseIntWithArity(argument.splitAt("=", 0), 10)) } module ModelOutputSpecific { diff --git a/javascript/ql/test/library-tests/frameworks/data/test.ql b/javascript/ql/test/library-tests/frameworks/data/test.ql index 5ee8d0e3f9c..039a0aa3920 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.ql +++ b/javascript/ql/test/library-tests/frameworks/data/test.ql @@ -1,6 +1,6 @@ import javascript import testUtilities.ConsistencyChecking -import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax +import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels class Steps extends ModelInput::SummaryModelCsv { override predicate row(string row) { @@ -126,6 +126,6 @@ class SyntaxErrorTest extends ModelInput::SinkModelCsv { } } -query predicate syntaxErrors(AccessPathSyntax::AccessPath path) { path.hasSyntaxError() } +query predicate syntaxErrors(ApiGraphModels::AccessPath path) { path.hasSyntaxError() } query predicate warning = ModelOutput::getAWarning/0; diff --git a/javascript/ql/test/library-tests/frameworks/data/warnings.ql b/javascript/ql/test/library-tests/frameworks/data/warnings.ql index 94e6f74aae7..3a7e2de70e8 100644 --- a/javascript/ql/test/library-tests/frameworks/data/warnings.ql +++ b/javascript/ql/test/library-tests/frameworks/data/warnings.ql @@ -1,5 +1,4 @@ import javascript -import semmle.javascript.frameworks.data.internal.AccessPathSyntax as AccessPathSyntax import semmle.javascript.frameworks.data.internal.ApiGraphModels as ApiGraphModels private class InvalidTypeModel extends ModelInput::TypeModelCsv {