From 253f932d2a1d52d815102f597e4ccc93218ce61a Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Wed, 30 Aug 2023 14:31:08 +0200 Subject: [PATCH] Python: Use data flow consistency checks from shared pack --- config/identical-files.json | 3 - .../new/internal/DataFlowImplConsistency.qll | 313 ++---------------- .../dataflow/TestUtil/DataFlowConsistency.qll | 44 +-- 3 files changed, 35 insertions(+), 325 deletions(-) diff --git a/config/identical-files.json b/config/identical-files.json index f68806818cf..f814b0f1f81 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -55,9 +55,6 @@ "ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTrackingImpl.qll", "swift/ql/lib/codeql/swift/dataflow/internal/tainttracking1/TaintTrackingImpl.qll" ], - "DataFlow Java/C++/C#/Python/Ruby/Swift Consistency checks": [ - "python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll" - ], "DataFlow Java/C#/Go/Ruby/Python/Swift Flow Summaries": [ "java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll", "csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll", diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll index e154491f795..2513a4aa3bd 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll @@ -3,297 +3,50 @@ * data-flow classes and predicates. */ -private import DataFlowImplSpecific::Private -private import DataFlowImplSpecific::Public -private import tainttracking1.TaintTrackingParameter::Private -private import tainttracking1.TaintTrackingParameter::Public +private import python +private import DataFlowImplSpecific +private import TaintTrackingImplSpecific +private import codeql.dataflow.internal.DataFlowImplConsistency -module Consistency { - private newtype TConsistencyConfiguration = MkConsistencyConfiguration() +private module Input implements InputSig { + private import Private + private import Public - /** A class for configuring the consistency queries. */ - class ConsistencyConfiguration extends TConsistencyConfiguration { - string toString() { none() } - - /** Holds if `n` should be excluded from the consistency test `uniqueEnclosingCallable`. */ - predicate uniqueEnclosingCallableExclude(Node n) { none() } - - /** Holds if `call` should be excluded from the consistency test `uniqueCallEnclosingCallable`. */ - predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) { none() } - - /** Holds if `n` should be excluded from the consistency test `uniqueNodeLocation`. */ - predicate uniqueNodeLocationExclude(Node n) { none() } - - /** Holds if `n` should be excluded from the consistency test `missingLocation`. */ - predicate missingLocationExclude(Node n) { none() } - - /** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */ - predicate postWithInFlowExclude(Node n) { none() } - - /** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */ - predicate argHasPostUpdateExclude(ArgumentNode n) { none() } - - /** Holds if `n` should be excluded from the consistency test `reverseRead`. */ - predicate reverseReadExclude(Node n) { none() } - - /** Holds if `n` should be excluded from the consistency test `postHasUniquePre`. */ - predicate postHasUniquePreExclude(PostUpdateNode n) { none() } - - /** Holds if `n` should be excluded from the consistency test `uniquePostUpdate`. */ - predicate uniquePostUpdateExclude(Node n) { none() } - - /** Holds if `(call, ctx)` should be excluded from the consistency test `viableImplInCallContextTooLargeExclude`. */ - predicate viableImplInCallContextTooLargeExclude( - DataFlowCall call, DataFlowCall ctx, DataFlowCallable callable - ) { - none() - } - - /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodeAtPosition`. */ - predicate uniqueParameterNodeAtPositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) { - none() - } - - /** Holds if `(c, pos, p)` should be excluded from the consistency test `uniqueParameterNodePosition`. */ - predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) { - none() - } - - /** Holds if `n` should be excluded from the consistency test `identityLocalStep`. */ - predicate identityLocalStepExclude(Node n) { none() } - } - - private class RelevantNode extends Node { - RelevantNode() { - this instanceof ArgumentNode or - this instanceof ParameterNode or - this instanceof ReturnNode or - this = getAnOutNode(_, _) or - simpleLocalFlowStep(this, _) or - simpleLocalFlowStep(_, this) or - jumpStep(this, _) or - jumpStep(_, this) or - storeStep(this, _, _) or - storeStep(_, _, this) or - readStep(this, _, _) or - readStep(_, _, this) or - defaultAdditionalTaintStep(this, _) or - defaultAdditionalTaintStep(_, this) - } - } - - query predicate uniqueEnclosingCallable(Node n, string msg) { - exists(int c | - n instanceof RelevantNode and - c = count(nodeGetEnclosingCallable(n)) and - c != 1 and - not any(ConsistencyConfiguration conf).uniqueEnclosingCallableExclude(n) and - msg = "Node should have one enclosing callable but has " + c + "." - ) - } - - query predicate uniqueCallEnclosingCallable(DataFlowCall call, string msg) { - exists(int c | - c = count(call.getEnclosingCallable()) and - c != 1 and - not any(ConsistencyConfiguration conf).uniqueCallEnclosingCallableExclude(call) and - msg = "Call should have one enclosing callable but has " + c + "." - ) - } - - query predicate uniqueType(Node n, string msg) { - exists(int c | - n instanceof RelevantNode and - c = count(getNodeType(n)) and - c != 1 and - msg = "Node should have one type but has " + c + "." - ) - } - - query predicate uniqueNodeLocation(Node n, string msg) { - exists(int c | - c = - count(string filepath, int startline, int startcolumn, int endline, int endcolumn | - n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) - ) and - c != 1 and - not any(ConsistencyConfiguration conf).uniqueNodeLocationExclude(n) and - msg = "Node should have one location but has " + c + "." - ) - } - - query predicate missingLocation(string msg) { - exists(int c | - c = - strictcount(Node n | - not n.hasLocationInfo(_, _, _, _, _) and - not any(ConsistencyConfiguration conf).missingLocationExclude(n) - ) and - msg = "Nodes without location: " + c - ) - } - - query predicate uniqueNodeToString(Node n, string msg) { - exists(int c | - c = count(n.toString()) and - c != 1 and - msg = "Node should have one toString but has " + c + "." - ) - } - - query predicate missingToString(string msg) { - exists(int c | - c = strictcount(Node n | not exists(n.toString())) and - msg = "Nodes without toString: " + c - ) - } - - query predicate parameterCallable(ParameterNode p, string msg) { - exists(DataFlowCallable c | isParameterNode(p, c, _) and c != nodeGetEnclosingCallable(p)) and - msg = "Callable mismatch for parameter." - } - - query predicate localFlowIsLocal(Node n1, Node n2, string msg) { - simpleLocalFlowStep(n1, n2) and - nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and - msg = "Local flow step does not preserve enclosing callable." - } - - query predicate readStepIsLocal(Node n1, Node n2, string msg) { - readStep(n1, _, n2) and - nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and - msg = "Read step does not preserve enclosing callable." - } - - query predicate storeStepIsLocal(Node n1, Node n2, string msg) { - storeStep(n1, _, n2) and - nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and - msg = "Store step does not preserve enclosing callable." - } - - private DataFlowType typeRepr() { result = getNodeType(_) } - - query predicate compatibleTypesReflexive(DataFlowType t, string msg) { - t = typeRepr() and - not compatibleTypes(t, t) and - msg = "Type compatibility predicate is not reflexive." - } - - query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) { - isUnreachableInCall(n, call) and - exists(DataFlowCallable c | - c = nodeGetEnclosingCallable(n) and - not viableCallable(call) = c - ) and - msg = "Call context for isUnreachableInCall is inconsistent with call graph." - } - - query predicate localCallNodes(DataFlowCall call, Node n, string msg) { - ( - n = getAnOutNode(call, _) and - msg = "OutNode and call does not share enclosing callable." - or - n.(ArgumentNode).argumentOf(call, _) and - msg = "ArgumentNode and call does not share enclosing callable." - ) and - nodeGetEnclosingCallable(n) != call.getEnclosingCallable() - } - - // This predicate helps the compiler forget that in some languages - // it is impossible for a result of `getPreUpdateNode` to be an - // instance of `PostUpdateNode`. - private Node getPre(PostUpdateNode n) { - result = n.getPreUpdateNode() + predicate argHasPostUpdateExclude(ArgumentNode n) { + exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_)) or - none() + exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat()) } - query predicate postIsNotPre(PostUpdateNode n, string msg) { - getPre(n) = n and - msg = "PostUpdateNode should not equal its pre-update node." + predicate reverseReadExclude(Node n) { + // since `self`/`cls` parameters can be marked as implicit argument to `super()`, + // they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs` + // parameter, but dataflow-consistency queries should _not_ complain about there not + // being a post-update node for the synthetic `**kwargs` parameter. + n instanceof SynthDictSplatParameterNode } - query predicate postHasUniquePre(PostUpdateNode n, string msg) { - not any(ConsistencyConfiguration conf).postHasUniquePreExclude(n) and - exists(int c | - c = count(n.getPreUpdateNode()) and - c != 1 and - msg = "PostUpdateNode should have one pre-update node but has " + c + "." + predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) { + // For normal parameters that can both be passed as positional arguments or keyword + // arguments, we currently have parameter positions for both cases.. + // + // TODO: Figure out how bad breaking this consistency check is + exists(Function func, Parameter param | + c.getScope() = func and + p = parameterNode(param) and + c.getParameter(pos) = p and + param = func.getArg(_) and + param = func.getArgByName(_) ) } - query predicate uniquePostUpdate(Node n, string msg) { - not any(ConsistencyConfiguration conf).uniquePostUpdateExclude(n) and - 1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and - msg = "Node has multiple PostUpdateNodes." + predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) { + not exists(call.getLocation().getFile().getRelativePath()) } - query predicate postIsInSameCallable(PostUpdateNode n, string msg) { - nodeGetEnclosingCallable(n) != nodeGetEnclosingCallable(n.getPreUpdateNode()) and - msg = "PostUpdateNode does not share callable with its pre-update node." - } - - private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) } - - query predicate reverseRead(Node n, string msg) { - exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and - not any(ConsistencyConfiguration conf).reverseReadExclude(n) and - msg = "Origin of readStep is missing a PostUpdateNode." - } - - query predicate argHasPostUpdate(ArgumentNode n, string msg) { - not hasPost(n) and - not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and - msg = "ArgumentNode is missing PostUpdateNode." - } - - // This predicate helps the compiler forget that in some languages - // it is impossible for a `PostUpdateNode` to be the target of - // `simpleLocalFlowStep`. - private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() } - - query predicate postWithInFlow(Node n, string msg) { - isPostUpdateNode(n) and - not clearsContent(n, _) and - simpleLocalFlowStep(_, n) and - not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and - msg = "PostUpdateNode should not be the target of local flow." - } - - query predicate viableImplInCallContextTooLarge( - DataFlowCall call, DataFlowCall ctx, DataFlowCallable callable - ) { - callable = viableImplInCallContext(call, ctx) and - not callable = viableCallable(call) and - not any(ConsistencyConfiguration c).viableImplInCallContextTooLargeExclude(call, ctx, callable) - } - - query predicate uniqueParameterNodeAtPosition( - DataFlowCallable c, ParameterPosition pos, Node p, string msg - ) { - not any(ConsistencyConfiguration conf).uniqueParameterNodeAtPositionExclude(c, pos, p) and - isParameterNode(p, c, pos) and - not exists(unique(Node p0 | isParameterNode(p0, c, pos))) and - msg = "Parameters with overlapping positions." - } - - query predicate uniqueParameterNodePosition( - DataFlowCallable c, ParameterPosition pos, Node p, string msg - ) { - not any(ConsistencyConfiguration conf).uniqueParameterNodePositionExclude(c, pos, p) and - isParameterNode(p, c, pos) and - not exists(unique(ParameterPosition pos0 | isParameterNode(p, c, pos0))) and - msg = "Parameter node with multiple positions." - } - - query predicate uniqueContentApprox(Content c, string msg) { - not exists(unique(ContentApprox approx | approx = getContentApprox(c))) and - msg = "Non-unique content approximation." - } - - query predicate identityLocalStep(Node n, string msg) { - simpleLocalFlowStep(n, n) and - not any(ConsistencyConfiguration c).identityLocalStepExclude(n) and - msg = "Node steps to itself" + predicate identityLocalStepExclude(Node n) { + not exists(n.getLocation().getFile().getRelativePath()) } } + +module Consistency = MakeConsistency; diff --git a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll index fd56070f86b..76ddc3643f1 100644 --- a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll +++ b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll @@ -1,46 +1,6 @@ +// TODO: this should be promoted to be a REAL consistency query by being placed in +// `python/ql/consistency-queries`. For for now it resides here. import python import semmle.python.dataflow.new.DataFlow::DataFlow import semmle.python.dataflow.new.internal.DataFlowPrivate import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency - -// TODO: this should be promoted to be a REAL consistency query by being placed in -// `python/ql/consistency-queries`. For for now it resides here. -private class MyConsistencyConfiguration extends ConsistencyConfiguration { - override predicate argHasPostUpdateExclude(ArgumentNode n) { - exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_)) - or - exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat()) - } - - override predicate reverseReadExclude(Node n) { - // since `self`/`cls` parameters can be marked as implicit argument to `super()`, - // they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs` - // parameter, but dataflow-consistency queries should _not_ complain about there not - // being a post-update node for the synthetic `**kwargs` parameter. - n instanceof SynthDictSplatParameterNode - } - - override predicate uniqueParameterNodePositionExclude( - DataFlowCallable c, ParameterPosition pos, Node p - ) { - // For normal parameters that can both be passed as positional arguments or keyword - // arguments, we currently have parameter positions for both cases.. - // - // TODO: Figure out how bad breaking this consistency check is - exists(Function func, Parameter param | - c.getScope() = func and - p = parameterNode(param) and - c.getParameter(pos) = p and - param = func.getArg(_) and - param = func.getArgByName(_) - ) - } - - override predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) { - not exists(call.getLocation().getFile().getRelativePath()) - } - - override predicate identityLocalStepExclude(Node n) { - not exists(n.getLocation().getFile().getRelativePath()) - } -}