From 9502756874757c9fda52f6486340e60da47c97f2 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 23 Jan 2020 13:20:22 +0100 Subject: [PATCH 1/8] Python: Autoformat dataflow files --- .../semmle/python/dataflow/Configuration.qll | 23 +- .../src/semmle/python/dataflow/DataFlow.qll | 2 +- .../ql/src/semmle/python/dataflow/Files.qll | 11 +- .../semmle/python/dataflow/Implementation.qll | 521 +++++++++++------- .../ql/src/semmle/python/dataflow/Legacy.qll | 35 +- .../semmle/python/dataflow/StateTracking.qll | 77 ++- .../semmle/python/dataflow/TaintTracking.qll | 358 +++++------- python/ql/src/semmle/python/essa/Essa.qll | 15 +- .../src/semmle/python/essa/SsaDefinitions.qll | 6 +- 9 files changed, 512 insertions(+), 536 deletions(-) diff --git a/python/ql/src/semmle/python/dataflow/Configuration.qll b/python/ql/src/semmle/python/dataflow/Configuration.qll index efbcaf31a0d..851578999a8 100644 --- a/python/ql/src/semmle/python/dataflow/Configuration.qll +++ b/python/ql/src/semmle/python/dataflow/Configuration.qll @@ -4,7 +4,6 @@ private import semmle.python.objects.ObjectInternal private import semmle.python.dataflow.Implementation module TaintTracking { - class Source = TaintSource; class Sink = TaintSink; @@ -16,13 +15,11 @@ module TaintTracking { class PathSink = TaintTrackingNode; abstract class Configuration extends string { - /* Required to prevent compiler warning */ bindingset[this] Configuration() { this = this } /* Old implementation API */ - predicate isSource(Source src) { none() } predicate isSink(Sink sink) { none() } @@ -32,7 +29,6 @@ module TaintTracking { predicate isExtension(Extension extension) { none() } /* New implementation API */ - /** * Holds if `src` is a source of taint of `kind` that is relevant * for this configuration. @@ -66,7 +62,9 @@ module TaintTracking { /** * Holds if `src -> dest` is a flow edge converting taint from `srckind` to `destkind`. */ - predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind) { + predicate isAdditionalFlowStep( + DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind + ) { none() } @@ -79,9 +77,7 @@ module TaintTracking { * Holds if `node` should be considered as a barrier to flow of `kind`. */ predicate isBarrier(DataFlow::Node node, TaintKind kind) { - exists(Sanitizer sanitizer | - this.isSanitizer(sanitizer) - | + exists(Sanitizer sanitizer | this.isSanitizer(sanitizer) | sanitizer.sanitizingNode(kind, node.asCfgNode()) or sanitizer.sanitizingEdge(kind, node.asVariable()) @@ -112,16 +108,18 @@ module TaintTracking { * Holds if flow from `src` to `dest` is prohibited when the incoming taint is `srckind` and the outgoing taint is `destkind`. * Note that `srckind` and `destkind` can be the same. */ - predicate isBarrierEdge(DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind) { none() } + predicate isBarrierEdge( + DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind + ) { + none() + } /* Common query API */ - predicate hasFlowPath(PathSource src, PathSink sink) { this.(TaintTrackingImplementation).hasFlowPath(src, sink) } /* Old query API */ - /* deprecated */ deprecated predicate hasFlow(Source src, Sink sink) { exists(PathSource psrc, PathSink psink | @@ -132,7 +130,6 @@ module TaintTracking { } /* New query API */ - predicate hasSimpleFlow(DataFlow::Node src, DataFlow::Node sink) { exists(PathSource psrc, PathSink psink | this.hasFlowPath(psrc, psink) and @@ -140,7 +137,5 @@ module TaintTracking { sink = psink.getNode() ) } - } - } diff --git a/python/ql/src/semmle/python/dataflow/DataFlow.qll b/python/ql/src/semmle/python/dataflow/DataFlow.qll index e2a05ee0118..4684441563c 100644 --- a/python/ql/src/semmle/python/dataflow/DataFlow.qll +++ b/python/ql/src/semmle/python/dataflow/DataFlow.qll @@ -1 +1 @@ -import semmle.python.security.TaintTracking \ No newline at end of file +import semmle.python.security.TaintTracking diff --git a/python/ql/src/semmle/python/dataflow/Files.qll b/python/ql/src/semmle/python/dataflow/Files.qll index 2cf2cca780c..651b7a93850 100644 --- a/python/ql/src/semmle/python/dataflow/Files.qll +++ b/python/ql/src/semmle/python/dataflow/Files.qll @@ -2,25 +2,18 @@ import python import semmle.python.security.TaintTracking class OpenFile extends TaintKind { - OpenFile() { this = "file.open" } override string repr() { result = "an open file" } - - } class OpenFileConfiguration extends TaintTracking::Configuration { - - OpenFileConfiguration() { this = "Open file configuration" } + OpenFileConfiguration() { this = "Open file configuration" } override predicate isSource(DataFlow::Node src, TaintKind kind) { theOpenFunction().(FunctionObject).getACall() = src.asCfgNode() and kind instanceof OpenFile } - override predicate isSink(DataFlow::Node sink, TaintKind kind) { - none() - } - + override predicate isSink(DataFlow::Node sink, TaintKind kind) { none() } } diff --git a/python/ql/src/semmle/python/dataflow/Implementation.qll b/python/ql/src/semmle/python/dataflow/Implementation.qll index edf8b9e60c6..dc569904b47 100644 --- a/python/ql/src/semmle/python/dataflow/Implementation.qll +++ b/python/ql/src/semmle/python/dataflow/Implementation.qll @@ -4,28 +4,27 @@ private import semmle.python.objects.ObjectInternal private import semmle.python.pointsto.Filters as Filters import semmle.python.dataflow.Legacy -/* See tests/library-tests/taint/examples +/* + * See tests/library-tests/taint/examples * For examples of taint sources, sinks and flow, * including attribute paths, contexts and edges. */ - newtype TTaintTrackingContext = - TNoParam() - or + TNoParam() or TParamContext(TaintKind param, AttributePath path, int n) { any(TaintTrackingImplementation impl).callWithTaintedArgument(_, _, _, _, n, path, param) } -/** The context for taint-tracking. +/** + * The context for taint-tracking. * There are two types of contexts: * * No context; the context at a source. * * Tainted parameter; tracks the taint and attribute-path for a parameter * Used to track taint through calls accurately and reasonably efficiently. */ class TaintTrackingContext extends TTaintTrackingContext { - - string toString() { + string toString() { this = TNoParam() and result = "" or exists(TaintKind param, AttributePath path, int n | @@ -34,17 +33,11 @@ class TaintTrackingContext extends TTaintTrackingContext { ) } - TaintKind getParameterTaint(int n) { - this = TParamContext(result, _, n) - } + TaintKind getParameterTaint(int n) { this = TParamContext(result, _, n) } - AttributePath getAttributePath() { - this = TParamContext(_, result, _) - } + AttributePath getAttributePath() { this = TParamContext(_, result, _) } - TaintTrackingContext getCaller() { - result = this.getCaller(_) - } + TaintTrackingContext getCaller() { result = this.getCaller(_) } TaintTrackingContext getCaller(CallNode call) { exists(TaintKind param, AttributePath path, int n | @@ -55,60 +48,46 @@ class TaintTrackingContext extends TTaintTrackingContext { ) } - predicate isTop() { - this = TNoParam() - } - + predicate isTop() { this = TNoParam() } } private newtype TAttributePath = - TNoAttribute() - or - TAttribute(string name) { - exists(Attribute a | a.getName() = name) - } - /* It might make sense to add another level, attribute of attribute. + TNoAttribute() or + /* + * It might make sense to add another level, attribute of attribute. * But some experimentation would be needed. */ + TAttribute(string name) { exists(Attribute a | a.getName() = name) } -/** The attribute of the tracked value holding the taint. + +/** + * The attribute of the tracked value holding the taint. * This is usually "no attribute". * Used for tracking tainted attributes of objects. */ abstract class AttributePath extends TAttributePath { - abstract string toString(); abstract string extension(); abstract AttributePath fromAttribute(string name); - AttributePath getAttribute(string name) { - this = result.fromAttribute(name) - } - - predicate noAttribute() { - this = TNoAttribute() - } + AttributePath getAttribute(string name) { this = result.fromAttribute(name) } + predicate noAttribute() { this = TNoAttribute() } } /** AttributePath for no attribute. */ class NoAttribute extends TNoAttribute, AttributePath { - override string toString() { result = "no attribute" } override string extension() { result = "" } - override AttributePath fromAttribute(string name) { - none() - } - + override AttributePath fromAttribute(string name) { none() } } /** AttributePath for an attribute. */ class NamedAttributePath extends TAttribute, AttributePath { - override string toString() { exists(string attr | this = TAttribute(attr) and @@ -126,76 +105,57 @@ class NamedAttributePath extends TAttribute, AttributePath { override AttributePath fromAttribute(string name) { this = TAttribute(name) and result = TNoAttribute() } - } -/** Type representing the (node, context, path, kind) tuple. - * Construction of this type is mutually recursive with `TaintTrackingImplementation.flowStep(...)` +/** + * Type representing the (node, context, path, kind) tuple. + * Construction of this type is mutually recursive with `TaintTrackingImplementation.flowStep(...)` */ newtype TTaintTrackingNode = - TTaintTrackingNode_(DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, TaintTracking::Configuration config) { + TTaintTrackingNode_( + DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, + TaintTracking::Configuration config + ) { config.(TaintTrackingImplementation).flowSource(node, context, path, kind) or config.(TaintTrackingImplementation).flowStep(_, node, context, path, kind, _) } -/** Class representing the (node, context, path, kind) tuple. +/** + * Class representing the (node, context, path, kind) tuple. * Used for context-sensitive path-aware taint-tracking. */ class TaintTrackingNode extends TTaintTrackingNode { - string toString() { - if this.getPath() instanceof NoAttribute then ( - result = this.getTaintKind().repr() - ) else ( - result = this.getPath().extension() + " = " + this.getTaintKind().repr() - ) + if this.getPath() instanceof NoAttribute + then result = this.getTaintKind().repr() + else result = this.getPath().extension() + " = " + this.getTaintKind().repr() } /** Gets the data flow node for this taint-tracking node */ - DataFlow::Node getNode() { - this = TTaintTrackingNode_(result, _, _, _, _) - } + DataFlow::Node getNode() { this = TTaintTrackingNode_(result, _, _, _, _) } /** Gets the taint kind for this taint-tracking node */ - TaintKind getTaintKind() { - this = TTaintTrackingNode_(_, _, _, result, _) - } + TaintKind getTaintKind() { this = TTaintTrackingNode_(_, _, _, result, _) } /** Gets the taint-tracking context for this taint-tracking node */ - TaintTrackingContext getContext() { - this = TTaintTrackingNode_(_, result, _, _, _) - } + TaintTrackingContext getContext() { this = TTaintTrackingNode_(_, result, _, _, _) } /** Gets the attribute path context for this taint-tracking node */ - AttributePath getPath() { - this = TTaintTrackingNode_(_, _, result, _, _) - } + AttributePath getPath() { this = TTaintTrackingNode_(_, _, result, _, _) } - TaintTracking::Configuration getConfiguration() { - this = TTaintTrackingNode_(_, _, _, _, result) - } + TaintTracking::Configuration getConfiguration() { this = TTaintTrackingNode_(_, _, _, _, result) } - Location getLocation() { - result = this.getNode().getLocation() - } + Location getLocation() { result = this.getNode().getLocation() } - predicate isSource() { - this.getConfiguration().(TaintTrackingImplementation).isPathSource(this) - } + predicate isSource() { this.getConfiguration().(TaintTrackingImplementation).isPathSource(this) } - predicate isSink() { - this.getConfiguration().(TaintTrackingImplementation).isPathSink(this) - } + predicate isSink() { this.getConfiguration().(TaintTrackingImplementation).isPathSink(this) } - ControlFlowNode getCfgNode() { - result = this.getNode().asCfgNode() - } + ControlFlowNode getCfgNode() { result = this.getNode().asCfgNode() } /** Get the AST node for this node. */ - AstNode getAstNode() { - result = this.getCfgNode().getNode() - } + AstNode getAstNode() { result = this.getCfgNode().getNode() } TaintTrackingNode getASuccessor(string edgeLabel) { this.isVisible() and @@ -225,24 +185,20 @@ class TaintTrackingNode extends TTaintTrackingNode { this.isSource() } - predicate flowsTo(TaintTrackingNode other) { - this.getASuccessor*() = other - } + predicate flowsTo(TaintTrackingNode other) { this.getASuccessor*() = other } } -/** The implementation of taint-tracking +/** + * The implementation of taint-tracking * Each `TaintTrackingImplementation` is also a `TaintTracking::Configuration` * It is implemented as a separate class for clarity and to keep the code * in `TaintTracking::Configuration` simpler. */ class TaintTrackingImplementation extends string { + TaintTrackingImplementation() { this instanceof TaintTracking::Configuration } - - TaintTrackingImplementation() { - this instanceof TaintTracking::Configuration - } - - /** Hold if there is a flow from `source`, which is a taint source, to + /** + * Hold if there is a flow from `source`, which is a taint source, to * `sink`, which is a taint sink, with this configuration. */ predicate hasFlowPath(TaintTrackingNode source, TaintTrackingNode sink) { @@ -251,10 +207,14 @@ class TaintTrackingImplementation extends string { source.flowsTo(sink) } - /** Hold if `node` is a source of taint `kind` with context `context` and attribute path `path`. + /** + * Hold if `node` is a source of taint `kind` with context `context` and attribute path `path`. */ - predicate flowSource(DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind) { - context = TNoParam() and path = TNoAttribute() and + predicate flowSource( + DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind + ) { + context = TNoParam() and + path = TNoAttribute() and this.(TaintTracking::Configuration).isSource(node, kind) } @@ -275,7 +235,8 @@ class TaintTrackingImplementation extends string { ) } - /** Hold if taint flows to `src` to `dest` in a single step, labeled with `edgeLabel` + /** + * Hold if taint flows to `src` to `dest` in a single step, labeled with `edgeLabel` * `edgeLabel` is purely informative. */ predicate flowStep(TaintTrackingNode src, TaintTrackingNode dest, string edgeLabel) { @@ -285,10 +246,14 @@ class TaintTrackingImplementation extends string { ) } - /** Hold if taint flows to `src` to `(node, context, path, kind)` in a single step, labelled with `egdeLabel` with this configuration. + /** + * Hold if taint flows to `src` to `(node, context, path, kind)` in a single step, labelled with `egdeLabel` with this configuration. * `edgeLabel` is purely informative. */ - predicate flowStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + predicate flowStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { this.unprunedStep(src, node, context, path, kind, edgeLabel) and node.getBasicBlock().likelyReachable() and not this.(TaintTracking::Configuration).isBarrier(node) and @@ -303,13 +268,18 @@ class TaintTrackingImplementation extends string { ) } - private predicate prunedEdge(DataFlow::Node srcnode, DataFlow::Node destnode, TaintKind srckind, TaintKind destkind) { + private predicate prunedEdge( + DataFlow::Node srcnode, DataFlow::Node destnode, TaintKind srckind, TaintKind destkind + ) { this.(TaintTracking::Configuration).isBarrierEdge(srcnode, destnode, srckind, destkind) or srckind = destkind and this.(TaintTracking::Configuration).isBarrierEdge(srcnode, destnode) } - private predicate unprunedStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + private predicate unprunedStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { this.importStep(src, node, context, path, kind, edgeLabel) or this.fromImportStep(src, node, context, path, kind, edgeLabel) @@ -343,19 +313,21 @@ class TaintTrackingImplementation extends string { exists(DataFlow::Node srcnode, TaintKind srckind | this.(TaintTracking::Configuration).isAdditionalFlowStep(srcnode, node, srckind, kind) and src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and - path.noAttribute() and edgeLabel = "additional with kind" + path.noAttribute() and + edgeLabel = "additional with kind" ) or exists(DataFlow::Node srcnode | this.(TaintTracking::Configuration).isAdditionalFlowStep(srcnode, node) and src = TTaintTrackingNode_(srcnode, context, path, kind, this) and - path.noAttribute() and edgeLabel = "additional" + path.noAttribute() and + edgeLabel = "additional" ) or exists(DataFlow::Node srcnode, TaintKind srckind | src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and path.noAttribute() - | + | kind = srckind.getTaintForFlowStep(srcnode.asCfgNode(), node.asCfgNode(), edgeLabel) or kind = srckind.(CollectionKind).getMember() and @@ -368,14 +340,21 @@ class TaintTrackingImplementation extends string { or kind = srckind and srckind.flowStep(srcnode, node, edgeLabel) or - kind = srckind and srckind instanceof DictKind and DictKind::flowStep(srcnode.asCfgNode(), node.asCfgNode(), edgeLabel) + kind = srckind and + srckind instanceof DictKind and + DictKind::flowStep(srcnode.asCfgNode(), node.asCfgNode(), edgeLabel) or - kind = srckind and srckind instanceof SequenceKind and SequenceKind::flowStep(srcnode.asCfgNode(), node.asCfgNode(), edgeLabel) + kind = srckind and + srckind instanceof SequenceKind and + SequenceKind::flowStep(srcnode.asCfgNode(), node.asCfgNode(), edgeLabel) ) } - pragma [noinline] - predicate importStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate importStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { edgeLabel = "import" and exists(ModuleValue m, string name, AttributePath srcpath | src = TTaintTrackingNode_(_, context, srcpath, kind, this) and @@ -385,8 +364,11 @@ class TaintTrackingImplementation extends string { ) } - pragma [noinline] - predicate fromImportStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate fromImportStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { edgeLabel = "from import" and exists(ModuleValue m, string name | src = TTaintTrackingNode_(_, context, path, kind, this) and @@ -395,24 +377,32 @@ class TaintTrackingImplementation extends string { ) } - pragma [noinline] - predicate attributeLoadStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate attributeLoadStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(DataFlow::Node srcnode, AttributePath srcpath, string attrname | src = TTaintTrackingNode_(srcnode, context, srcpath, kind, this) and srcnode.asCfgNode() = node.asCfgNode().(AttrNode).getObject(attrname) and - path = srcpath.fromAttribute(attrname) and edgeLabel = "from path attribute" + path = srcpath.fromAttribute(attrname) and + edgeLabel = "from path attribute" ) or exists(DataFlow::Node srcnode, TaintKind srckind, string attrname | src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and srcnode.asCfgNode() = node.asCfgNode().(AttrNode).getObject(attrname) and - kind = srckind.getTaintOfAttribute(attrname) and edgeLabel = "from taint attribute" and + kind = srckind.getTaintOfAttribute(attrname) and + edgeLabel = "from taint attribute" and path instanceof NoAttribute ) } - pragma [noinline] - predicate getattrStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate getattrStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(DataFlow::Node srcnode, AttributePath srcpath, TaintKind srckind, string attrname | src = TTaintTrackingNode_(srcnode, context, srcpath, srckind, this) and exists(CallNode call, ControlFlowNode arg | @@ -421,67 +411,96 @@ class TaintTrackingImplementation extends string { arg = call.getArg(0) and attrname = call.getArg(1).getNode().(StrConst).getText() and arg = srcnode.asCfgNode() - | + | path = srcpath.fromAttribute(attrname) and kind = srckind or path = srcpath and kind = srckind.getTaintOfAttribute(attrname) ) - ) and edgeLabel = "getattr" + ) and + edgeLabel = "getattr" } - pragma [noinline] - predicate useStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate useStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and node.asCfgNode() = srcnode.asVariable().getASourceUse() - ) and edgeLabel = "use" + ) and + edgeLabel = "use" } /* If the return value is tainted without context, then it always flows back to the caller */ - pragma [noinline] - predicate returnFlowStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate returnFlowStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(CallNode call, PythonFunctionObjectInternal pyfunc, DataFlow::Node retval | pyfunc.getACall() = call and context = TNoParam() and src = TTaintTrackingNode_(retval, TNoParam(), path, kind, this) and node.asCfgNode() = call and - retval.asCfgNode() = any(Return ret | ret.getScope() = pyfunc.getScope()).getValue().getAFlowNode() + retval.asCfgNode() = any(Return ret | ret.getScope() = pyfunc.getScope()) + .getValue() + .getAFlowNode() ) and edgeLabel = "return" } - /* Avoid taint flow from return value to caller as it can produce imprecise flow graphs + /* + * Avoid taint flow from return value to caller as it can produce imprecise flow graphs * Step directly from tainted argument to call result. */ - pragma [noinline] - predicate callFlowStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { - exists(CallNode call, PythonFunctionObjectInternal pyfunc, TaintTrackingContext callee, DataFlow::Node retval, TaintTrackingNode retnode | + + pragma[noinline] + predicate callFlowStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { + exists( + CallNode call, PythonFunctionObjectInternal pyfunc, TaintTrackingContext callee, + DataFlow::Node retval, TaintTrackingNode retnode + | this.callContexts(call, src, pyfunc, context, callee) and retnode = TTaintTrackingNode_(retval, callee, path, kind, this) and node.asCfgNode() = call and - retval.asCfgNode() = any(Return ret | ret.getScope() = pyfunc.getScope()).getValue().getAFlowNode() + retval.asCfgNode() = any(Return ret | ret.getScope() = pyfunc.getScope()) + .getValue() + .getAFlowNode() ) and edgeLabel = "call" } - pragma [noinline] - predicate callContexts(CallNode call, TaintTrackingNode argnode, PythonFunctionObjectInternal pyfunc, TaintTrackingContext caller, TaintTrackingContext callee) { + pragma[noinline] + predicate callContexts( + CallNode call, TaintTrackingNode argnode, PythonFunctionObjectInternal pyfunc, + TaintTrackingContext caller, TaintTrackingContext callee + ) { exists(int arg, TaintKind callerKind, AttributePath callerPath | this.callWithTaintedArgument(argnode, call, caller, pyfunc, arg, callerPath, callerKind) and callee = TParamContext(callerKind, callerPath, arg) ) } - predicate callWithTaintedArgument(TaintTrackingNode src, CallNode call, TaintTrackingContext caller, CallableValue pyfunc, int arg, AttributePath path, TaintKind kind) { + predicate callWithTaintedArgument( + TaintTrackingNode src, CallNode call, TaintTrackingContext caller, CallableValue pyfunc, + int arg, AttributePath path, TaintKind kind + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, caller, path, kind, this) and srcnode.asCfgNode() = pyfunc.getArgumentForCall(call, arg) ) } - predicate instantiationStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + predicate instantiationStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(PythonFunctionValue init, EssaVariable self, TaintTrackingContext callee | instantiationCall(node.asCfgNode(), src, init, context, callee) and this.(EssaTaintTracking).taintedDefinition(_, self.getDefinition(), callee, path, kind) and @@ -492,51 +511,69 @@ class TaintTrackingImplementation extends string { edgeLabel = "instantiation" } - predicate instantiationCall(CallNode call, TaintTrackingNode argnode, PythonFunctionObjectInternal init, TaintTrackingContext caller, TaintTrackingContext callee) { + predicate instantiationCall( + CallNode call, TaintTrackingNode argnode, PythonFunctionObjectInternal init, + TaintTrackingContext caller, TaintTrackingContext callee + ) { exists(ClassValue cls | call.getFunction().pointsTo(cls) and cls.lookup("__init__") = init - | + | exists(int arg, TaintKind callerKind, AttributePath callerPath, DataFlow::Node argument | argnode = TTaintTrackingNode_(argument, caller, callerPath, callerKind, this) and - call.getArg(arg-1) = argument.asCfgNode() and + call.getArg(arg - 1) = argument.asCfgNode() and callee = TParamContext(callerKind, callerPath, arg) ) ) } - pragma [noinline] - predicate callTaintStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate callTaintStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(DataFlow::Node srcnode, CallNode call, TaintKind srckind, string name | src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and call.getFunction().(AttrNode).getObject(name) = src.getNode().asCfgNode() and kind = srckind.getTaintOfMethodResult(name) and node.asCfgNode() = call - ) and edgeLabel = "call" + ) and + edgeLabel = "call" } - pragma [noinline] - predicate iterationStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate iterationStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(ForNode for, DataFlow::Node sequence, TaintKind seqkind | src = TTaintTrackingNode_(sequence, context, path, seqkind, this) and for.iterates(_, sequence.asCfgNode()) and node.asCfgNode() = for and path.noAttribute() and kind = seqkind.getTaintForIteration() - ) and edgeLabel = "iteration" + ) and + edgeLabel = "iteration" } - pragma [noinline] - predicate parameterStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate parameterStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(CallNode call, PythonFunctionObjectInternal pyfunc, int arg | this.callWithTaintedArgument(src, call, _, pyfunc, arg, path, kind) and node.asCfgNode() = pyfunc.getParameter(arg) and context = TParamContext(kind, path, arg) - ) and edgeLabel = "parameter" + ) and + edgeLabel = "parameter" } - pragma [noinline] - predicate yieldStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate yieldStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(DataFlow::Node srcnode, TaintKind itemkind | src = TTaintTrackingNode_(srcnode, context, path, itemkind, this) and itemkind = kind.getTaintForIteration() and @@ -548,35 +585,50 @@ class TaintTrackingImplementation extends string { yield.getValue() = srcnode.asCfgNode().getNode() ) ) - ) and edgeLabel = "yield" + ) and + edgeLabel = "yield" } - pragma [noinline] - predicate ifExpStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate ifExpStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and srcnode.asCfgNode() = node.asCfgNode().(IfExprNode).getAnOperand() - ) and edgeLabel = "if exp" + ) and + edgeLabel = "if exp" } - pragma [noinline] - predicate essaFlowStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { - this.(EssaTaintTracking).taintedDefinition(src, node.asVariable().getDefinition(), context, path, kind) and edgeLabel = "" + pragma[noinline] + predicate essaFlowStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { + this + .(EssaTaintTracking) + .taintedDefinition(src, node.asVariable().getDefinition(), context, path, kind) and + edgeLabel = "" } - pragma [noinline] - predicate legacyExtensionStep(TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, TaintKind kind, string edgeLabel) { + pragma[noinline] + predicate legacyExtensionStep( + TaintTrackingNode src, DataFlow::Node node, TaintTrackingContext context, AttributePath path, + TaintKind kind, string edgeLabel + ) { exists(TaintTracking::Extension extension, DataFlow::Node srcnode, TaintKind srckind | this.(TaintTracking::Configuration).isExtension(extension) and src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and srcnode.asCfgNode() = extension - | + | extension.getASuccessorNode() = node.asCfgNode() and kind = srckind or extension.getASuccessorNode(srckind, kind) = node.asCfgNode() or extension.getASuccessorVariable() = node.asVariable() and kind = srckind - ) and edgeLabel = "legacy extension" + ) and + edgeLabel = "legacy extension" } predicate moduleAttributeTainted(ModuleValue m, string name, TaintTrackingNode taint) { @@ -588,18 +640,20 @@ class TaintTrackingImplementation extends string { var.getScope() = m.getScope() ) } - } -/** Another taint-tracking class to help partition the code for clarity - * This class handle tracking of ESSA variables. */ +/** + * Another taint-tracking class to help partition the code for clarity + * This class handle tracking of ESSA variables. + */ private class EssaTaintTracking extends string { + EssaTaintTracking() { this instanceof TaintTracking::Configuration } - EssaTaintTracking() { - this instanceof TaintTracking::Configuration - } - pragma [noinline] - predicate taintedDefinition(TaintTrackingNode src, EssaDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + predicate taintedDefinition( + TaintTrackingNode src, EssaDefinition defn, TaintTrackingContext context, AttributePath path, + TaintKind kind + ) { this.taintedPhi(src, defn, context, path, kind) or this.taintedAssignment(src, defn, context, path, kind) @@ -625,8 +679,11 @@ private class EssaTaintTracking extends string { this.taintedWith(src, defn, context, path, kind) } - pragma [noinline] - private predicate taintedPhi(TaintTrackingNode src, PhiFunction defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedPhi( + TaintTrackingNode src, PhiFunction defn, TaintTrackingContext context, AttributePath path, + TaintKind kind + ) { exists(DataFlow::Node srcnode, BasicBlock pred, EssaVariable predvar, DataFlow::Node phi | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and defn = phi.asVariable().getDefinition() and @@ -637,16 +694,22 @@ private class EssaTaintTracking extends string { ) } - pragma [noinline] - private predicate taintedAssignment(TaintTrackingNode src, AssignmentDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedAssignment( + TaintTrackingNode src, AssignmentDefinition defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and defn.getValue() = srcnode.asCfgNode() ) } - pragma [noinline] - private predicate taintedAttributeAssignment(TaintTrackingNode src, AttributeAssignment defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedAttributeAssignment( + TaintTrackingNode src, AttributeAssignment defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { exists(DataFlow::Node srcnode, AttributePath srcpath, string attrname | src = TTaintTrackingNode_(srcnode, context, srcpath, kind, this) and defn.getValue() = srcnode.asCfgNode() and @@ -655,35 +718,49 @@ private class EssaTaintTracking extends string { ) } - pragma [noinline] - private predicate taintedParameterDefinition(TaintTrackingNode src, ParameterDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedParameterDefinition( + TaintTrackingNode src, ParameterDefinition defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and srcnode.asCfgNode() = defn.getDefiningNode() ) } - pragma [noinline] - private predicate taintedCallsite(TaintTrackingNode src, CallsiteRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { - /* In the interest of simplicity and performance we assume that tainted escaping variables remain tainted across calls. + pragma[noinline] + private predicate taintedCallsite( + TaintTrackingNode src, CallsiteRefinement defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { + /* + * In the interest of simplicity and performance we assume that tainted escaping variables remain tainted across calls. * In the cases were this assumption is false, it is easy enough to add an additional barrier. */ - exists(DataFlow::Node srcnode | - src = TTaintTrackingNode_(srcnode, context, path, kind, this) and - srcnode.asVariable() = defn.getInput() - ) - } - pragma [noinline] - private predicate taintedMethodCallsite(TaintTrackingNode src, MethodCallsiteRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and srcnode.asVariable() = defn.getInput() ) } - pragma [noinline] - private predicate taintedUniEdge(TaintTrackingNode src, SingleSuccessorGuard defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedMethodCallsite( + TaintTrackingNode src, MethodCallsiteRefinement defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { + exists(DataFlow::Node srcnode | + src = TTaintTrackingNode_(srcnode, context, path, kind, this) and + srcnode.asVariable() = defn.getInput() + ) + } + + pragma[noinline] + private predicate taintedUniEdge( + TaintTrackingNode src, SingleSuccessorGuard defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and srcnode.asVariable() = defn.getInput() and @@ -691,14 +768,20 @@ private class EssaTaintTracking extends string { ) } - private predicate taintedPiNode(TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + private predicate taintedPiNode( + TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path, + TaintKind kind + ) { taintedPiNodeOneway(src, defn, context, path, kind) or taintedPiNodeBothways(src, defn, context, path, kind) } - pragma [noinline] - private predicate taintedPiNodeOneway(TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedPiNodeOneway( + TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path, + TaintKind kind + ) { exists(DataFlow::Node srcnode, ControlFlowNode use | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and not this.(TaintTracking::Configuration).isBarrierTest(defn.getTest(), defn.getSense()) and @@ -706,8 +789,11 @@ private class EssaTaintTracking extends string { ) } - pragma [noinline] - private predicate taintedPiNodeBothways(TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedPiNodeBothways( + TaintTrackingNode src, PyEdgeRefinement defn, TaintTrackingContext context, AttributePath path, + TaintKind kind + ) { exists(DataFlow::Node srcnode, ControlFlowNode test, ControlFlowNode use | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and piNodeTestAndUse(defn, test, use) and @@ -717,49 +803,63 @@ private class EssaTaintTracking extends string { ) } - - pragma [noinline] - private predicate taintedArgument(TaintTrackingNode src, ArgumentRefinement defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedArgument( + TaintTrackingNode src, ArgumentRefinement defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and defn.getInput() = srcnode.asVariable() ) } - pragma [noinline] - private predicate taintedExceptionCapture(TaintTrackingNode src, ExceptionCapture defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedExceptionCapture( + TaintTrackingNode src, ExceptionCapture defn, TaintTrackingContext context, AttributePath path, + TaintKind kind + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and srcnode.asCfgNode() = defn.getDefiningNode() ) } - pragma [noinline] - private predicate taintedScopeEntryDefinition(TaintTrackingNode src, ScopeEntryDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedScopeEntryDefinition( + TaintTrackingNode src, ScopeEntryDefinition defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { exists(EssaVariable var | BaseFlow::scope_entry_value_transfer_from_earlier(var, _, defn, _) and this.taintedDefinition(src, var.getDefinition(), context, path, kind) ) } - pragma [noinline] - private predicate taintedWith(TaintTrackingNode src, WithDefinition defn, TaintTrackingContext context, AttributePath path, TaintKind kind) { + pragma[noinline] + private predicate taintedWith( + TaintTrackingNode src, WithDefinition defn, TaintTrackingContext context, AttributePath path, + TaintKind kind + ) { exists(DataFlow::Node srcnode | src = TTaintTrackingNode_(srcnode, context, path, kind, this) and with_flow(_, srcnode.asCfgNode(), defn.getDefiningNode()) ) } - /** Gets the boolean value that `test` evaluates to when `use` is tainted with `kind` + /** + * Gets the boolean value that `test` evaluates to when `use` is tainted with `kind` * and `test` and `use` are part of a test in a branch. */ - private boolean testEvaluates(PyEdgeRefinement defn, ControlFlowNode test, ControlFlowNode use, TaintTrackingNode src) { + private boolean testEvaluates( + PyEdgeRefinement defn, ControlFlowNode test, ControlFlowNode use, TaintTrackingNode src + ) { defn.getTest().getAChild*() = use and exists(DataFlow::Node srcnode, TaintKind kind | srcnode.asVariable() = defn.getInput() and srcnode.asVariable().getASourceUse() = use and src = TTaintTrackingNode_(srcnode, _, TNoAttribute(), kind, this) - | + | test = use and result = kind.booleanValue() or exists(ControlFlowNode const | @@ -770,9 +870,8 @@ private class EssaTaintTracking extends string { exists(ControlFlowNode c, ClassValue cls | Filters::isinstance(test, c, use) and c.pointsTo(cls) - | - exists(ClassValue scls | - scls = kind.getType() | + | + exists(ClassValue scls | scls = kind.getType() | scls.getASuperType() = cls and result = true or not scls.getASuperType() = cls and result = false @@ -785,7 +884,8 @@ private class EssaTaintTracking extends string { result = testEvaluates(defn, not_operand(test), use, src).booleanNot() } - /** Holds if `test` is the test in a branch and `use` is that test + /** + * Holds if `test` is the test in a branch and `use` is that test * with all the `not` prefixes removed. */ private predicate boolean_filter(ControlFlowNode test, ControlFlowNode use) { @@ -799,7 +899,6 @@ private class EssaTaintTracking extends string { ) ) } - } private predicate testEvaluatesMaybe(ControlFlowNode test, ControlFlowNode use) { @@ -830,19 +929,17 @@ private predicate with_flow(With with, ControlFlowNode contextManager, ControlFl } /* Helper predicate for taintedPiNode */ -pragma [noinline] +pragma[noinline] private predicate piNodeTestAndUse(PyEdgeRefinement defn, ControlFlowNode test, ControlFlowNode use) { test = defn.getTest() and use = defn.getInput().getASourceUse() and test.getAChild*() = use } module Implementation { - /* A call that returns a copy (or similar) of the argument */ predicate copyCall(ControlFlowNode fromnode, CallNode tonode) { tonode.getFunction().(AttrNode).getObject("copy") = fromnode or - exists(ModuleObject copy, string name | - name = "copy" or name = "deepcopy" | + exists(ModuleObject copy, string name | name = "copy" or name = "deepcopy" | copy.attr(name).(FunctionObject).getACall() = tonode and tonode.getArg(0) = fromnode ) @@ -850,6 +947,4 @@ module Implementation { tonode.getFunction().pointsTo(ObjectInternal::builtin("reversed")) and tonode.getArg(0) = fromnode } - } - diff --git a/python/ql/src/semmle/python/dataflow/Legacy.qll b/python/ql/src/semmle/python/dataflow/Legacy.qll index 94dd55e20fa..61961921514 100644 --- a/python/ql/src/semmle/python/dataflow/Legacy.qll +++ b/python/ql/src/semmle/python/dataflow/Legacy.qll @@ -3,12 +3,8 @@ private import semmle.python.objects.ObjectInternal import semmle.python.dataflow.Implementation /* For backwards compatibility -- Use `TaintTrackingContext` instead. */ -deprecated -class CallContext extends TaintTrackingContext { - - TaintTrackingContext getCallee(CallNode call) { - result.getCaller(call) = this - } +deprecated class CallContext extends TaintTrackingContext { + TaintTrackingContext getCallee(CallNode call) { result.getCaller(call) = this } predicate appliesToScope(Scope s) { exists(PythonFunctionObjectInternal func, TaintKind param, AttributePath path, int n | @@ -21,33 +17,23 @@ class CallContext extends TaintTrackingContext { or this.isTop() } - } /* Backwards compatibility with config-less taint-tracking */ private class LegacyConfiguration extends TaintTracking::Configuration { - LegacyConfiguration() { /* A name that won't be accidentally chosen by users */ this = "Semmle: Internal legacy configuration" } - override predicate isSource(TaintSource src) { - src = src - } + override predicate isSource(TaintSource src) { src = src } - override predicate isSink(TaintSink sink) { - sink = sink - } + override predicate isSink(TaintSink sink) { sink = sink } - override predicate isSanitizer(Sanitizer sanitizer) { - sanitizer = sanitizer - } + override predicate isSanitizer(Sanitizer sanitizer) { sanitizer = sanitizer } override predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest) { - exists(DataFlowExtension::DataFlowNode legacyExtension | - src.asCfgNode() = legacyExtension - | + exists(DataFlowExtension::DataFlowNode legacyExtension | src.asCfgNode() = legacyExtension | dest.asCfgNode() = legacyExtension.getASuccessorNode() or dest.asVariable() = legacyExtension.getASuccessorVariable() @@ -58,10 +44,10 @@ private class LegacyConfiguration extends TaintTracking::Configuration { ) } - override predicate isAdditionalFlowStep(DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind) { - exists(DataFlowExtension::DataFlowNode legacyExtension | - src.asCfgNode() = legacyExtension - | + override predicate isAdditionalFlowStep( + DataFlow::Node src, DataFlow::Node dest, TaintKind srckind, TaintKind destkind + ) { + exists(DataFlowExtension::DataFlowNode legacyExtension | src.asCfgNode() = legacyExtension | dest.asCfgNode() = legacyExtension.getASuccessorNode(srckind, destkind) ) } @@ -79,5 +65,4 @@ private class LegacyConfiguration extends TaintTracking::Configuration { ) ) } - } diff --git a/python/ql/src/semmle/python/dataflow/StateTracking.qll b/python/ql/src/semmle/python/dataflow/StateTracking.qll index 760aaf25dc3..55bed5bbfff 100644 --- a/python/ql/src/semmle/python/dataflow/StateTracking.qll +++ b/python/ql/src/semmle/python/dataflow/StateTracking.qll @@ -1,5 +1,6 @@ -/** Provides classes and predicates for tracking global state across the control flow and call graphs. - * +/** + * Provides classes and predicates for tracking global state across the control flow and call graphs. + * * NOTE: State tracking tracks both whether a state may apply to a given node in a given context *and* * whether it may not apply. * That `state.appliesTo(f, ctx)` holds implies nothing about whether `state.mayNotApplyTo(f, ctx)` holds. @@ -15,14 +16,11 @@ private import semmle.python.objects.ObjectInternal /** A state that should be tracked. */ abstract class TrackableState extends string { - bindingset[this] TrackableState() { this = this } /** Holds if this state may apply to the control flow node `f`, regardless of the context. */ - final predicate appliesTo(ControlFlowNode f) { - this.appliesTo(f, _) - } + final predicate appliesTo(ControlFlowNode f) { this.appliesTo(f, _) } /** Holds if this state may not apply to the control flow node `f`, given the context `ctx`. */ final predicate appliesTo(ControlFlowNode f, Context ctx) { @@ -35,60 +33,56 @@ abstract class TrackableState extends string { } /** Holds if this state may apply to the control flow node `f`, regardless of the context. */ - final predicate mayNotApplyTo(ControlFlowNode f) { - this.mayNotApplyTo(f, _) - } + final predicate mayNotApplyTo(ControlFlowNode f) { this.mayNotApplyTo(f, _) } /** Holds if `test` shows value to be untainted with `taint`, given the context `ctx`. */ - predicate testsFor(PyEdgeRefinement test, Context ctx, boolean sense) { + predicate testsFor(PyEdgeRefinement test, Context ctx, boolean sense) { ctx.appliesToScope(test.getScope()) and this.testsFor(test, sense) } /** Holds if `test` shows value to be untainted with `taint` */ predicate testsFor(PyEdgeRefinement test, boolean sense) { none() } - /** Holds if state starts at `f`. + /** + * Holds if state starts at `f`. * Either this predicate or `startsAt(ControlFlowNode f, Context ctx)` * should be overriden by sub-classes. */ predicate startsAt(ControlFlowNode f) { none() } - /** Holds if state starts at `f` given context `ctx`. + /** + * Holds if state starts at `f` given context `ctx`. * Either this predicate or `startsAt(ControlFlowNode f)` * should be overriden by sub-classes. */ - pragma [noinline] - predicate startsAt(ControlFlowNode f, Context ctx) { - ctx.appliesTo(f) and this.startsAt(f) - } + pragma[noinline] + predicate startsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.startsAt(f) } - /** Holds if state ends at `f`. + /** + * Holds if state ends at `f`. * Either this predicate or `endsAt(ControlFlowNode f, Context ctx)` * may be overriden by sub-classes. */ predicate endsAt(ControlFlowNode f) { none() } - /** Holds if state ends at `f` given context `ctx`. + /** + * Holds if state ends at `f` given context `ctx`. * Either this predicate or `endsAt(ControlFlowNode f)` * may be overriden by sub-classes. */ - pragma [noinline] - predicate endsAt(ControlFlowNode f, Context ctx) { - ctx.appliesTo(f) and this.endsAt(f) - } - + pragma[noinline] + predicate endsAt(ControlFlowNode f, Context ctx) { ctx.appliesTo(f) and this.endsAt(f) } } - module StateTracking { - private predicate not_allowed(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) { state.endsAt(f, ctx) and sense = true or state.startsAt(f, ctx) and sense = false } - /** Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) to + /** + * Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) to * control flow node `f` given the context `ctx`. */ predicate appliesToNode(TrackableState state, ControlFlowNode f, Context ctx, boolean sense) { @@ -96,8 +90,7 @@ module StateTracking { or state.startsAt(f, ctx) and sense = true or - not not_allowed(state, f, ctx, sense) - and + not not_allowed(state, f, ctx, sense) and ( exists(BasicBlock b | /* First node in a block */ @@ -106,7 +99,7 @@ module StateTracking { /* Other nodes in block, except trackable calls */ exists(int n | f = b.getNode(n) and - appliesToNode(state, b.getNode(n-1), ctx, sense) and + appliesToNode(state, b.getNode(n - 1), ctx, sense) and not exists(PythonFunctionObjectInternal func, Context callee | callee.fromCall(f, func, ctx) ) @@ -127,27 +120,32 @@ module StateTracking { ) or /* Other scope entries */ - exists(Scope s | + exists(Scope s | s.getEntryNode() = f and ctx.appliesToScope(s) - | + | not exists(Scope pred | pred.precedes(s)) and - (ctx.isImport() or ctx.isRuntime()) and sense = false + (ctx.isImport() or ctx.isRuntime()) and + sense = false or exists(Scope pred, Context pred_ctx | appliesToNode(state, pred.getANormalExit(), pred_ctx, sense) and pred.precedes(s) and - ctx.isRuntime() | + ctx.isRuntime() + | pred_ctx.isRuntime() or pred_ctx.isImport() ) ) ) } - /** Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the + /** + * Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the * start of basic block `block` given the context `ctx`. */ - private predicate appliesAtBlockStart(TrackableState state, BasicBlock block, Context ctx, boolean sense) { + private predicate appliesAtBlockStart( + TrackableState state, BasicBlock block, Context ctx, boolean sense + ) { exists(PyEdgeRefinement test | test.getSuccessor() = block and state.testsFor(test, ctx, sense) @@ -164,12 +162,13 @@ module StateTracking { ) } - /** Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the + /** + * Holds if `state` may apply (with `sense` = true) or may not apply (with `sense` = false) at the * end of basic block `block` given the context `ctx`. */ - private predicate appliesAtBlockEnd(TrackableState state, BasicBlock block, Context ctx, boolean sense) { + private predicate appliesAtBlockEnd( + TrackableState state, BasicBlock block, Context ctx, boolean sense + ) { appliesToNode(state, block.getLastNode(), ctx, sense) } - } - diff --git a/python/ql/src/semmle/python/dataflow/TaintTracking.qll b/python/ql/src/semmle/python/dataflow/TaintTracking.qll index fd8ac445d41..6ef9e5ae3e8 100755 --- a/python/ql/src/semmle/python/dataflow/TaintTracking.qll +++ b/python/ql/src/semmle/python/dataflow/TaintTracking.qll @@ -92,7 +92,8 @@ private import semmle.python.objects.ObjectInternal private import semmle.python.dataflow.Implementation import semmle.python.dataflow.Configuration -/** A 'kind' of taint. This may be almost anything, +/** + * A 'kind' of taint. This may be almost anything, * but it is typically something like a "user-defined string". * Examples include, data from a http request object, * data from an SMS or other mobile data source, @@ -100,33 +101,38 @@ import semmle.python.dataflow.Configuration * the local file system. */ abstract class TaintKind extends string { - bindingset[this] TaintKind() { any() } - /** Gets the kind of taint that the named attribute will have if an object is tainted with this taint. + /** + * Gets the kind of taint that the named attribute will have if an object is tainted with this taint. * In other words, if `x` has this kind of taint then it implies that `x.name` * has `result` kind of taint. */ TaintKind getTaintOfAttribute(string name) { none() } - /** Gets the kind of taint results from calling the named method if an object is tainted with this taint. + /** + * Gets the kind of taint results from calling the named method if an object is tainted with this taint. * In other words, if `x` has this kind of taint then it implies that `x.name()` * has `result` kind of taint. */ TaintKind getTaintOfMethodResult(string name) { none() } - /** Gets the taint resulting from the flow step `fromnode` -> `tonode`. + /** + * Gets the taint resulting from the flow step `fromnode` -> `tonode`. */ TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { none() } - /** Gets the taint resulting from the flow step `fromnode` -> `tonode`, with `edgeLabel` + /** + * Gets the taint resulting from the flow step `fromnode` -> `tonode`, with `edgeLabel` */ TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) { - result = this.getTaintForFlowStep(fromnode, tonode) and edgeLabel = "custom taint flow step for " + this + result = this.getTaintForFlowStep(fromnode, tonode) and + edgeLabel = "custom taint flow step for " + this } - /** DEPRECATED -- Use `TaintFlow.additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind)` instead. + /** + * DEPRECATED -- Use `TaintFlow.additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind)` instead. * * Holds if this kind of taint passes from variable `fromvar` to variable `tovar` * This predicate is present for completeness. It is unlikely that any `TaintKind` @@ -134,40 +140,40 @@ abstract class TaintKind extends string { */ deprecated predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar) { none() } - /** Holds if this kind of taint "taints" `expr`. + /** + * Holds if this kind of taint "taints" `expr`. */ final predicate taints(ControlFlowNode expr) { - exists(TaintedNode n | - n.getTaintKind() = this and n.getCfgNode() = expr - ) + exists(TaintedNode n | n.getTaintKind() = this and n.getCfgNode() = expr) } /** DEPRECATED -- Use getType() instead */ - deprecated ClassObject getClass() { - none() - } + deprecated ClassObject getClass() { none() } - /** Gets the class of this kind of taint. + /** + * Gets the class of this kind of taint. * For example, if this were a kind of string taint * the `result` would be `theStrType()`. */ - ClassValue getType() { - result.(ClassObjectInternal).getSource() = this.getClass() - } + ClassValue getType() { result.(ClassObjectInternal).getSource() = this.getClass() } - /** Gets the boolean values (may be one, neither, or both) that + /** + * Gets the boolean values (may be one, neither, or both) that * may result from the Python expression `bool(this)` */ boolean booleanValue() { - /* Default to true as the vast majority of taint is strings and + /* + * Default to true as the vast majority of taint is strings and * the empty string is almost always benign. */ + result = true } string repr() { result = this } - /** Gets the taint resulting from iterating over this kind of taint. + /** + * Gets the taint resulting from iterating over this kind of taint. * For example iterating over a text file produces lines. So iterating * over a tainted file would result in tainted strings */ @@ -184,20 +190,21 @@ abstract class TaintKind extends string { */ class FlowLabel = TaintKind; -/** Taint kinds representing collections of other taint kind. +/** + * Taint kinds representing collections of other taint kind. * We use `{kind}` to represent a mapping of string to `kind` and - * `[kind]` to represent a flat collection of `kind`. - * The use of `{` and `[` is chosen to reflect dict and list literals + * `[kind]` to represent a flat collection of `kind`. + * The use of `{` and `[` is chosen to reflect dict and list literals * in Python. We choose a single character prefix and suffix for simplicity * and ease of preventing infinite recursion. */ abstract class CollectionKind extends TaintKind { - bindingset[this] CollectionKind() { (this.charAt(0) = "[" or this.charAt(0) = "{") and /* Prevent any collection kinds more than 2 deep */ - not this.charAt(2) = "[" and not this.charAt(2) = "{" + not this.charAt(2) = "[" and + not this.charAt(2) = "{" } abstract TaintKind getMember(); @@ -207,20 +214,16 @@ abstract class CollectionKind extends TaintKind { abstract predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode); } -/** A taint kind representing a flat collections of kinds. +/** + * A taint kind representing a flat collections of kinds. * Typically a sequence, but can include sets. */ class SequenceKind extends CollectionKind { - TaintKind itemKind; - SequenceKind() { - this = "[" + itemKind + "]" - } + SequenceKind() { this = "[" + itemKind + "]" } - TaintKind getItem() { - result = itemKind - } + TaintKind getItem() { result = itemKind } override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { exists(BinaryExprNode mod | @@ -236,17 +239,11 @@ class SequenceKind extends CollectionKind { name = "pop" and result = this.getItem() } - override string repr() { - result = "sequence of " + itemKind - } + override string repr() { result = "sequence of " + itemKind } - override TaintKind getTaintForIteration() { - result = itemKind - } + override TaintKind getTaintForIteration() { result = itemKind } - override TaintKind getMember() { - result = itemKind - } + override TaintKind getMember() { result = itemKind } override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) { sequence_construct(fromnode.asCfgNode(), tonode.asCfgNode()) @@ -255,12 +252,9 @@ class SequenceKind extends CollectionKind { override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) { SequenceKind::itemFlowStep(fromnode.asCfgNode(), tonode.asCfgNode()) } - } - module SequenceKind { - predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) { tonode.(BinaryExprNode).getAnOperand() = fromnode and edgeLabel = "binary operation" or @@ -275,11 +269,10 @@ module SequenceKind { predicate itemFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { subscript_index(fromnode, tonode) } - } module DictKind { - predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) { + predicate flowStep(ControlFlowNode fromnode, ControlFlowNode tonode, string edgeLabel) { Implementation::copyCall(fromnode, tonode) and edgeLabel = "dict copy" or @@ -289,37 +282,31 @@ module DictKind { } } - /* Helper for sequence flow steps */ -pragma [noinline] +pragma[noinline] private predicate subscript_index(ControlFlowNode obj, SubscriptNode sub) { sub.isLoad() and sub.getValue() = obj and not sub.getNode().getIndex() instanceof Slice } -pragma [noinline] +pragma[noinline] private predicate subscript_slice(ControlFlowNode obj, SubscriptNode sub) { sub.isLoad() and sub.getValue() = obj and sub.getNode().getIndex() instanceof Slice } - -/** A taint kind representing a mapping of objects to kinds. +/** + * A taint kind representing a mapping of objects to kinds. * Typically a dict, but can include other mappings. */ class DictKind extends CollectionKind { - TaintKind valueKind; - DictKind() { - this = "{" + valueKind + "}" - } + DictKind() { this = "{" + valueKind + "}" } - TaintKind getValue() { - result = valueKind - } + TaintKind getValue() { result = valueKind } override TaintKind getTaintOfMethodResult(string name) { name = "get" and result = valueKind @@ -329,13 +316,9 @@ class DictKind extends CollectionKind { name = "itervalues" and result.(SequenceKind).getItem() = valueKind } - override string repr() { - result = "dict of " + valueKind - } + override string repr() { result = "dict of " + valueKind } - override TaintKind getMember() { - result = valueKind - } + override TaintKind getMember() { result = valueKind } override predicate flowFromMember(DataFlow::Node fromnode, DataFlow::Node tonode) { dict_construct(fromnode.asCfgNode(), tonode.asCfgNode()) @@ -344,17 +327,15 @@ class DictKind extends CollectionKind { override predicate flowToMember(DataFlow::Node fromnode, DataFlow::Node tonode) { subscript_index(fromnode.asCfgNode(), tonode.asCfgNode()) } - } - -/** A type of sanitizer of untrusted data. +/** + * A type of sanitizer of untrusted data. * Examples include sanitizers for http responses, for DB access or for shell commands. * Usually a sanitizer can only sanitize data for one particular use. * For example, a sanitizer for DB commands would not be safe to use for http responses. */ abstract class Sanitizer extends string { - bindingset[this] Sanitizer() { any() } @@ -372,42 +353,49 @@ abstract class Sanitizer extends string { /** Holds if `def` shows value to be untainted with `taint` */ predicate sanitizingDefinition(TaintKind taint, EssaDefinition def) { none() } - } -/** DEPRECATED -- Use DataFlowExtension instead. +/** + * DEPRECATED -- Use DataFlowExtension instead. * An extension to taint-flow. For adding library or framework specific flows. * Examples include flow from a request to untrusted part of that request or * from a socket to data from that socket. */ -deprecated abstract class TaintFlow extends string { - +abstract deprecated class TaintFlow extends string { bindingset[this] TaintFlow() { any() } - /** Holds if `fromnode` being tainted with `fromkind` will result in `tonode` being tainted with `tokind`. + /** + * Holds if `fromnode` being tainted with `fromkind` will result in `tonode` being tainted with `tokind`. * Extensions to `TaintFlow` should override this to provide additional taint steps. */ - predicate additionalFlowStep(ControlFlowNode fromnode, TaintKind fromkind, ControlFlowNode tonode, TaintKind tokind) { none() } + predicate additionalFlowStep( + ControlFlowNode fromnode, TaintKind fromkind, ControlFlowNode tonode, TaintKind tokind + ) { + none() + } - /** Holds if the given `kind` of taint passes from variable `fromvar` to variable `tovar`. + /** + * Holds if the given `kind` of taint passes from variable `fromvar` to variable `tovar`. * This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it. */ - predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() } + predicate additionalFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { + none() + } - /** Holds if the given `kind` of taint cannot pass from variable `fromvar` to variable `tovar`. + /** + * Holds if the given `kind` of taint cannot pass from variable `fromvar` to variable `tovar`. * This predicate is present for completeness. Most `TaintFlow` implementations will not need to override it. */ - predicate prunedFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() } - + predicate prunedFlowStepVar(EssaVariable fromvar, EssaVariable tovar, TaintKind kind) { none() } } -/** A source of taintedness. +/** + * A source of taintedness. * Users of the taint tracking library should override this * class to provide their own sources. */ abstract class TaintSource extends @py_flow_node { - string toString() { result = "Taint source" } /** @@ -429,9 +417,7 @@ abstract class TaintSource extends @py_flow_node { context.isTop() and this.isSourceOf(kind) } - Location getLocation() { - result = this.(ControlFlowNode).getLocation() - } + Location getLocation() { result = this.(ControlFlowNode).getLocation() } predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { this.getLocation().hasLocationInfo(fp, bl, bc, el, ec) @@ -459,19 +445,16 @@ abstract class TaintSource extends @py_flow_node { } /** Holds if taint can flow from this source to taint sink `sink` */ - final predicate flowsToSink(TaintSink sink) { - this.flowsToSink(_, sink) - } + final predicate flowsToSink(TaintSink sink) { this.flowsToSink(_, sink) } } - -/** Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead. +/** + * Warning: Advanced feature. Users are strongly recommended to use `TaintSource` instead. * A source of taintedness on the ESSA data-flow graph. * Users of the taint tracking library can override this * class to provide their own sources on the ESSA graph. */ abstract class TaintedDefinition extends EssaNodeDefinition { - /** * Holds if `this` is a source of taint kind `kind` * @@ -490,48 +473,36 @@ abstract class TaintedDefinition extends EssaNodeDefinition { predicate isSourceOf(TaintKind kind, TaintTrackingContext context) { context.isTop() and this.isSourceOf(kind) } - } private class DictUpdate extends DataFlowExtension::DataFlowNode { - MethodCallsiteRefinement call; DictUpdate() { - exists(CallNode c | - c = call.getCall() - | + exists(CallNode c | c = call.getCall() | c.getFunction().(AttrNode).getName() = "update" and c.getArg(0) = this ) } - override EssaVariable getASuccessorVariable() { - call.getVariable() = result - } - + override EssaVariable getASuccessorVariable() { call.getVariable() = result } } private class SequenceExtends extends DataFlowExtension::DataFlowNode { - MethodCallsiteRefinement call; SequenceExtends() { - exists(CallNode c | - c = call.getCall() - | + exists(CallNode c | c = call.getCall() | c.getFunction().(AttrNode).getName() = "extend" and c.getArg(0) = this ) } - override EssaVariable getASuccessorVariable() { - call.getVariable() = result - } - + override EssaVariable getASuccessorVariable() { call.getVariable() = result } } -/** A node that is vulnerable to one or more types of taint. +/** + * A node that is vulnerable to one or more types of taint. * These nodes provide the sinks when computing the taint flow graph. * An example would be an argument to a write to a http response object, * such an argument would be vulnerable to unsanitized user-input (XSS). @@ -539,8 +510,7 @@ private class SequenceExtends extends DataFlowExtension::DataFlowNode { * Users of the taint tracking library should extend this * class to provide their own sink nodes. */ -abstract class TaintSink extends @py_flow_node { - +abstract class TaintSink extends @py_flow_node { string toString() { result = "Taint sink" } /** @@ -551,134 +521,119 @@ abstract class TaintSink extends @py_flow_node { */ abstract predicate sinks(TaintKind taint); - Location getLocation() { - result = this.(ControlFlowNode).getLocation() - } + Location getLocation() { result = this.(ControlFlowNode).getLocation() } predicate hasLocationInfo(string fp, int bl, int bc, int el, int ec) { this.getLocation().hasLocationInfo(fp, bl, bc, el, ec) } - } -/** Extension for data-flow, to help express data-flow paths that are +/** + * Extension for data-flow, to help express data-flow paths that are * library or framework specific and cannot be inferred by the general * data-flow machinery. */ module DataFlowExtension { - /** A control flow node that modifies the basic data-flow. */ abstract class DataFlowNode extends @py_flow_node { + string toString() { result = "Dataflow extension node" } - string toString() { - result = "Dataflow extension node" - } - - /** Gets a successor node for data-flow. + /** + * Gets a successor node for data-flow. * Data (all forms) is assumed to flow from `this` to `result` */ ControlFlowNode getASuccessorNode() { none() } - /** Gets a successor variable for data-flow. + /** + * Gets a successor variable for data-flow. * Data (all forms) is assumed to flow from `this` to `result`. * Note: This is an unlikely form of flow. See `DataFlowVariable.getASuccessorVariable()` */ EssaVariable getASuccessorVariable() { none() } - /** Holds if data cannot flow from `this` to `succ`, + /** + * Holds if data cannot flow from `this` to `succ`, * even though it would normally do so. */ predicate prunedSuccessor(ControlFlowNode succ) { none() } - /** Gets a successor node, where the successor node will be tainted with `tokind` + /** + * Gets a successor node, where the successor node will be tainted with `tokind` * when `this` is tainted with `fromkind`. * Extensions to `DataFlowNode` should override this to provide additional taint steps. */ ControlFlowNode getASuccessorNode(TaintKind fromkind, TaintKind tokind) { none() } - /** Gets a successor node for data-flow with a change of context from callee to caller + /** + * Gets a successor node for data-flow with a change of context from callee to caller * (going *up* the call-stack) across call-site `call`. * Data (all forms) is assumed to flow from `this` to `result` * Extensions to `DataFlowNode` should override this to provide additional taint steps. */ ControlFlowNode getAReturnSuccessorNode(CallNode call) { none() } - /** Gets a successor node for data-flow with a change of context from caller to callee + /** + * Gets a successor node for data-flow with a change of context from caller to callee * (going *down* the call-stack) across call-site `call`. * Data (all forms) is assumed to flow from `this` to `result` * Extensions to `DataFlowNode` should override this to provide additional taint steps. */ ControlFlowNode getACalleeSuccessorNode(CallNode call) { none() } - } /** Data flow variable that modifies the basic data-flow. */ class DataFlowVariable extends EssaVariable { - - /** Gets a successor node for data-flow. + /** + * Gets a successor node for data-flow. * Data (all forms) is assumed to flow from `this` to `result` * Note: This is an unlikely form of flow. See `DataFlowNode.getASuccessorNode()` */ ControlFlowNode getASuccessorNode() { none() } - /** Gets a successor variable for data-flow. + /** + * Gets a successor variable for data-flow. * Data (all forms) is assumed to flow from `this` to `result`. */ EssaVariable getASuccessorVariable() { none() } - /** Holds if data cannot flow from `this` to `succ`, + /** + * Holds if data cannot flow from `this` to `succ`, * even though it would normally do so. */ predicate prunedSuccessor(EssaVariable succ) { none() } - } } class TaintedPathSource extends TaintTrackingNode { + TaintedPathSource() { this.isSource() } - TaintedPathSource() { - this.isSource() - } - - DataFlow::Node getSource() { - result = this.getNode() - } - + DataFlow::Node getSource() { result = this.getNode() } } class TaintedPathSink extends TaintTrackingNode { + TaintedPathSink() { this.isSink() } - TaintedPathSink() { - this.isSink() - } - - DataFlow::Node getSink() { - result = this.getNode() - } - + DataFlow::Node getSink() { result = this.getNode() } } /* Backwards compatible name */ class TaintedNode = TaintTrackingNode; /* Helpers for Validating classes */ - private import semmle.python.pointsto.PointsTo - -/** Data flow module providing an interface compatible with +/** + * Data flow module providing an interface compatible with * the other language implementations. */ module DataFlow { - - /** Generic taint kind, source and sink classes for convenience and + /** + * Generic taint kind, source and sink classes for convenience and * compatibility with other language libraries */ - class Extension = DataFlowExtension::DataFlowNode; - deprecated abstract class Configuration extends string { - + abstract deprecated class Configuration extends string { bindingset[this] Configuration() { this = this } @@ -702,14 +657,10 @@ module DataFlow { this.hasFlowPath(psource, psink) ) } - } private class ConfigurationAdapter extends TaintTracking::Configuration { - - ConfigurationAdapter() { - this instanceof Configuration - } + ConfigurationAdapter() { this instanceof Configuration } override predicate isSource(DataFlow::Node node, TaintKind kind) { this.(Configuration).isSource(node.asCfgNode()) and @@ -720,16 +671,13 @@ module DataFlow { this.(Configuration).isSink(node.asCfgNode()) and kind instanceof DataFlowType } - } private newtype TDataFlowNode = - TEssaNode(EssaVariable var) - or + TEssaNode(EssaVariable var) or TCfgNode(ControlFlowNode node) abstract class Node extends TDataFlowNode { - abstract ControlFlowNode asCfgNode(); abstract EssaVariable asVariable(); @@ -742,86 +690,51 @@ module DataFlow { abstract Location getLocation(); - AstNode asAstNode() { - result = this.asCfgNode().getNode() - } + AstNode asAstNode() { result = this.asCfgNode().getNode() } /** For backwards compatibility -- Use asAstNode() instead */ - deprecated AstNode getNode() { - result = this.asAstNode() - } - + deprecated AstNode getNode() { result = this.asAstNode() } } class CfgNode extends Node, TCfgNode { + override ControlFlowNode asCfgNode() { this = TCfgNode(result) } - override ControlFlowNode asCfgNode() { - this = TCfgNode(result) - } + override EssaVariable asVariable() { none() } - override EssaVariable asVariable() { - none() - } + override string toString() { result = this.asAstNode().toString() } - override string toString() { - result = this.asAstNode().toString() - } + override Scope getScope() { result = this.asCfgNode().getScope() } - override Scope getScope() { - result = this.asCfgNode().getScope() - } - - override BasicBlock getBasicBlock() { - result = this.asCfgNode().getBasicBlock() - } - - override Location getLocation() { - result = this.asCfgNode().getLocation() - } + override BasicBlock getBasicBlock() { result = this.asCfgNode().getBasicBlock() } + override Location getLocation() { result = this.asCfgNode().getLocation() } } class EssaNode extends Node, TEssaNode { + override ControlFlowNode asCfgNode() { none() } - override ControlFlowNode asCfgNode() { - none() - } + override EssaVariable asVariable() { this = TEssaNode(result) } - override EssaVariable asVariable() { - this = TEssaNode(result) - } + override string toString() { result = this.asVariable().toString() } - override string toString() { - result = this.asVariable().toString() - } - - override Scope getScope() { - result = this.asVariable().getScope() - } + override Scope getScope() { result = this.asVariable().getScope() } override BasicBlock getBasicBlock() { result = this.asVariable().getDefinition().getBasicBlock() } - override Location getLocation() { - result = this.asVariable().getDefinition().getLocation() - } - + override Location getLocation() { result = this.asVariable().getDefinition().getLocation() } } - } private class DataFlowType extends TaintKind { - DataFlowType() { - this = "Data flow" and + this = "Data flow" and exists(DataFlow::Configuration c) } - } - -pragma [noinline] +pragma[noinline] private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictnode) { dictnode.(DictNode).getAValue() = itemnode or @@ -829,7 +742,7 @@ private predicate dict_construct(ControlFlowNode itemnode, ControlFlowNode dictn dictnode.(CallNode).getArgByName(_) = itemnode } -pragma [noinline] +pragma[noinline] private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode seqnode) { seqnode.isLoad() and ( @@ -841,13 +754,11 @@ private predicate sequence_construct(ControlFlowNode itemnode, ControlFlowNode s ) } - /* A call to construct a sequence from a sequence or iterator*/ -pragma [noinline] +pragma[noinline] private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) { tonode.getArg(0) = fromnode and - exists(ControlFlowNode cls | - cls = tonode.getFunction() | + exists(ControlFlowNode cls | cls = tonode.getFunction() | cls.pointsTo(ObjectInternal::builtin("list")) or cls.pointsTo(ObjectInternal::builtin("tuple")) @@ -855,4 +766,3 @@ private predicate sequence_call(ControlFlowNode fromnode, CallNode tonode) { cls.pointsTo(ObjectInternal::builtin("set")) ) } - diff --git a/python/ql/src/semmle/python/essa/Essa.qll b/python/ql/src/semmle/python/essa/Essa.qll index cbc8df53a63..8111c70f071 100644 --- a/python/ql/src/semmle/python/essa/Essa.qll +++ b/python/ql/src/semmle/python/essa/Essa.qll @@ -37,7 +37,7 @@ class EssaVariable extends TEssaDefinition { result = "SSA variable " + this.getName() } - /** Gets a string representation of this variable. + /** Gets a string representation of this variable. * WARNING: The format of this may change and it may be very inefficient to compute. * To used for debugging and testing only. */ @@ -69,7 +69,7 @@ class EssaVariable extends TEssaDefinition { } -/* Helper for location_string +/* Helper for location_string * NOTE: This is Python specific, to make `getRepresentation()` portable will require further work. */ private int exception_handling(BasicBlock b) { @@ -153,10 +153,10 @@ abstract class EssaDefinition extends TEssaDefinition { abstract predicate reachesEndOfBlock(BasicBlock b); /** Gets the location of a control flow node that is indicative of this definition. - * Since definitions may occur on edges of the control flow graph, the given location may + * Since definitions may occur on edges of the control flow graph, the given location may * be imprecise. * Distinct `EssaDefinitions` may return the same ControlFlowNode even for - * the same variable. + * the same variable. */ abstract Location getLocation(); @@ -174,9 +174,9 @@ abstract class EssaDefinition extends TEssaDefinition { } -/** An ESSA definition corresponding to an edge refinement of the underlying variable. +/** An ESSA definition corresponding to an edge refinement of the underlying variable. * For example, the edges leaving a test on a variable both represent refinements of that - * variable. On one edge the test is true, on the other it is false. + * variable. On one edge the test is true, on the other it is false. */ class EssaEdgeRefinement extends EssaDefinition, TEssaEdgeDefinition { @@ -348,7 +348,7 @@ class PhiFunction extends EssaDefinition, TPhiFunction { } private EssaEdgeRefinement piInputDefinition(EssaVariable input) { - input = this.getAnInput() and + input = this.getAnInput() and result = input.getDefinition() or input = this.getAnInput() and result = input.getDefinition().(PhiFunction).piInputDefinition(_) @@ -851,4 +851,3 @@ class PyEdgeRefinement extends EssaEdgeRefinement { } } - diff --git a/python/ql/src/semmle/python/essa/SsaDefinitions.qll b/python/ql/src/semmle/python/essa/SsaDefinitions.qll index 9f7a5fc869b..d759e6df7a3 100644 --- a/python/ql/src/semmle/python/essa/SsaDefinitions.qll +++ b/python/ql/src/semmle/python/essa/SsaDefinitions.qll @@ -1,4 +1,4 @@ -/** Provides classes and predicates for determining the uses and definitions of +/** Provides classes and predicates for determining the uses and definitions of * variables for ESSA form. */ @@ -30,7 +30,7 @@ cached module SsaSource { /** Holds if `v` is defined by a with statement. */ cached predicate with_definition(Variable v, ControlFlowNode defn) { - exists(With with, Name var | + exists(With with, Name var | with.getOptionalVars() = var and var.getAFlowNode() = defn | var = v.getAStore() @@ -39,7 +39,7 @@ cached module SsaSource { /** Holds if `v` is defined by multiple assignment at `defn`. */ cached predicate multi_assignment_definition(Variable v, ControlFlowNode defn, int n, SequenceNode lhs) { - defn.(NameNode).defines(v) and + defn.(NameNode).defines(v) and not exists(defn.(DefinitionNode).getValue()) and lhs.getElement(n) = defn and lhs.getBasicBlock().dominates(defn.getBasicBlock()) From 9763ec71fe1e11fc9dd3fd3236d48141d7889c3f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 27 Jan 2020 11:40:01 +0100 Subject: [PATCH 2/8] Python: Add tests for nested assignment --- .../taint/general/Contexts.expected | 3 +++ .../taint/general/TestDefn.expected | 3 +++ .../taint/general/TestSource.expected | 2 ++ .../taint/general/TestStep.expected | 8 ++++++++ .../taint/general/TestTaint.expected | 8 ++++++++ .../library-tests/taint/general/TestTaint.ql | 18 ++++++++++++++++++ .../taint/general/TestVar.expected | 4 ++++ .../library-tests/taint/general/assignment.py | 14 ++++++++++++++ 8 files changed, 60 insertions(+) create mode 100644 python/ql/test/library-tests/taint/general/TestTaint.expected create mode 100644 python/ql/test/library-tests/taint/general/TestTaint.ql create mode 100644 python/ql/test/library-tests/taint/general/assignment.py diff --git a/python/ql/test/library-tests/taint/general/Contexts.expected b/python/ql/test/library-tests/taint/general/Contexts.expected index b2e8065fdcb..f07c1452c6e 100644 --- a/python/ql/test/library-tests/taint/general/Contexts.expected +++ b/python/ql/test/library-tests/taint/general/Contexts.expected @@ -1,5 +1,8 @@ WARNING: Type CallContext has been deprecated and may be removed in future (Contexts.ql:6,6-17) WARNING: Type CallContext has been deprecated and may be removed in future (Contexts.ql:7,14-25) +| assignment.py:1 | p0 = simple.test | Function test | +| assignment.py:1 | p1 = simple.test | Function test | +| assignment.py:1 | p2 = simple.test | Function test | | carrier.py:4 | p1 = explicit.carrier | Function __init__ | | carrier.py:4 | p1 = simple.test | Function __init__ | | carrier.py:10 | p0.attr = simple.test | Function get_attr | diff --git a/python/ql/test/library-tests/taint/general/TestDefn.expected b/python/ql/test/library-tests/taint/general/TestDefn.expected index ce2282a5797..b9b52d5cf25 100644 --- a/python/ql/test/library-tests/taint/general/TestDefn.expected +++ b/python/ql/test/library-tests/taint/general/TestDefn.expected @@ -1,3 +1,6 @@ +| assignment.py:5 | SOURCE | assignment.py:5 | Taint simple.test | a | +| assignment.py:7 | a | assignment.py:7 | Taint simple.test | b | +| assignment.py:13 | SOURCE | assignment.py:13 | Taint simple.test | t2 | | carrier.py:4 | ParameterDefinition | carrier.py:4 | Taint explicit.carrier | arg | | carrier.py:4 | ParameterDefinition | carrier.py:4 | Taint simple.test | arg | | carrier.py:10 | ParameterDefinition | carrier.py:10 | Taint .attr = simple.test | self | diff --git a/python/ql/test/library-tests/taint/general/TestSource.expected b/python/ql/test/library-tests/taint/general/TestSource.expected index 0d45449fbad..6b98431498c 100644 --- a/python/ql/test/library-tests/taint/general/TestSource.expected +++ b/python/ql/test/library-tests/taint/general/TestSource.expected @@ -1,3 +1,5 @@ +| assignment.py:5 | SOURCE | simple.test | +| assignment.py:13 | SOURCE | simple.test | | carrier.py:17 | SOURCE | simple.test | | carrier.py:21 | TAINT_CARRIER_SOURCE | explicit.carrier | | carrier.py:25 | SOURCE | simple.test | diff --git a/python/ql/test/library-tests/taint/general/TestStep.expected b/python/ql/test/library-tests/taint/general/TestStep.expected index 9718d03c84d..eb5e3e827ac 100644 --- a/python/ql/test/library-tests/taint/general/TestStep.expected +++ b/python/ql/test/library-tests/taint/general/TestStep.expected @@ -103,6 +103,7 @@ | scissors | rockpaperscissors.py:29 | SCISSORS | | --> | scissors | rockpaperscissors.py:31 | x | | | scissors | rockpaperscissors.py:30 | x | | --> | paper | rockpaperscissors.py:30 | Attribute() | | | scissors | rockpaperscissors.py:31 | x | | --> | scissors | rockpaperscissors.py:6 | arg | p0 = scissors | +| sequence of simple.test | assignment.py:13 | Tuple | | --> | sequence of [simple.test] | assignment.py:13 | Tuple | | | sequence of simple.test | test.py:168 | List | | --> | sequence of simple.test | test.py:170 | l | | | sequence of simple.test | test.py:168 | List | | --> | sequence of simple.test | test.py:174 | l | | | sequence of simple.test | test.py:170 | SSA variable x | | --> | sequence of simple.test | test.py:172 | x | | @@ -112,6 +113,13 @@ | sequence of simple.test | test.py:208 | List | | --> | sequence of simple.test | test.py:209 | seq | | | sequence of simple.test | test.py:209 | seq | | --> | simple.test | test.py:209 | For | | | sequence of simple.test | test.py:213 | flow_in_generator() | | --> | simple.test | test.py:213 | For | | +| simple.test | assignment.py:5 | SOURCE | | --> | sequence of simple.test | assignment.py:5 | Tuple | | +| simple.test | assignment.py:5 | SOURCE | | --> | simple.test | assignment.py:6 | a | | +| simple.test | assignment.py:5 | SOURCE | | --> | simple.test | assignment.py:7 | a | | +| simple.test | assignment.py:7 | a | | --> | sequence of simple.test | assignment.py:7 | Tuple | | +| simple.test | assignment.py:7 | a | | --> | simple.test | assignment.py:8 | b | | +| simple.test | assignment.py:13 | SOURCE | | --> | sequence of simple.test | assignment.py:13 | Tuple | | +| simple.test | assignment.py:13 | SOURCE | | --> | simple.test | assignment.py:14 | t2 | | | simple.test | carrier.py:4 | arg | p1 = simple.test | --> | simple.test | carrier.py:5 | arg | p1 = simple.test | | simple.test | carrier.py:17 | SOURCE | | --> | .attr = simple.test | carrier.py:17 | ImplicitCarrier() | | | simple.test | carrier.py:17 | SOURCE | | --> | simple.test | carrier.py:4 | arg | p1 = simple.test | diff --git a/python/ql/test/library-tests/taint/general/TestTaint.expected b/python/ql/test/library-tests/taint/general/TestTaint.expected new file mode 100644 index 00000000000..01410a19cad --- /dev/null +++ b/python/ql/test/library-tests/taint/general/TestTaint.expected @@ -0,0 +1,8 @@ +| assignment.py:6 | swap_taint | a | simple.test | +| assignment.py:6 | swap_taint | b | NO TAINT | +| assignment.py:8 | swap_taint | a | NO TAINT | +| assignment.py:8 | swap_taint | b | simple.test | +| assignment.py:14 | nested_assignment | s1 | NO TAINT | +| assignment.py:14 | nested_assignment | s2 | NO TAINT | +| assignment.py:14 | nested_assignment | t1 | NO TAINT | +| assignment.py:14 | nested_assignment | t2 | simple.test | diff --git a/python/ql/test/library-tests/taint/general/TestTaint.ql b/python/ql/test/library-tests/taint/general/TestTaint.ql new file mode 100644 index 00000000000..3b8bf2b4bde --- /dev/null +++ b/python/ql/test/library-tests/taint/general/TestTaint.ql @@ -0,0 +1,18 @@ +import python +import semmle.python.security.TaintTracking +import TaintLib + +from Call call, Expr arg, string taint_string +where + call.getLocation().getFile().getShortName() = "assignment.py" and + call.getFunc().(Name).getId() = "test" and + arg = call.getAnArg() and + ( + not exists(TaintedNode tainted | tainted.getAstNode() = arg) and + taint_string = "NO TAINT" + or + exists(TaintedNode tainted | tainted.getAstNode() = arg | + taint_string = tainted.getTaintKind().toString() + ) + ) +select arg.getLocation().toString(), call.getScope().(Function).getName(), arg.toString(), taint_string diff --git a/python/ql/test/library-tests/taint/general/TestVar.expected b/python/ql/test/library-tests/taint/general/TestVar.expected index f60a1149881..fa20520d229 100644 --- a/python/ql/test/library-tests/taint/general/TestVar.expected +++ b/python/ql/test/library-tests/taint/general/TestVar.expected @@ -1,3 +1,7 @@ +| assignment.py:5 | a_0 | assignment.py:5 | Taint simple.test | +| assignment.py:6 | a_1 | assignment.py:6 | Taint simple.test | +| assignment.py:7 | b_1 | assignment.py:7 | Taint simple.test | +| assignment.py:13 | t2_0 | assignment.py:13 | Taint simple.test | | carrier.py:4 | arg_0 | carrier.py:4 | Taint explicit.carrier | | carrier.py:4 | arg_0 | carrier.py:4 | Taint simple.test | | carrier.py:5 | self_1 | carrier.py:5 | Taint .attr = explicit.carrier | diff --git a/python/ql/test/library-tests/taint/general/assignment.py b/python/ql/test/library-tests/taint/general/assignment.py new file mode 100644 index 00000000000..4eceff9a13e --- /dev/null +++ b/python/ql/test/library-tests/taint/general/assignment.py @@ -0,0 +1,14 @@ +def test(*args): + pass + +def swap_taint(): + a, b = SOURCE, "safe" + test(a, b) + a, b = b, a + test(a, b) + +def nested_assignment(): + # A contrived example, that is a bit silly (and is not even iterable unpacking). + # We do handle this case though. + ((t1, s1), t2, s2) = ((SOURCE, "safe"), SOURCE, "safe") + test(t1, s1, t2, s2) From fa48fb04f5d6656a37899e8f0e9f28a0a340f934 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 24 Jan 2020 15:07:17 +0100 Subject: [PATCH 3/8] Python: Recognize nested tuple/list assignment Now we recognize `[(x,y)] = [(1,2)]` -- in itself not a widely used idiom, but more of a warmup excersize for me --- python/ql/src/semmle/python/Flow.qll | 51 +++++++++++++++---- .../taint/general/TestDefn.expected | 1 + .../taint/general/TestStep.expected | 1 + .../taint/general/TestTaint.expected | 2 +- .../taint/general/TestVar.expected | 2 + 5 files changed, 46 insertions(+), 11 deletions(-) diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll index df75c04cbbb..8699c0368d7 100755 --- a/python/ql/src/semmle/python/Flow.qll +++ b/python/ql/src/semmle/python/Flow.qll @@ -753,9 +753,8 @@ class DefinitionNode extends ControlFlowNode { or augstore(_, this) or - exists(Assign a | a.getATarget().(Tuple).getAnElt().getAFlowNode() = this) - or - exists(Assign a | a.getATarget().(List).getAnElt().getAFlowNode() = this) + // `x, y = 1, 2` where LHS is a combination of list or tuples + exists(Assign a | list_or_tuple_nested_element(a.getATarget()).getAFlowNode() = this) or exists(For for | for.getTarget().getAFlowNode() = this) } @@ -768,6 +767,18 @@ class DefinitionNode extends ControlFlowNode { } } +private Expr list_or_tuple_nested_element(Expr list_or_tuple) { + exists(Expr elt | + elt = list_or_tuple.(Tuple).getAnElt() + or + elt = list_or_tuple.(List).getAnElt() + | + result = elt + or + result = list_or_tuple_nested_element(elt) + ) +} + /** A control flow node corresponding to a deletion statement, such as `del x`. * There can be multiple `DeletionNode`s for each `Delete` such that each * target has own `DeletionNode`. The CFG for `del a, x.y` looks like: @@ -887,18 +898,38 @@ private AstNode assigned_value(Expr lhs) { /* lhs += x => result = (lhs + x) */ exists(AugAssign a, BinaryExpr b | b = a.getOperation() and result = b and lhs = b.getLeft()) or - /* ..., lhs, ... = ..., result, ... */ - exists(Assign a, Tuple target, Tuple values, int index | - a.getATarget() = target and - a.getValue() = values and - lhs = target.getElt(index) and - result = values.getElt(index) - ) + /* ..., lhs, ... = ..., result, ... + * or + * ..., (..., lhs, ...), ... = ..., (..., result, ...), ... + */ + exists(Assign a | nested_sequence_assign(a.getATarget(), a.getValue(), lhs, result)) or /* for lhs in seq: => `result` is the `for` node, representing the `iter(next(seq))` operation. */ result.(For).getTarget() = lhs } +predicate nested_sequence_assign(Expr left_parent, Expr right_parent, + Expr left_result, Expr right_result) { + exists(int i, Expr left_elem, Expr right_elem + | + ( + left_elem = left_parent.(Tuple).getElt(i) + or + left_elem = left_parent.(List).getElt(i) + ) + and + ( + right_elem = right_parent.(Tuple).getElt(i) + or + right_elem = right_parent.(List).getElt(i) + ) + | + left_result = left_elem and right_result = right_elem + or + nested_sequence_assign(left_elem, right_elem, left_result, right_result) + ) +} + /** A flow node for a `for` statement. */ class ForNode extends ControlFlowNode { diff --git a/python/ql/test/library-tests/taint/general/TestDefn.expected b/python/ql/test/library-tests/taint/general/TestDefn.expected index b9b52d5cf25..a8d0719bb2f 100644 --- a/python/ql/test/library-tests/taint/general/TestDefn.expected +++ b/python/ql/test/library-tests/taint/general/TestDefn.expected @@ -1,5 +1,6 @@ | assignment.py:5 | SOURCE | assignment.py:5 | Taint simple.test | a | | assignment.py:7 | a | assignment.py:7 | Taint simple.test | b | +| assignment.py:13 | SOURCE | assignment.py:13 | Taint simple.test | t1 | | assignment.py:13 | SOURCE | assignment.py:13 | Taint simple.test | t2 | | carrier.py:4 | ParameterDefinition | carrier.py:4 | Taint explicit.carrier | arg | | carrier.py:4 | ParameterDefinition | carrier.py:4 | Taint simple.test | arg | diff --git a/python/ql/test/library-tests/taint/general/TestStep.expected b/python/ql/test/library-tests/taint/general/TestStep.expected index eb5e3e827ac..47ccef17821 100644 --- a/python/ql/test/library-tests/taint/general/TestStep.expected +++ b/python/ql/test/library-tests/taint/general/TestStep.expected @@ -119,6 +119,7 @@ | simple.test | assignment.py:7 | a | | --> | sequence of simple.test | assignment.py:7 | Tuple | | | simple.test | assignment.py:7 | a | | --> | simple.test | assignment.py:8 | b | | | simple.test | assignment.py:13 | SOURCE | | --> | sequence of simple.test | assignment.py:13 | Tuple | | +| simple.test | assignment.py:13 | SOURCE | | --> | simple.test | assignment.py:14 | t1 | | | simple.test | assignment.py:13 | SOURCE | | --> | simple.test | assignment.py:14 | t2 | | | simple.test | carrier.py:4 | arg | p1 = simple.test | --> | simple.test | carrier.py:5 | arg | p1 = simple.test | | simple.test | carrier.py:17 | SOURCE | | --> | .attr = simple.test | carrier.py:17 | ImplicitCarrier() | | diff --git a/python/ql/test/library-tests/taint/general/TestTaint.expected b/python/ql/test/library-tests/taint/general/TestTaint.expected index 01410a19cad..6d3fcbe4491 100644 --- a/python/ql/test/library-tests/taint/general/TestTaint.expected +++ b/python/ql/test/library-tests/taint/general/TestTaint.expected @@ -4,5 +4,5 @@ | assignment.py:8 | swap_taint | b | simple.test | | assignment.py:14 | nested_assignment | s1 | NO TAINT | | assignment.py:14 | nested_assignment | s2 | NO TAINT | -| assignment.py:14 | nested_assignment | t1 | NO TAINT | +| assignment.py:14 | nested_assignment | t1 | simple.test | | assignment.py:14 | nested_assignment | t2 | simple.test | diff --git a/python/ql/test/library-tests/taint/general/TestVar.expected b/python/ql/test/library-tests/taint/general/TestVar.expected index fa20520d229..0aae5c27503 100644 --- a/python/ql/test/library-tests/taint/general/TestVar.expected +++ b/python/ql/test/library-tests/taint/general/TestVar.expected @@ -1,7 +1,9 @@ | assignment.py:5 | a_0 | assignment.py:5 | Taint simple.test | | assignment.py:6 | a_1 | assignment.py:6 | Taint simple.test | | assignment.py:7 | b_1 | assignment.py:7 | Taint simple.test | +| assignment.py:13 | t1_0 | assignment.py:13 | Taint simple.test | | assignment.py:13 | t2_0 | assignment.py:13 | Taint simple.test | +| assignment.py:14 | t1_1 | assignment.py:14 | Taint simple.test | | carrier.py:4 | arg_0 | carrier.py:4 | Taint explicit.carrier | | carrier.py:4 | arg_0 | carrier.py:4 | Taint simple.test | | carrier.py:5 | self_1 | carrier.py:5 | Taint .attr = explicit.carrier | From a3f1f4cb87d5248221c4acddfcae9deee352648d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 24 Jan 2020 15:00:09 +0100 Subject: [PATCH 4/8] Python: Add iterable unpacking tests --- .../library-tests/taint/unpacking/Taint.qll | 27 +++++++++ .../taint/unpacking/TestStep.expected | 14 +++++ .../library-tests/taint/unpacking/TestStep.ql | 11 ++++ .../taint/unpacking/TestTaint.expected | 33 +++++++++++ .../taint/unpacking/TestTaint.ql | 18 ++++++ .../library-tests/taint/unpacking/test.py | 58 +++++++++++++++++++ 6 files changed, 161 insertions(+) create mode 100644 python/ql/test/library-tests/taint/unpacking/Taint.qll create mode 100644 python/ql/test/library-tests/taint/unpacking/TestStep.expected create mode 100644 python/ql/test/library-tests/taint/unpacking/TestStep.ql create mode 100644 python/ql/test/library-tests/taint/unpacking/TestTaint.expected create mode 100644 python/ql/test/library-tests/taint/unpacking/TestTaint.ql create mode 100644 python/ql/test/library-tests/taint/unpacking/test.py diff --git a/python/ql/test/library-tests/taint/unpacking/Taint.qll b/python/ql/test/library-tests/taint/unpacking/Taint.qll new file mode 100644 index 00000000000..b97f65225f2 --- /dev/null +++ b/python/ql/test/library-tests/taint/unpacking/Taint.qll @@ -0,0 +1,27 @@ +import python +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + +class SimpleSource extends TaintSource { + SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind } + + override string toString() { result = "taint source" } +} + +class ListSource extends TaintSource { + ListSource() { this.(NameNode).getId() = "TAINTED_LIST" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind } + + override string toString() { result = "list taint source" } +} + +class DictSource extends TaintSource { + DictSource() { this.(NameNode).getId() = "TAINTED_DICT" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind } + + override string toString() { result = "dict taint source" } +} diff --git a/python/ql/test/library-tests/taint/unpacking/TestStep.expected b/python/ql/test/library-tests/taint/unpacking/TestStep.expected new file mode 100644 index 00000000000..51c4e7fc410 --- /dev/null +++ b/python/ql/test/library-tests/taint/unpacking/TestStep.expected @@ -0,0 +1,14 @@ +| Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | --> | Taint [[externally controlled string]] | test.py:22 | test.py:22:28:22:29 | ll | | +| Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | --> | Taint [[externally controlled string]] | test.py:26 | test.py:26:28:26:29 | ll | | +| Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | --> | Taint [[externally controlled string]] | test.py:30 | test.py:30:28:30:29 | ll | | +| Taint [externally controlled string] | test.py:6 | test.py:6:9:6:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:7 | test.py:7:15:7:15 | l | | +| Taint [externally controlled string] | test.py:12 | test.py:12:9:12:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:13 | test.py:13:17:13:17 | l | | +| Taint [externally controlled string] | test.py:18 | test.py:18:9:18:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:19 | test.py:19:11:19:11 | l | | +| Taint [externally controlled string] | test.py:18 | test.py:18:9:18:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:19 | test.py:19:14:19:14 | l | | +| Taint [externally controlled string] | test.py:18 | test.py:18:9:18:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:19 | test.py:19:17:19:17 | l | | +| Taint [externally controlled string] | test.py:19 | test.py:19:11:19:11 | l | | --> | Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | +| Taint [externally controlled string] | test.py:19 | test.py:19:14:19:14 | l | | --> | Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | +| Taint [externally controlled string] | test.py:19 | test.py:19:17:19:17 | l | | --> | Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | +| Taint [externally controlled string] | test.py:43 | test.py:43:20:43:31 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:47 | test.py:47:28:47:39 | tainted_list | | +| Taint [externally controlled string] | test.py:47 | test.py:47:28:47:39 | tainted_list | | --> | Taint [[externally controlled string]] | test.py:47 | test.py:47:28:47:54 | Tuple | | +| Taint [externally controlled string] | test.py:55 | test.py:55:27:55:38 | TAINTED_LIST | | --> | Taint [[externally controlled string]] | test.py:55 | test.py:55:25:55:40 | List | | diff --git a/python/ql/test/library-tests/taint/unpacking/TestStep.ql b/python/ql/test/library-tests/taint/unpacking/TestStep.ql new file mode 100644 index 00000000000..e7c014f2eb2 --- /dev/null +++ b/python/ql/test/library-tests/taint/unpacking/TestStep.ql @@ -0,0 +1,11 @@ +import python +import semmle.python.security.TaintTracking +import Taint + +from TaintedNode n, TaintedNode s +where + n.getLocation().getFile().getShortName() = "test.py" and + s.getLocation().getFile().getShortName() = "test.py" and + s = n.getASuccessor() +select "Taint " + n.getTaintKind(), n.getLocation().toString(), n.getAstNode(), n.getContext(), + " --> ", "Taint " + s.getTaintKind(), s.getLocation().toString(), s.getAstNode(), s.getContext() diff --git a/python/ql/test/library-tests/taint/unpacking/TestTaint.expected b/python/ql/test/library-tests/taint/unpacking/TestTaint.expected new file mode 100644 index 00000000000..dc704043c7d --- /dev/null +++ b/python/ql/test/library-tests/taint/unpacking/TestTaint.expected @@ -0,0 +1,33 @@ +| test.py:8 | unpacking | a | NO TAINT | +| test.py:8 | unpacking | b | NO TAINT | +| test.py:8 | unpacking | c | NO TAINT | +| test.py:14 | unpacking_to_list | a | NO TAINT | +| test.py:14 | unpacking_to_list | b | NO TAINT | +| test.py:14 | unpacking_to_list | c | NO TAINT | +| test.py:23 | nested | a1 | NO TAINT | +| test.py:23 | nested | a2 | NO TAINT | +| test.py:23 | nested | a3 | NO TAINT | +| test.py:23 | nested | b | NO TAINT | +| test.py:23 | nested | c | NO TAINT | +| test.py:27 | nested | a1 | NO TAINT | +| test.py:27 | nested | a2 | NO TAINT | +| test.py:27 | nested | a3 | NO TAINT | +| test.py:27 | nested | b | NO TAINT | +| test.py:27 | nested | c | NO TAINT | +| test.py:31 | nested | a1 | NO TAINT | +| test.py:31 | nested | a2 | NO TAINT | +| test.py:31 | nested | a3 | NO TAINT | +| test.py:31 | nested | b | NO TAINT | +| test.py:31 | nested | c | NO TAINT | +| test.py:38 | unpack_from_set | a | NO TAINT | +| test.py:38 | unpack_from_set | b | NO TAINT | +| test.py:38 | unpack_from_set | c | NO TAINT | +| test.py:48 | contrived_1 | a | NO TAINT | +| test.py:48 | contrived_1 | b | NO TAINT | +| test.py:48 | contrived_1 | c | NO TAINT | +| test.py:48 | contrived_1 | d | NO TAINT | +| test.py:48 | contrived_1 | e | NO TAINT | +| test.py:48 | contrived_1 | f | NO TAINT | +| test.py:56 | contrived_2 | a | NO TAINT | +| test.py:56 | contrived_2 | b | NO TAINT | +| test.py:56 | contrived_2 | c | NO TAINT | diff --git a/python/ql/test/library-tests/taint/unpacking/TestTaint.ql b/python/ql/test/library-tests/taint/unpacking/TestTaint.ql new file mode 100644 index 00000000000..92657b1fef9 --- /dev/null +++ b/python/ql/test/library-tests/taint/unpacking/TestTaint.ql @@ -0,0 +1,18 @@ +import python +import semmle.python.security.TaintTracking +import Taint + +from Call call, Expr arg, string taint_string +where + call.getLocation().getFile().getShortName() = "test.py" and + call.getFunc().(Name).getId() = "test" and + arg = call.getAnArg() and + ( + not exists(TaintedNode tainted | tainted.getAstNode() = arg) and + taint_string = "NO TAINT" + or + exists(TaintedNode tainted | tainted.getAstNode() = arg | + taint_string = tainted.getTaintKind().toString() + ) + ) +select arg.getLocation().toString(), call.getScope().(Function).getName(), arg.toString(), taint_string diff --git a/python/ql/test/library-tests/taint/unpacking/test.py b/python/ql/test/library-tests/taint/unpacking/test.py new file mode 100644 index 00000000000..df3961ad6ab --- /dev/null +++ b/python/ql/test/library-tests/taint/unpacking/test.py @@ -0,0 +1,58 @@ +def test(*args): + pass + + +def unpacking(): + l = TAINTED_LIST + a, b, c = l + test(a, b, c) + + +def unpacking_to_list(): + l = TAINTED_LIST + [a, b, c] = l + test(a, b, c) + + +def nested(): + l = TAINTED_LIST + ll = [l, l, l] + + # list + [[a1, a2, a3], b, c] = ll + test(a1, a2, a3, b, c) + + # tuple + ((a1, a2, a3), b, c) = ll + test(a1, a2, a3, b, c) + + # mixed + [(a1, a2, a3), b, c] = ll + test(a1, a2, a3, b, c) + + +def unpack_from_set(): + # no guarantee on ordering ... don't know why you would ever do this + a, b, c = {"foo", "bar", TAINTED_STRING} + # either all should be tainted, or none of them + test(a, b, c) + + +def contrived_1(): + # A contrived example. Don't know why anyone would ever actually do this. + tainted_list = TAINTED_LIST + no_taint_list = [1,2,3] + + # We don't handle this case currently, since we mark `d`, `e` and `f` as tainted. + (a, b, c), (d, e, f) = tainted_list, no_taint_list + test(a, b, c, d, e, f) + + +def contrived_2(): + # A contrived example. Don't know why anyone would ever actually do this. + + # We currently only handle taint nested 2 levels. + [[[ (a,b,c) ]]] = [[[ TAINTED_LIST ]]] + test(a, b, c) + +# For Python 3, see https://www.python.org/dev/peps/pep-3132/ From 781024d67955ac72947eb83beff29146b2077ede Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 27 Jan 2020 12:55:23 +0100 Subject: [PATCH 5/8] Python: Recognize taint for iterable unpacking --- .../semmle/python/dataflow/Implementation.qll | 28 ++++++++++ .../taint/collections/TestStep.expected | 3 ++ .../taint/collections/TestTaint.expected | 6 +-- .../library-tests/taint/collections/test.py | 2 +- .../taint/unpacking/TestStep.expected | 27 ++++++++++ .../taint/unpacking/TestTaint.expected | 54 +++++++++---------- 6 files changed, 89 insertions(+), 31 deletions(-) diff --git a/python/ql/src/semmle/python/dataflow/Implementation.qll b/python/ql/src/semmle/python/dataflow/Implementation.qll index dc569904b47..1267aab6078 100644 --- a/python/ql/src/semmle/python/dataflow/Implementation.qll +++ b/python/ql/src/semmle/python/dataflow/Implementation.qll @@ -658,6 +658,8 @@ private class EssaTaintTracking extends string { or this.taintedAssignment(src, defn, context, path, kind) or + this.taintedMultiAssignment(src, defn, context, path, kind) + or this.taintedAttributeAssignment(src, defn, context, path, kind) or this.taintedParameterDefinition(src, defn, context, path, kind) @@ -705,6 +707,32 @@ private class EssaTaintTracking extends string { ) } + pragma[noinline] + private predicate taintedMultiAssignment( + TaintTrackingNode src, MultiAssignmentDefinition defn, TaintTrackingContext context, + AttributePath path, TaintKind kind + ) { + exists(DataFlow::Node srcnode, TaintKind srckind, Assign assign | + src = TTaintTrackingNode_(srcnode, context, path, srckind, this) and + path.noAttribute() + | + assign.getValue().getAFlowNode() = srcnode.asCfgNode() and + kind = iterable_unpacking_decent(assign.getATarget().getAFlowNode(), defn.getDefiningNode(), + srckind) + ) + } + + /** `((x,y), ...) = value` with any nesting on LHS */ + private TaintKind iterable_unpacking_decent( + SequenceNode left_parent, ControlFlowNode left_defn, CollectionKind parent_kind + ) { + left_parent.getAnElement() = left_defn and + result = parent_kind.getMember() + or + result = iterable_unpacking_decent(left_parent.getAnElement(), left_defn, + parent_kind.getMember()) + } + pragma[noinline] private predicate taintedAttributeAssignment( TaintTrackingNode src, AttributeAssignment defn, TaintTrackingContext context, diff --git a/python/ql/test/library-tests/taint/collections/TestStep.expected b/python/ql/test/library-tests/taint/collections/TestStep.expected index 609c7c3bcd1..a9591db0b9f 100644 --- a/python/ql/test/library-tests/taint/collections/TestStep.expected +++ b/python/ql/test/library-tests/taint/collections/TestStep.expected @@ -26,6 +26,9 @@ | Taint [externally controlled string] | test.py:29 | test.py:29:9:29:25 | Subscript | | --> | Taint [externally controlled string] | test.py:32 | test.py:32:16:32:16 | c | | | Taint [externally controlled string] | test.py:30 | test.py:30:9:30:20 | tainted_list | | --> | Taint [externally controlled string] | test.py:30 | test.py:30:9:30:27 | Attribute() | | | Taint [externally controlled string] | test.py:30 | test.py:30:9:30:27 | Attribute() | | --> | Taint [externally controlled string] | test.py:32 | test.py:32:19:32:19 | d | | +| Taint [externally controlled string] | test.py:31 | test.py:31:15:31:26 | tainted_list | | --> | Taint externally controlled string | test.py:32 | test.py:32:22:32:22 | e | | +| Taint [externally controlled string] | test.py:31 | test.py:31:15:31:26 | tainted_list | | --> | Taint externally controlled string | test.py:32 | test.py:32:25:32:25 | f | | +| Taint [externally controlled string] | test.py:31 | test.py:31:15:31:26 | tainted_list | | --> | Taint externally controlled string | test.py:32 | test.py:32:28:32:28 | g | | | Taint [externally controlled string] | test.py:33 | test.py:33:14:33:25 | tainted_list | | --> | Taint externally controlled string | test.py:33 | test.py:33:5:33:26 | For | | | Taint [externally controlled string] | test.py:35 | test.py:35:14:35:35 | reversed() | | --> | Taint externally controlled string | test.py:35 | test.py:35:5:35:36 | For | | | Taint [externally controlled string] | test.py:35 | test.py:35:23:35:34 | tainted_list | | --> | Taint [externally controlled string] | test.py:35 | test.py:35:14:35:35 | reversed() | | diff --git a/python/ql/test/library-tests/taint/collections/TestTaint.expected b/python/ql/test/library-tests/taint/collections/TestTaint.expected index 6f3ca9515d1..aae51236e6d 100644 --- a/python/ql/test/library-tests/taint/collections/TestTaint.expected +++ b/python/ql/test/library-tests/taint/collections/TestTaint.expected @@ -10,9 +10,9 @@ | test.py:32 | test_access | b | externally controlled string | | test.py:32 | test_access | c | [externally controlled string] | | test.py:32 | test_access | d | [externally controlled string] | -| test.py:32 | test_access | e | NO TAINT | -| test.py:32 | test_access | f | NO TAINT | -| test.py:32 | test_access | g | NO TAINT | +| test.py:32 | test_access | e | externally controlled string | +| test.py:32 | test_access | f | externally controlled string | +| test.py:32 | test_access | g | externally controlled string | | test.py:34 | test_access | h | externally controlled string | | test.py:36 | test_access | i | externally controlled string | | test.py:43 | test_dict_access | a | externally controlled string | diff --git a/python/ql/test/library-tests/taint/collections/test.py b/python/ql/test/library-tests/taint/collections/test.py index 0778e8afacf..445effe19ce 100644 --- a/python/ql/test/library-tests/taint/collections/test.py +++ b/python/ql/test/library-tests/taint/collections/test.py @@ -28,7 +28,7 @@ def test_access(): b = tainted_list[x] c = tainted_list[y:z] d = tainted_list.copy() - e, f, g = tainted_list # TODO: currently not handled + e, f, g = tainted_list test(a, b, c, d, e, f, g) for h in tainted_list: test(h) diff --git a/python/ql/test/library-tests/taint/unpacking/TestStep.expected b/python/ql/test/library-tests/taint/unpacking/TestStep.expected index 51c4e7fc410..5d800e6b5b8 100644 --- a/python/ql/test/library-tests/taint/unpacking/TestStep.expected +++ b/python/ql/test/library-tests/taint/unpacking/TestStep.expected @@ -1,8 +1,35 @@ | Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | --> | Taint [[externally controlled string]] | test.py:22 | test.py:22:28:22:29 | ll | | | Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | --> | Taint [[externally controlled string]] | test.py:26 | test.py:26:28:26:29 | ll | | | Taint [[externally controlled string]] | test.py:19 | test.py:19:10:19:18 | List | | --> | Taint [[externally controlled string]] | test.py:30 | test.py:30:28:30:29 | ll | | +| Taint [[externally controlled string]] | test.py:22 | test.py:22:28:22:29 | ll | | --> | Taint [externally controlled string] | test.py:23 | test.py:23:22:23:22 | b | | +| Taint [[externally controlled string]] | test.py:22 | test.py:22:28:22:29 | ll | | --> | Taint [externally controlled string] | test.py:23 | test.py:23:25:23:25 | c | | +| Taint [[externally controlled string]] | test.py:22 | test.py:22:28:22:29 | ll | | --> | Taint externally controlled string | test.py:23 | test.py:23:10:23:11 | a1 | | +| Taint [[externally controlled string]] | test.py:22 | test.py:22:28:22:29 | ll | | --> | Taint externally controlled string | test.py:23 | test.py:23:14:23:15 | a2 | | +| Taint [[externally controlled string]] | test.py:22 | test.py:22:28:22:29 | ll | | --> | Taint externally controlled string | test.py:23 | test.py:23:18:23:19 | a3 | | +| Taint [[externally controlled string]] | test.py:26 | test.py:26:28:26:29 | ll | | --> | Taint [externally controlled string] | test.py:27 | test.py:27:22:27:22 | b | | +| Taint [[externally controlled string]] | test.py:26 | test.py:26:28:26:29 | ll | | --> | Taint [externally controlled string] | test.py:27 | test.py:27:25:27:25 | c | | +| Taint [[externally controlled string]] | test.py:26 | test.py:26:28:26:29 | ll | | --> | Taint externally controlled string | test.py:27 | test.py:27:10:27:11 | a1 | | +| Taint [[externally controlled string]] | test.py:26 | test.py:26:28:26:29 | ll | | --> | Taint externally controlled string | test.py:27 | test.py:27:14:27:15 | a2 | | +| Taint [[externally controlled string]] | test.py:26 | test.py:26:28:26:29 | ll | | --> | Taint externally controlled string | test.py:27 | test.py:27:18:27:19 | a3 | | +| Taint [[externally controlled string]] | test.py:30 | test.py:30:28:30:29 | ll | | --> | Taint [externally controlled string] | test.py:31 | test.py:31:22:31:22 | b | | +| Taint [[externally controlled string]] | test.py:30 | test.py:30:28:30:29 | ll | | --> | Taint [externally controlled string] | test.py:31 | test.py:31:25:31:25 | c | | +| Taint [[externally controlled string]] | test.py:30 | test.py:30:28:30:29 | ll | | --> | Taint externally controlled string | test.py:31 | test.py:31:10:31:11 | a1 | | +| Taint [[externally controlled string]] | test.py:30 | test.py:30:28:30:29 | ll | | --> | Taint externally controlled string | test.py:31 | test.py:31:14:31:15 | a2 | | +| Taint [[externally controlled string]] | test.py:30 | test.py:30:28:30:29 | ll | | --> | Taint externally controlled string | test.py:31 | test.py:31:18:31:19 | a3 | | +| Taint [[externally controlled string]] | test.py:47 | test.py:47:28:47:54 | Tuple | | --> | Taint externally controlled string | test.py:48 | test.py:48:10:48:10 | a | | +| Taint [[externally controlled string]] | test.py:47 | test.py:47:28:47:54 | Tuple | | --> | Taint externally controlled string | test.py:48 | test.py:48:13:48:13 | b | | +| Taint [[externally controlled string]] | test.py:47 | test.py:47:28:47:54 | Tuple | | --> | Taint externally controlled string | test.py:48 | test.py:48:16:48:16 | c | | +| Taint [[externally controlled string]] | test.py:47 | test.py:47:28:47:54 | Tuple | | --> | Taint externally controlled string | test.py:48 | test.py:48:19:48:19 | d | | +| Taint [[externally controlled string]] | test.py:47 | test.py:47:28:47:54 | Tuple | | --> | Taint externally controlled string | test.py:48 | test.py:48:22:48:22 | e | | +| Taint [[externally controlled string]] | test.py:47 | test.py:47:28:47:54 | Tuple | | --> | Taint externally controlled string | test.py:48 | test.py:48:25:48:25 | f | | | Taint [externally controlled string] | test.py:6 | test.py:6:9:6:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:7 | test.py:7:15:7:15 | l | | +| Taint [externally controlled string] | test.py:7 | test.py:7:15:7:15 | l | | --> | Taint externally controlled string | test.py:8 | test.py:8:10:8:10 | a | | +| Taint [externally controlled string] | test.py:7 | test.py:7:15:7:15 | l | | --> | Taint externally controlled string | test.py:8 | test.py:8:13:8:13 | b | | +| Taint [externally controlled string] | test.py:7 | test.py:7:15:7:15 | l | | --> | Taint externally controlled string | test.py:8 | test.py:8:16:8:16 | c | | | Taint [externally controlled string] | test.py:12 | test.py:12:9:12:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:13 | test.py:13:17:13:17 | l | | +| Taint [externally controlled string] | test.py:13 | test.py:13:17:13:17 | l | | --> | Taint externally controlled string | test.py:14 | test.py:14:10:14:10 | a | | +| Taint [externally controlled string] | test.py:13 | test.py:13:17:13:17 | l | | --> | Taint externally controlled string | test.py:14 | test.py:14:13:14:13 | b | | +| Taint [externally controlled string] | test.py:13 | test.py:13:17:13:17 | l | | --> | Taint externally controlled string | test.py:14 | test.py:14:16:14:16 | c | | | Taint [externally controlled string] | test.py:18 | test.py:18:9:18:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:19 | test.py:19:11:19:11 | l | | | Taint [externally controlled string] | test.py:18 | test.py:18:9:18:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:19 | test.py:19:14:19:14 | l | | | Taint [externally controlled string] | test.py:18 | test.py:18:9:18:20 | TAINTED_LIST | | --> | Taint [externally controlled string] | test.py:19 | test.py:19:17:19:17 | l | | diff --git a/python/ql/test/library-tests/taint/unpacking/TestTaint.expected b/python/ql/test/library-tests/taint/unpacking/TestTaint.expected index dc704043c7d..d1bad70f811 100644 --- a/python/ql/test/library-tests/taint/unpacking/TestTaint.expected +++ b/python/ql/test/library-tests/taint/unpacking/TestTaint.expected @@ -1,33 +1,33 @@ -| test.py:8 | unpacking | a | NO TAINT | -| test.py:8 | unpacking | b | NO TAINT | -| test.py:8 | unpacking | c | NO TAINT | -| test.py:14 | unpacking_to_list | a | NO TAINT | -| test.py:14 | unpacking_to_list | b | NO TAINT | -| test.py:14 | unpacking_to_list | c | NO TAINT | -| test.py:23 | nested | a1 | NO TAINT | -| test.py:23 | nested | a2 | NO TAINT | -| test.py:23 | nested | a3 | NO TAINT | -| test.py:23 | nested | b | NO TAINT | -| test.py:23 | nested | c | NO TAINT | -| test.py:27 | nested | a1 | NO TAINT | -| test.py:27 | nested | a2 | NO TAINT | -| test.py:27 | nested | a3 | NO TAINT | -| test.py:27 | nested | b | NO TAINT | -| test.py:27 | nested | c | NO TAINT | -| test.py:31 | nested | a1 | NO TAINT | -| test.py:31 | nested | a2 | NO TAINT | -| test.py:31 | nested | a3 | NO TAINT | -| test.py:31 | nested | b | NO TAINT | -| test.py:31 | nested | c | NO TAINT | +| test.py:8 | unpacking | a | externally controlled string | +| test.py:8 | unpacking | b | externally controlled string | +| test.py:8 | unpacking | c | externally controlled string | +| test.py:14 | unpacking_to_list | a | externally controlled string | +| test.py:14 | unpacking_to_list | b | externally controlled string | +| test.py:14 | unpacking_to_list | c | externally controlled string | +| test.py:23 | nested | a1 | externally controlled string | +| test.py:23 | nested | a2 | externally controlled string | +| test.py:23 | nested | a3 | externally controlled string | +| test.py:23 | nested | b | [externally controlled string] | +| test.py:23 | nested | c | [externally controlled string] | +| test.py:27 | nested | a1 | externally controlled string | +| test.py:27 | nested | a2 | externally controlled string | +| test.py:27 | nested | a3 | externally controlled string | +| test.py:27 | nested | b | [externally controlled string] | +| test.py:27 | nested | c | [externally controlled string] | +| test.py:31 | nested | a1 | externally controlled string | +| test.py:31 | nested | a2 | externally controlled string | +| test.py:31 | nested | a3 | externally controlled string | +| test.py:31 | nested | b | [externally controlled string] | +| test.py:31 | nested | c | [externally controlled string] | | test.py:38 | unpack_from_set | a | NO TAINT | | test.py:38 | unpack_from_set | b | NO TAINT | | test.py:38 | unpack_from_set | c | NO TAINT | -| test.py:48 | contrived_1 | a | NO TAINT | -| test.py:48 | contrived_1 | b | NO TAINT | -| test.py:48 | contrived_1 | c | NO TAINT | -| test.py:48 | contrived_1 | d | NO TAINT | -| test.py:48 | contrived_1 | e | NO TAINT | -| test.py:48 | contrived_1 | f | NO TAINT | +| test.py:48 | contrived_1 | a | externally controlled string | +| test.py:48 | contrived_1 | b | externally controlled string | +| test.py:48 | contrived_1 | c | externally controlled string | +| test.py:48 | contrived_1 | d | externally controlled string | +| test.py:48 | contrived_1 | e | externally controlled string | +| test.py:48 | contrived_1 | f | externally controlled string | | test.py:56 | contrived_2 | a | NO TAINT | | test.py:56 | contrived_2 | b | NO TAINT | | test.py:56 | contrived_2 | c | NO TAINT | From 1b670354b2b0cf4379d224cc687c7d45784cafb6 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 27 Jan 2020 15:21:11 +0100 Subject: [PATCH 6/8] Python: Add tests for extended iterable unpacking --- .../3/library-tests/taint/unpacking/Taint.qll | 27 +++++++++++++++++++ .../taint/unpacking/TestTaint.expected | 6 +++++ .../taint/unpacking/TestTaint.ql | 18 +++++++++++++ .../3/library-tests/taint/unpacking/test.py | 24 +++++++++++++++++ 4 files changed, 75 insertions(+) create mode 100644 python/ql/test/3/library-tests/taint/unpacking/Taint.qll create mode 100644 python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected create mode 100644 python/ql/test/3/library-tests/taint/unpacking/TestTaint.ql create mode 100644 python/ql/test/3/library-tests/taint/unpacking/test.py diff --git a/python/ql/test/3/library-tests/taint/unpacking/Taint.qll b/python/ql/test/3/library-tests/taint/unpacking/Taint.qll new file mode 100644 index 00000000000..b97f65225f2 --- /dev/null +++ b/python/ql/test/3/library-tests/taint/unpacking/Taint.qll @@ -0,0 +1,27 @@ +import python +import semmle.python.security.TaintTracking +import semmle.python.security.strings.Untrusted + +class SimpleSource extends TaintSource { + SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind } + + override string toString() { result = "taint source" } +} + +class ListSource extends TaintSource { + ListSource() { this.(NameNode).getId() = "TAINTED_LIST" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind } + + override string toString() { result = "list taint source" } +} + +class DictSource extends TaintSource { + DictSource() { this.(NameNode).getId() = "TAINTED_DICT" } + + override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind } + + override string toString() { result = "dict taint source" } +} diff --git a/python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected b/python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected new file mode 100644 index 00000000000..396f7d6e65c --- /dev/null +++ b/python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected @@ -0,0 +1,6 @@ +| test.py:11 | extended_unpacking | first | externally controlled string | +| test.py:11 | extended_unpacking | last | externally controlled string | +| test.py:11 | extended_unpacking | rest | NO TAINT | +| test.py:16 | also_allowed | a | NO TAINT | +| test.py:24 | also_allowed | b | NO TAINT | +| test.py:24 | also_allowed | c | NO TAINT | diff --git a/python/ql/test/3/library-tests/taint/unpacking/TestTaint.ql b/python/ql/test/3/library-tests/taint/unpacking/TestTaint.ql new file mode 100644 index 00000000000..92657b1fef9 --- /dev/null +++ b/python/ql/test/3/library-tests/taint/unpacking/TestTaint.ql @@ -0,0 +1,18 @@ +import python +import semmle.python.security.TaintTracking +import Taint + +from Call call, Expr arg, string taint_string +where + call.getLocation().getFile().getShortName() = "test.py" and + call.getFunc().(Name).getId() = "test" and + arg = call.getAnArg() and + ( + not exists(TaintedNode tainted | tainted.getAstNode() = arg) and + taint_string = "NO TAINT" + or + exists(TaintedNode tainted | tainted.getAstNode() = arg | + taint_string = tainted.getTaintKind().toString() + ) + ) +select arg.getLocation().toString(), call.getScope().(Function).getName(), arg.toString(), taint_string diff --git a/python/ql/test/3/library-tests/taint/unpacking/test.py b/python/ql/test/3/library-tests/taint/unpacking/test.py new file mode 100644 index 00000000000..5ae3f114eaf --- /dev/null +++ b/python/ql/test/3/library-tests/taint/unpacking/test.py @@ -0,0 +1,24 @@ +# Extended Iterable Unpacking -- PEP 3132 +# https://www.python.org/dev/peps/pep-3132/ + + +def test(*args): + pass + + +def extended_unpacking(): + first, *rest, last = TAINTED_LIST + test(first, rest, last) # TODO: mark `rest` as [taint] + + +def also_allowed(): + *a, = TAINTED_LIST + test(a) # TODO: mark `a` as [taint] + + # for b, *c in [(1, 2, 3), (4, 5, 6, 7)]: + # print(c) + # i=0; c=[2,3] + # i=1; c=[5,6,7] + + for b, *c in [TAINTED_LIST, TAINTED_LIST]: + test(b, c) # TODO: mark `c` as [taint] From 081d66eaa38a911e06c4606ae6c18eb0a55c2c82 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 27 Jan 2020 15:22:01 +0100 Subject: [PATCH 7/8] Python: Recognize taint for extended iterable unpacking --- python/ql/src/semmle/python/Flow.qll | 11 +++++++++++ .../ql/src/semmle/python/dataflow/Implementation.qll | 5 ++++- python/ql/src/semmle/python/essa/SsaDefinitions.qll | 6 +++++- .../library-tests/taint/unpacking/TestTaint.expected | 7 +++++-- .../ql/test/3/library-tests/taint/unpacking/test.py | 11 +++++++++-- 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll index 8699c0368d7..c81e5e040e4 100755 --- a/python/ql/src/semmle/python/Flow.qll +++ b/python/ql/src/semmle/python/Flow.qll @@ -1068,6 +1068,17 @@ class NameConstantNode extends NameNode { */ } +/** A control flow node correspoinding to a starred expression, `*a`. */ +class StarredNode extends ControlFlowNode { + StarredNode() { + toAst(this) instanceof Starred + } + + ControlFlowNode getValue() { + toAst(result) = toAst(this).(Starred).getValue() + } +} + private module Scopes { private predicate fast_local(NameNode n) { diff --git a/python/ql/src/semmle/python/dataflow/Implementation.qll b/python/ql/src/semmle/python/dataflow/Implementation.qll index 1267aab6078..5978c16297e 100644 --- a/python/ql/src/semmle/python/dataflow/Implementation.qll +++ b/python/ql/src/semmle/python/dataflow/Implementation.qll @@ -727,7 +727,10 @@ private class EssaTaintTracking extends string { SequenceNode left_parent, ControlFlowNode left_defn, CollectionKind parent_kind ) { left_parent.getAnElement() = left_defn and - result = parent_kind.getMember() + // Handle `a, *b = some_iterable` + if left_defn instanceof StarredNode + then result = parent_kind + else result = parent_kind.getMember() or result = iterable_unpacking_decent(left_parent.getAnElement(), left_defn, parent_kind.getMember()) diff --git a/python/ql/src/semmle/python/essa/SsaDefinitions.qll b/python/ql/src/semmle/python/essa/SsaDefinitions.qll index d759e6df7a3..8a6bc274aa0 100644 --- a/python/ql/src/semmle/python/essa/SsaDefinitions.qll +++ b/python/ql/src/semmle/python/essa/SsaDefinitions.qll @@ -39,7 +39,11 @@ cached module SsaSource { /** Holds if `v` is defined by multiple assignment at `defn`. */ cached predicate multi_assignment_definition(Variable v, ControlFlowNode defn, int n, SequenceNode lhs) { - defn.(NameNode).defines(v) and + ( + defn.(NameNode).defines(v) + or + defn.(StarredNode).getValue().(NameNode).defines(v) + ) and not exists(defn.(DefinitionNode).getValue()) and lhs.getElement(n) = defn and lhs.getBasicBlock().dominates(defn.getBasicBlock()) diff --git a/python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected b/python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected index 396f7d6e65c..b6aacc7d670 100644 --- a/python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected +++ b/python/ql/test/3/library-tests/taint/unpacking/TestTaint.expected @@ -1,6 +1,9 @@ | test.py:11 | extended_unpacking | first | externally controlled string | | test.py:11 | extended_unpacking | last | externally controlled string | -| test.py:11 | extended_unpacking | rest | NO TAINT | -| test.py:16 | also_allowed | a | NO TAINT | +| test.py:11 | extended_unpacking | rest | [externally controlled string] | +| test.py:16 | also_allowed | a | [externally controlled string] | | test.py:24 | also_allowed | b | NO TAINT | | test.py:24 | also_allowed | c | NO TAINT | +| test.py:31 | nested | x | externally controlled string | +| test.py:31 | nested | xs | [externally controlled string] | +| test.py:31 | nested | ys | [externally controlled string] | diff --git a/python/ql/test/3/library-tests/taint/unpacking/test.py b/python/ql/test/3/library-tests/taint/unpacking/test.py index 5ae3f114eaf..6807ddebee8 100644 --- a/python/ql/test/3/library-tests/taint/unpacking/test.py +++ b/python/ql/test/3/library-tests/taint/unpacking/test.py @@ -8,12 +8,12 @@ def test(*args): def extended_unpacking(): first, *rest, last = TAINTED_LIST - test(first, rest, last) # TODO: mark `rest` as [taint] + test(first, rest, last) def also_allowed(): *a, = TAINTED_LIST - test(a) # TODO: mark `a` as [taint] + test(a) # for b, *c in [(1, 2, 3), (4, 5, 6, 7)]: # print(c) @@ -22,3 +22,10 @@ def also_allowed(): for b, *c in [TAINTED_LIST, TAINTED_LIST]: test(b, c) # TODO: mark `c` as [taint] + +def nested(): + l = TAINTED_LIST + ll = [l,l] + + [[x, *xs], ys] = ll + test(x, xs, ys) From 1558cf2eae2727b908b337e5a10472590e46512d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 13 Feb 2020 13:35:29 +0100 Subject: [PATCH 8/8] Python: Fix typo (decent => descent) --- python/ql/src/semmle/python/dataflow/Implementation.qll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/src/semmle/python/dataflow/Implementation.qll b/python/ql/src/semmle/python/dataflow/Implementation.qll index 5978c16297e..b2326f2d100 100644 --- a/python/ql/src/semmle/python/dataflow/Implementation.qll +++ b/python/ql/src/semmle/python/dataflow/Implementation.qll @@ -717,13 +717,13 @@ private class EssaTaintTracking extends string { path.noAttribute() | assign.getValue().getAFlowNode() = srcnode.asCfgNode() and - kind = iterable_unpacking_decent(assign.getATarget().getAFlowNode(), defn.getDefiningNode(), + kind = iterable_unpacking_descent(assign.getATarget().getAFlowNode(), defn.getDefiningNode(), srckind) ) } /** `((x,y), ...) = value` with any nesting on LHS */ - private TaintKind iterable_unpacking_decent( + private TaintKind iterable_unpacking_descent( SequenceNode left_parent, ControlFlowNode left_defn, CollectionKind parent_kind ) { left_parent.getAnElement() = left_defn and @@ -732,7 +732,7 @@ private class EssaTaintTracking extends string { then result = parent_kind else result = parent_kind.getMember() or - result = iterable_unpacking_decent(left_parent.getAnElement(), left_defn, + result = iterable_unpacking_descent(left_parent.getAnElement(), left_defn, parent_kind.getMember()) }