From 72d0dcdaba6b0feafd3f2067f9b276d3dc8407cc Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 10 Oct 2023 15:52:01 +0200 Subject: [PATCH] Python: Workaround for module level items from `import *` not being LocalSourceNodes --- .../dataflow/new/internal/LocalSources.qll | 17 +++++++++++++++++ .../ApiGraphs/py3/test_import_star.py | 8 ++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index 0ff8174e6f7..eeaf0872f6c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -11,6 +11,7 @@ import DataFlowPublic private import DataFlowPrivate private import semmle.python.internal.CachedStages private import semmle.python.internal.Awaited +private import semmle.python.dataflow.new.internal.ImportStar /** * A data flow node that is a source of local flow. This includes things like @@ -39,6 +40,22 @@ class LocalSourceNode extends Node { this instanceof ExprNode and not simpleLocalFlowStepForTypetracking(_, this) or + // For `from foo import *; foo_function()`, we want to let the variables we think + // could originate in `foo` (such as `foo_function`) to be available in the API + // graph. This requires them to be local sources. They would not be from the code + // just above, since the CFG node has flow going into it from its corresponding + // `GlobalSsaVariable`. (a different work-around is to change API graphs to not rely + // as heavily on LocalSourceNode; I initially tried this, but it relied on a lot of + // copy-pasted code, and it requires some non-trivial deprecation for downgrading + // the result type of `.asSource()` to DataFlow::Node, so we've opted for this + // approach instead). + // + // Note: This is only needed at the module level -- uses inside functions appear as + // LocalSourceNodes as we expect. + // + // TODO: When rewriting SSA, we should be able to remove this workaround + ImportStar::namePossiblyDefinedInImportStar(this.(ExprNode).getNode(), _, any(Module m)) + or // We include all module variable nodes, as these act as stepping stones between writes and // reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be // unable to track across global variables. diff --git a/python/ql/test/library-tests/ApiGraphs/py3/test_import_star.py b/python/ql/test/library-tests/ApiGraphs/py3/test_import_star.py index ca732ae8fb7..edf344bac29 100644 --- a/python/ql/test/library-tests/ApiGraphs/py3/test_import_star.py +++ b/python/ql/test/library-tests/ApiGraphs/py3/test_import_star.py @@ -2,11 +2,11 @@ from unknown import * #$ use=moduleImport("unknown") -# Currently missing, as we do not consider `hello` to be a `LocalSourceNode`, since it has flow -# going into it from its corresponding `GlobalSsaVariable`. -hello() #$ MISSING: use=moduleImport("unknown").getMember("hello").getReturn() +# This used to be missing, as we did not consider `hello` to be a `LocalSourceNode`, +# since it has flow going into it from its corresponding `GlobalSsaVariable`. +hello() #$ use=moduleImport("unknown").getMember("hello").getReturn() -print(const_from_unknown) #$ MISSING: use=moduleImport("unknown").getMember("const_from_unknown") +print(const_from_unknown) #$ use=moduleImport("unknown").getMember("const_from_unknown") # We don't want our analysis to think that either `non_module_member` or `outer_bar` can # come from `from unknown import *`