From 13815fe7282e14967e3f0e74f92a63b5917d5463 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 1 Nov 2021 11:47:22 +0100 Subject: [PATCH] Python: Model known APIView subclasses Added internal helper `.qll` file as well --- .../python/frameworks/RestFramework.qll | 50 ++++- .../frameworks/internal/SubclassFinder.qll | 189 ++++++++++++++++++ 2 files changed, 237 insertions(+), 2 deletions(-) create mode 100644 python/ql/lib/semmle/python/frameworks/internal/SubclassFinder.qll diff --git a/python/ql/lib/semmle/python/frameworks/RestFramework.qll b/python/ql/lib/semmle/python/frameworks/RestFramework.qll index 15fd70ddf78..36222358e15 100644 --- a/python/ql/lib/semmle/python/frameworks/RestFramework.qll +++ b/python/ql/lib/semmle/python/frameworks/RestFramework.qll @@ -37,8 +37,54 @@ private module RestFramework { */ private class ModeledApiViewClasses extends Django::Views::View::ModeledSubclass { ModeledApiViewClasses() { - this = API::moduleImport("rest_framework").getMember("views").getMember("APIView") - // TODO: Need to model all known subclasses + this = API::moduleImport("rest_framework").getMember("views").getMember("APIView") or + // imports generated by python/frameworks/internal/SubclassFinder.qll + this = + API::moduleImport("rest_framework") + .getMember("authtoken") + .getMember("views") + .getMember("APIView") or + this = + API::moduleImport("rest_framework") + .getMember("authtoken") + .getMember("views") + .getMember("ObtainAuthToken") or + this = API::moduleImport("rest_framework").getMember("decorators").getMember("APIView") or + this = API::moduleImport("rest_framework").getMember("generics").getMember("CreateAPIView") or + this = API::moduleImport("rest_framework").getMember("generics").getMember("DestroyAPIView") or + this = API::moduleImport("rest_framework").getMember("generics").getMember("GenericAPIView") or + this = API::moduleImport("rest_framework").getMember("generics").getMember("ListAPIView") or + this = + 
API::moduleImport("rest_framework").getMember("generics").getMember("ListCreateAPIView") or + this = API::moduleImport("rest_framework").getMember("generics").getMember("RetrieveAPIView") or + this = + API::moduleImport("rest_framework") + .getMember("generics") + .getMember("RetrieveDestroyAPIView") or + this = + API::moduleImport("rest_framework").getMember("generics").getMember("RetrieveUpdateAPIView") or + this = + API::moduleImport("rest_framework") + .getMember("generics") + .getMember("RetrieveUpdateDestroyAPIView") or + this = API::moduleImport("rest_framework").getMember("generics").getMember("UpdateAPIView") or + this = API::moduleImport("rest_framework").getMember("routers").getMember("APIRootView") or + this = API::moduleImport("rest_framework").getMember("routers").getMember("SchemaView") or + this = + API::moduleImport("rest_framework") + .getMember("schemas") + .getMember("views") + .getMember("APIView") or + this = + API::moduleImport("rest_framework") + .getMember("schemas") + .getMember("views") + .getMember("SchemaView") or + this = API::moduleImport("rest_framework").getMember("viewsets").getMember("GenericViewSet") or + this = API::moduleImport("rest_framework").getMember("viewsets").getMember("ModelViewSet") or + this = + API::moduleImport("rest_framework").getMember("viewsets").getMember("ReadOnlyModelViewSet") or + this = API::moduleImport("rest_framework").getMember("viewsets").getMember("ViewSet") } } diff --git a/python/ql/lib/semmle/python/frameworks/internal/SubclassFinder.qll b/python/ql/lib/semmle/python/frameworks/internal/SubclassFinder.qll new file mode 100644 index 00000000000..e09f2b457d6 --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/internal/SubclassFinder.qll @@ -0,0 +1,189 @@ +/** + * INTERNAL: Do not use. + * + * Has predicates to help find subclasses in library code. 
Should only be used to aid in + * the manual library modeling process. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.ApiGraphs +private import semmle.python.filters.Tests + +// very much inspired by the draft at https://github.com/github/codeql/pull/5632 +private module NotExposed { + // Instructions: + // This needs to be automated better, but for this prototype, here are some rough instructions: + // 1) fill out the `getAlreadyModeledClass` body below + // 2) quick-eval the `quickEvalMe` predicate below, and copy the output to your modeling predicate + class MySpec extends FindSubclassesSpec { + MySpec() { this = "MySpec" } + + override API::Node getAlreadyModeledClass() { + // FILL ME OUT ! (but don't commit with any changes) + none() + // for example + // result = API::moduleImport("rest_framework").getMember("views").getMember("APIView") + } + } + + predicate quickEvalMe(string newImport) { + newImport = + "// imports generated by python/frameworks/internal/SubclassFinder.qll\n" + "this = API::" + + concat(string newModelFullyQualified | + newModel(any(MySpec spec), newModelFullyQualified, _, _, _) + | + fullyQualifiedToAPIGraphPath(newModelFullyQualified), " or this = API::" + ) + } + + bindingset[fullyQaulified] + string fullyQualifiedToAPIGraphPath(string fullyQaulified) { + result = "moduleImport(\"" + fullyQaulified.replaceAll(".", "\").getMember(\"") + "\")" + } + + // -- Specs -- + bindingset[this] + abstract class FindSubclassesSpec extends string { + abstract API::Node getAlreadyModeledClass(); + } + + API::Node newOrExistingModeling(FindSubclassesSpec spec) { + result = spec.getAlreadyModeledClass() + or + exists(string newSubclassName | + newModel(spec, newSubclassName, _, _, _) and + result.getPath() = fullyQualifiedToAPIGraphPath(newSubclassName) + ) + } + + bindingset[fullyQualifiedName] + predicate alreadyModeled(FindSubclassesSpec spec, string fullyQualifiedName) { +
fullyQualifiedToAPIGraphPath(fullyQualifiedName) = spec.getAlreadyModeledClass().getPath() + } + + predicate isNonTestProjectCode(AstNode ast) { + not ast.getScope*() instanceof TestScope and + not ast.getLocation().getFile().getRelativePath().matches("tests/%") and + exists(ast.getLocation().getFile().getRelativePath()) + } + + predicate hasAllStatement(Module mod) { + exists(AssignStmt a, GlobalVariable all | + a.defines(all) and + a.getScope() = mod and + all.getId() = "__all__" + ) + } + + /** + * Holds if `newAliasFullyQualified` describes a new alias originating from the import + * `from <module> import <member> [as <new-name>]`, where `<module>.<member>` belongs to + * `spec`. + * So if this import happened in module `foo.bar`, `newAliasFullyQualified` would be + * `foo.bar.<member>` (or `foo.bar.<new-name>`). + * + * Note that this predicate currently respects `__all__` in sort of a backwards fashion. + * - if `__all__` is defined in module `foo.bar`, we only allow new aliases where the member name is also in `__all__`. (this doesn't map 100% to the semantics of imports though) + * - If `__all__` is not defined we don't impose any limitations. + * + * Also note that we don't currently consider deleting module-attributes at all, so in the code snippet below, we would consider that `my_module.foo` is a + * reference to `django.foo`, although `my_module.foo` isn't even available at runtime. (there currently also isn't any code to discover that `my_module.bar` + * is an alias to `django.foo`) + * ```py + * # module my_module + * from django import foo + * bar = foo + * del foo + * ``` + */ + predicate newDirectAlias( + FindSubclassesSpec spec, string newAliasFullyQualified, ImportMember importMember, Module mod, + Location loc + ) { + importMember = newOrExistingModeling(spec).getAUse().asExpr() and + importMember.getScope() = mod and + loc = importMember.getLocation() and + ( + mod.isPackageInit() and + newAliasFullyQualified = mod.getPackageName() + "."
+ importMember.getName() + or + not mod.isPackageInit() and + newAliasFullyQualified = mod.getName() + "." + importMember.getName() + ) and + ( + not hasAllStatement(mod) + or + mod.declaredInAll(importMember.getName()) + ) and + not alreadyModeled(spec, newAliasFullyQualified) and + isNonTestProjectCode(importMember) + } + + /** Same as the `newDirectAlias` predicate, but handling `from <module> import *`, considering all `<member>`, where `<module>.<member>` belongs to `spec`. */ + predicate newImportStar( + FindSubclassesSpec spec, string newAliasFullyQualified, ImportStar importStar, Module mod, + API::Node relevantClass, string relevantName, Location loc + ) { + relevantClass = newOrExistingModeling(spec) and + loc = importStar.getLocation() and + importStar.getScope() = mod and + // HACK: recover `relevantName` by string-matching this node's API graph path against its predecessor's path + relevantClass.getPath() = + relevantClass.getAPredecessor().getPath() + ".getMember(\"" + relevantName + "\")" and + relevantClass.getAPredecessor().getAUse().asExpr() = importStar.getModule() and + ( + mod.isPackageInit() and + newAliasFullyQualified = mod.getPackageName() + "." + relevantName + or + not mod.isPackageInit() and + newAliasFullyQualified = mod.getName() + "." + relevantName + ) and + ( + not hasAllStatement(mod) + or + mod.declaredInAll(relevantName) + ) and + not alreadyModeled(spec, newAliasFullyQualified) and + isNonTestProjectCode(importStar) + } + + /** Holds if `classExpr` defines a new subclass that belongs to `spec`, which has the fully qualified name `newSubclassQualified`. */ + predicate newSubclass( + FindSubclassesSpec spec, string newSubclassQualified, ClassExpr classExpr, Module mod, + Location loc + ) { + classExpr = newOrExistingModeling(spec).getASubclass*().getAUse().asExpr() and + classExpr.getScope() = mod and + newSubclassQualified = mod.getName() + "."
+ classExpr.getName() and + loc = classExpr.getLocation() and + not alreadyModeled(spec, newSubclassQualified) and + isNonTestProjectCode(classExpr) + } + + /** + * Holds if `newModelFullyQualified` describes either a new subclass, or a new alias, belonging to `spec` that we should include in our automated modeling. + * This new element is defined by `ast`, which is defined at `loc` in the module `mod`. + */ + query predicate newModel( + FindSubclassesSpec spec, string newModelFullyQualified, AstNode ast, Module mod, Location loc + ) { + ( + newSubclass(spec, newModelFullyQualified, ast, mod, loc) + or + newDirectAlias(spec, newModelFullyQualified, ast, mod, loc) + or + newImportStar(spec, newModelFullyQualified, ast, mod, _, _, loc) + ) + } + // An inherent problem with API graphs is that there doesn't need to exist a result for all + // the stuff we have already modeled... as an example, the following query has no + // results when evaluated against Django + // + // select API::moduleImport("django") + // .getMember("contrib") + // .getMember("admin") + // .getMember("views") + // .getMember("main") + // .getMember("ChangeListSearchForm") +}