Python: fix def nodes for subscript

We were using `getMember` for dictionaries, these are now getIndex
Also add convenience predicate for string keys
This commit is contained in:
Rasmus Lerchedahl Petersen 2022-09-27 15:05:09 +02:00
Родитель 99b9101455
Коммит 0b8e908823
8 изменённых файлов: 139 добавлений и 29 удалений

Просмотреть файл

@ -0,0 +1,7 @@
---
category: minorAnalysis
---
* Fixed labels in the API graph pertaining to definitions of subscripts. Previously, these were found by `getMember` rather than `getASubscript`.
* Added edges for indices of subscripts to the API graph. Now a subscripted API node will have an edge to the API node for the index expression. So if `foo` is matched by API node `A`, then `"key"` in `foo["key"]` will be matched by the API node `A.getIndex()`. This can be used to track the origin of the index.
* Added member predicate `getSubscriptAt(API::Node index)` to `API::Node`. Like `getASubscript()`, this will return an API node that matches a subscript of the node, but here it will be restricted to subscripts where the index matches the `index` parameter.
* Added convenience predicate `getSubscript("key")` to obtain a subscript at a specific index, when the index happens to be a statically known string.

Просмотреть файл

@ -249,6 +249,60 @@ module API {
*/
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
/**
* Gets a node representing an index of a subscript of this node.
* For example, in `obj[x]`, `x` is an index of `obj`.
*/
Node getIndex() { result = this.getASuccessor(Label::index()) }
/**
* Gets a node representing a subscript of this node at (string) index `key`.
* This requires that the index can be statically determined.
* Concretely, `key` is the text of a string constant (`PY::StrConst`) that
* reaches the index of the subscript.
*
* For example, the subscripts of `a` and `b` below would be found using
* the index `foo`:
* ```py
* a["foo"]
* x = "foo" if cond else "bar"
* b[x]
* ```
*/
Node getSubscript(string key) {
exists(API::Node index | result = this.getSubscriptAt(index) |
// `key` is the text of a string constant flowing into the index node
key = index.getAValueReachingSink().asExpr().(PY::StrConst).getText()
)
}
/**
* Gets a node representing a subscript of this node at index `index`.
*
* The result is one of the nodes given by `getASubscript()` and `index` is one
* of the nodes given by `getIndex()`, restricted to pairs that belong together:
* either both stem from the same subscript expression (which is read or
* written), or both stem from the same key/value item of a dictionary literal.
*/
Node getSubscriptAt(API::Node index) {
result = this.getASubscript() and
index = this.getIndex() and
(
// subscripting: `subscript` is an expression whose object is a use of this node
// and whose index expression is the sink of `index`
exists(PY::SubscriptNode subscript |
subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and
subscript.getIndex() = index.asSink().asCfgNode()
|
// reading: the subscript expression itself is the source of the result
subscript = result.asSource().asCfgNode()
or
// writing: the value assigned to the subscript is the sink of the result
subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode()
)
or
// dictionary literals: the literal reaches this node, and `index`/`result`
// are the key and value of the same item
exists(PY::Dict dict, PY::KeyValuePair item |
dict = this.getAValueReachingSink().asExpr() and
dict.getItem(_) = item and
item.getKey() = index.asSink().asExpr()
|
item.getValue() = result.asSink().asExpr()
)
)
}
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
@ -405,7 +459,7 @@ module API {
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
/**
* An `CallCfgNode` that is connected to the API graph.
* A `CallCfgNode` that is connected to the API graph.
*
* Can be used to reason about calls to an external API in which the correlation between
* parameters and/or return values must be retained.
@ -694,12 +748,24 @@ module API {
rhs = aw.getValue()
)
or
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict, that's how JS works.
// dictionary literals
exists(PY::Dict dict, PY::KeyValuePair item |
dict = pred.(DataFlow::ExprNode).getNode().getNode() and
dict.getItem(_) = item and
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue()
dict.getItem(_) = item
|
// from `x` to `{ "key": x }`
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue() and
lbl = Label::subscript()
or
// from `"key"` to `{ "key": x }`
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getKey() and
lbl = Label::index()
)
or
// list literals, from `x` to `[x]`
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
lbl = Label::subscript()
)
or
exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
@ -720,6 +786,20 @@ module API {
lbl = Label::memberFromRef(aw)
)
or
// subscripting
exists(DataFlow::LocalSourceNode src, DataFlow::Node subscript, DataFlow::Node index |
use(base, src) and
subscript = trackUseNode(src).getSubscript(index)
|
// from `x` to a definition of `x[...]`
rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and
lbl = Label::subscript()
or
// from `x` to `"key"` in `x["key"]`
rhs = index and
lbl = Label::index()
)
or
exists(EntryPoint entry |
base = root() and
lbl = Label::entryPoint(entry) and
@ -757,7 +837,8 @@ module API {
or
// Subscripting a node that is a use of `base`
lbl = Label::subscript() and
ref = pred.getASubscript()
ref = pred.getSubscript(_) and
ref.asCfgNode().isLoad()
or
// Subclassing a node
lbl = Label::subclass() and
@ -973,8 +1054,7 @@ module API {
member = any(DataFlow::AttrRef pr).getAttributeName() or
exists(Builtins::likelyBuiltin(member)) or
ImportStar::namePossiblyDefinedInImportStar(_, member, _) or
Impl::prefix_member(_, member, _) or
member = any(PY::Dict d).getAnItem().(PY::KeyValuePair).getKey().(PY::StrConst).getS()
Impl::prefix_member(_, member, _)
} or
MkLabelUnknownMember() or
MkLabelParameter(int i) {
@ -992,6 +1072,7 @@ module API {
MkLabelSubclass() or
MkLabelAwait() or
MkLabelSubscript() or
MkLabelIndex() or
MkLabelEntryPoint(EntryPoint ep)
/** A label for a module. */
@ -1072,6 +1153,11 @@ module API {
override string toString() { result = "getASubscript()" }
}
/** A label that gets the index of a subscript. */
class LabelIndex extends ApiLabel, MkLabelIndex {
override string toString() { result = "getIndex()" }
}
/** A label for entry points. */
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
private EntryPoint entry;
@ -1120,6 +1206,9 @@ module API {
/** Gets the `subscript` edge label. */
LabelSubscript subscript() { any() }
/** Gets the `index` edge label. */
LabelIndex index() { any() }
/** Gets the label going from the root node to the nodes associated with the given entry point. */
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
}

Просмотреть файл

@ -104,7 +104,7 @@ class LocalSourceNode extends Node {
/**
* Gets a subscript of this node.
*/
Node getASubscript() { Cached::subscript(this, result) }
Node getSubscript(Node index) { Cached::subscript(this, result, index) }
/**
* Gets a call to the method `methodName` on this node.
@ -249,13 +249,14 @@ private module Cached {
}
/**
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript.
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript with index/key `index`.
*/
cached
predicate subscript(LocalSourceNode node, CfgNode subscript) {
predicate subscript(LocalSourceNode node, CfgNode subscript, CfgNode index) {
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
node.flowsTo(seq) and
seq.getNode() = subscriptNode.getObject()
seq.getNode() = subscriptNode.getObject() and
index.getNode() = subscriptNode.getIndex()
)
}
}

Просмотреть файл

@ -91,14 +91,10 @@ private module ExperimentalPrivateDjango {
result = baseClassRef().getReturn().getAMember()
}
/** Gets a reference to a header instance call with `__setitem__`. */
API::Node headerSetItem() {
result = headerInstance() and
result.asSource().(DataFlow::AttrRead).getAttributeName() = "__setitem__"
}
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
DjangoResponseSetItemCall() { this = headerSetItem().getACall() }
DjangoResponseSetItemCall() {
this = baseClassRef().getReturn().getMember("__setitem__").getACall()
}
override DataFlow::Node getNameArg() { result = this.getArg(0) }
@ -109,8 +105,7 @@ private module ExperimentalPrivateDjango {
DataFlow::Node headerInput;
DjangoResponseDefinition() {
this.asCfgNode().(DefinitionNode) =
headerInstance().getAValueReachableFromSource().asCfgNode() and
headerInput = headerInstance().asSink() and
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
}

Просмотреть файл

@ -5,12 +5,12 @@ def callback(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("bar").
foo.bar(callback) #$ def=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("bar").getReturn()
def callback2(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c").getParameter(0)
x.baz2() #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c").getParameter(0).getMember("baz2").getReturn()
def callback2(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript().getParameter(0)
x.baz2() #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript().getParameter(0).getMember("baz2").getReturn()
mydict = {
"c": callback2, #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c")
"other": "whatever" #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("other")
"c": callback2, #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript()
"other": "whatever" #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript()
}
foo.baz(mydict) #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("baz").getReturn()

Просмотреть файл

@ -0,0 +1,6 @@
| test_subscript.py:4:11:4:28 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
| test_subscript.py:5:26:5:27 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
| test_subscript.py:6:5:6:22 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
| test_subscript.py:6:5:6:28 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
| test_subscript.py:7:5:7:22 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
| test_subscript.py:7:5:7:28 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |

Просмотреть файл

@ -0,0 +1,4 @@
import python
import semmle.python.ApiGraphs
select API::moduleImport("mypkg").getMember("foo").getReturn().getSubscript(["bar", "baz", "qux"])

Просмотреть файл

@ -0,0 +1,8 @@
import mypkg
def test_subscript():
bar = mypkg.foo()["bar"] #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
mypkg.foo()["baz"] = 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
mypkg.foo()["qux"] += 42 #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
mypkg.foo()["qux"] += 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
mypkg.foo()[mypkg.index] = mypkg.value #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()