зеркало из https://github.com/github/codeql.git
Python: fix def nodes for subscript
We were using `getMember` for dictionaries; these are now `getIndex`. Also add a convenience predicate for string keys.
This commit is contained in:
Родитель
99b9101455
Коммит
0b8e908823
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Fixed labels in the API graph pertaining to definitions of subscripts. Previously, these were found by `getMember` rather than `getASubscript`.
|
||||
* Added edges for indices of subscripts to the API graph. Now a subscripted API node will have an edge to the API node for the index expression. So if `foo` is matched by API node `A`, then `"key"` in `foo["key"]` will be matched by the API node `A.getIndex()`. This can be used to track the origin of the index.
|
||||
* Added member predicate `getSubscriptAt(API::Node index)` to `API::Node`. Like `getASubscript()`, this will return an API node that matches a subscript of the node, but here it will be restricted to subscripts where the index matches the `index` parameter.
|
||||
* Added convenience predicate `getSubscript("key")` to obtain a subscript at a specific index, when the index happens to be a statically known string.
|
|
@ -249,6 +249,60 @@ module API {
|
|||
*/
|
||||
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing an index of a subscript of this node.
|
||||
* For example, in `obj[x]`, `x` is an index of `obj`.
|
||||
*/
|
||||
Node getIndex() { result = this.getASuccessor(Label::index()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a subscript of this node at (string) index `key`.
|
||||
* This requires that the index can be statically determined.
|
||||
*
|
||||
* For example, the subscripts of `a` and `b` below would be found using
|
||||
* the index `foo`:
|
||||
* ```py
|
||||
* a["foo"]
|
||||
* x = "foo" if cond else "bar"
|
||||
* b[x]
|
||||
* ```
|
||||
*/
|
||||
Node getSubscript(string key) {
|
||||
exists(API::Node index | result = this.getSubscriptAt(index) |
|
||||
key = index.getAValueReachingSink().asExpr().(PY::StrConst).getText()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a node representing a subscript of this node at index `index`.
|
||||
*/
|
||||
Node getSubscriptAt(API::Node index) {
|
||||
result = this.getASubscript() and
|
||||
index = this.getIndex() and
|
||||
(
|
||||
// subscripting
|
||||
exists(PY::SubscriptNode subscript |
|
||||
subscript.getObject() = this.getAValueReachableFromSource().asCfgNode() and
|
||||
subscript.getIndex() = index.asSink().asCfgNode()
|
||||
|
|
||||
// reading
|
||||
subscript = result.asSource().asCfgNode()
|
||||
or
|
||||
// writing
|
||||
subscript.(PY::DefinitionNode).getValue() = result.asSink().asCfgNode()
|
||||
)
|
||||
or
|
||||
// dictionary literals
|
||||
exists(PY::Dict dict, PY::KeyValuePair item |
|
||||
dict = this.getAValueReachingSink().asExpr() and
|
||||
dict.getItem(_) = item and
|
||||
item.getKey() = index.asSink().asExpr()
|
||||
|
|
||||
item.getValue() = result.asSink().asExpr()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
|
@ -405,7 +459,7 @@ module API {
|
|||
Node builtin(string n) { result = moduleImport("builtins").getMember(n) }
|
||||
|
||||
/**
|
||||
* An `CallCfgNode` that is connected to the API graph.
|
||||
* A `CallCfgNode` that is connected to the API graph.
|
||||
*
|
||||
* Can be used to reason about calls to an external API in which the correlation between
|
||||
* parameters and/or return values must be retained.
|
||||
|
@ -694,12 +748,24 @@ module API {
|
|||
rhs = aw.getValue()
|
||||
)
|
||||
or
|
||||
// TODO: I had expected `DataFlow::AttrWrite` to contain the attribute writes from a dict, that's how JS works.
|
||||
// dictionary literals
|
||||
exists(PY::Dict dict, PY::KeyValuePair item |
|
||||
dict = pred.(DataFlow::ExprNode).getNode().getNode() and
|
||||
dict.getItem(_) = item and
|
||||
lbl = Label::member(item.getKey().(PY::StrConst).getS()) and
|
||||
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue()
|
||||
dict.getItem(_) = item
|
||||
|
|
||||
// from `x` to `{ "key": x }`
|
||||
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getValue() and
|
||||
lbl = Label::subscript()
|
||||
or
|
||||
// from `"key"` to `{ "key": x }`
|
||||
rhs.(DataFlow::ExprNode).getNode().getNode() = item.getKey() and
|
||||
lbl = Label::index()
|
||||
)
|
||||
or
|
||||
// list literals, from `x` to `[x]`
|
||||
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
|
||||
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
|
||||
lbl = Label::subscript()
|
||||
)
|
||||
or
|
||||
exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
|
||||
|
@ -720,6 +786,20 @@ module API {
|
|||
lbl = Label::memberFromRef(aw)
|
||||
)
|
||||
or
|
||||
// subscripting
|
||||
exists(DataFlow::LocalSourceNode src, DataFlow::Node subscript, DataFlow::Node index |
|
||||
use(base, src) and
|
||||
subscript = trackUseNode(src).getSubscript(index)
|
||||
|
|
||||
// from `x` to a definition of `x[...]`
|
||||
rhs.asCfgNode() = subscript.asCfgNode().(PY::DefinitionNode).getValue() and
|
||||
lbl = Label::subscript()
|
||||
or
|
||||
// from `x` to `"key"` in `x["key"]`
|
||||
rhs = index and
|
||||
lbl = Label::index()
|
||||
)
|
||||
or
|
||||
exists(EntryPoint entry |
|
||||
base = root() and
|
||||
lbl = Label::entryPoint(entry) and
|
||||
|
@ -757,7 +837,8 @@ module API {
|
|||
or
|
||||
// Subscripting a node that is a use of `base`
|
||||
lbl = Label::subscript() and
|
||||
ref = pred.getASubscript()
|
||||
ref = pred.getSubscript(_) and
|
||||
ref.asCfgNode().isLoad()
|
||||
or
|
||||
// Subclassing a node
|
||||
lbl = Label::subclass() and
|
||||
|
@ -973,8 +1054,7 @@ module API {
|
|||
member = any(DataFlow::AttrRef pr).getAttributeName() or
|
||||
exists(Builtins::likelyBuiltin(member)) or
|
||||
ImportStar::namePossiblyDefinedInImportStar(_, member, _) or
|
||||
Impl::prefix_member(_, member, _) or
|
||||
member = any(PY::Dict d).getAnItem().(PY::KeyValuePair).getKey().(PY::StrConst).getS()
|
||||
Impl::prefix_member(_, member, _)
|
||||
} or
|
||||
MkLabelUnknownMember() or
|
||||
MkLabelParameter(int i) {
|
||||
|
@ -992,6 +1072,7 @@ module API {
|
|||
MkLabelSubclass() or
|
||||
MkLabelAwait() or
|
||||
MkLabelSubscript() or
|
||||
MkLabelIndex() or
|
||||
MkLabelEntryPoint(EntryPoint ep)
|
||||
|
||||
/** A label for a module. */
|
||||
|
@ -1072,6 +1153,11 @@ module API {
|
|||
override string toString() { result = "getASubscript()" }
|
||||
}
|
||||
|
||||
/** A label that gets the index of a subscript. */
|
||||
class LabelIndex extends ApiLabel, MkLabelIndex {
|
||||
override string toString() { result = "getIndex()" }
|
||||
}
|
||||
|
||||
/** A label for entry points. */
|
||||
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
|
||||
private EntryPoint entry;
|
||||
|
@ -1120,6 +1206,9 @@ module API {
|
|||
/** Gets the `subscript` edge label. */
|
||||
LabelSubscript subscript() { any() }
|
||||
|
||||
/** Gets the `index` edge label. */
|
||||
LabelIndex index() { any() }
|
||||
|
||||
/** Gets the label going from the root node to the nodes associated with the given entry point. */
|
||||
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ class LocalSourceNode extends Node {
|
|||
/**
|
||||
* Gets a subscript of this node.
|
||||
*/
|
||||
Node getASubscript() { Cached::subscript(this, result) }
|
||||
Node getSubscript(Node index) { Cached::subscript(this, result, index) }
|
||||
|
||||
/**
|
||||
* Gets a call to the method `methodName` on this node.
|
||||
|
@ -249,13 +249,14 @@ private module Cached {
|
|||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript.
|
||||
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript with index/key `index`.
|
||||
*/
|
||||
cached
|
||||
predicate subscript(LocalSourceNode node, CfgNode subscript) {
|
||||
predicate subscript(LocalSourceNode node, CfgNode subscript, CfgNode index) {
|
||||
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
|
||||
node.flowsTo(seq) and
|
||||
seq.getNode() = subscriptNode.getObject()
|
||||
seq.getNode() = subscriptNode.getObject() and
|
||||
index.getNode() = subscriptNode.getIndex()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -91,14 +91,10 @@ private module ExperimentalPrivateDjango {
|
|||
result = baseClassRef().getReturn().getAMember()
|
||||
}
|
||||
|
||||
/** Gets a reference to a header instance call with `__setitem__`. */
|
||||
API::Node headerSetItem() {
|
||||
result = headerInstance() and
|
||||
result.asSource().(DataFlow::AttrRead).getAttributeName() = "__setitem__"
|
||||
}
|
||||
|
||||
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
|
||||
DjangoResponseSetItemCall() { this = headerSetItem().getACall() }
|
||||
DjangoResponseSetItemCall() {
|
||||
this = baseClassRef().getReturn().getMember("__setitem__").getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getNameArg() { result = this.getArg(0) }
|
||||
|
||||
|
@ -109,8 +105,7 @@ private module ExperimentalPrivateDjango {
|
|||
DataFlow::Node headerInput;
|
||||
|
||||
DjangoResponseDefinition() {
|
||||
this.asCfgNode().(DefinitionNode) =
|
||||
headerInstance().getAValueReachableFromSource().asCfgNode() and
|
||||
headerInput = headerInstance().asSink() and
|
||||
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
|
||||
}
|
||||
|
||||
|
|
|
@ -5,12 +5,12 @@ def callback(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("bar").
|
|||
|
||||
foo.bar(callback) #$ def=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("bar").getReturn()
|
||||
|
||||
def callback2(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c").getParameter(0)
|
||||
x.baz2() #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c").getParameter(0).getMember("baz2").getReturn()
|
||||
def callback2(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript().getParameter(0)
|
||||
x.baz2() #$ use=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript().getParameter(0).getMember("baz2").getReturn()
|
||||
|
||||
mydict = {
|
||||
"c": callback2, #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("c")
|
||||
"other": "whatever" #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getMember("other")
|
||||
"c": callback2, #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript()
|
||||
"other": "whatever" #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0).getASubscript()
|
||||
}
|
||||
|
||||
foo.baz(mydict) #$ def=moduleImport("mypkg").getMember("foo").getMember("baz").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("baz").getReturn()
|
||||
|
@ -34,11 +34,11 @@ otherDict.fourth = callback4
|
|||
|
||||
foo.quack(otherDict.fourth) #$ def=moduleImport("mypkg").getMember("foo").getMember("quack").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("quack").getReturn()
|
||||
|
||||
def namedCallback(myName, otherName):
|
||||
# Using named parameters:
|
||||
def namedCallback(myName, otherName):
|
||||
# Using named parameters:
|
||||
myName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getKeywordParameter("myName").getReturn()
|
||||
otherName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getKeywordParameter("otherName").getReturn()
|
||||
# Using numbered parameters:
|
||||
# Using numbered parameters:
|
||||
myName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getParameter(0).getReturn()
|
||||
otherName() #$ use=moduleImport("mypkg").getMember("foo").getMember("blob").getParameter(0).getParameter(1).getReturn()
|
||||
|
||||
|
@ -58,4 +58,4 @@ recursiveDict.callback = recusisionCallback;
|
|||
recursiveDict.rec1 = recursiveDict;
|
||||
recursiveDict.rec2 = recursiveDict;
|
||||
|
||||
foo.rec(recursiveDict); #$ def=moduleImport("mypkg").getMember("foo").getMember("rec").getParameter(0)
|
||||
foo.rec(recursiveDict); #$ def=moduleImport("mypkg").getMember("foo").getMember("rec").getParameter(0)
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
| test_subscript.py:4:11:4:28 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
|
||||
| test_subscript.py:5:26:5:27 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
|
||||
| test_subscript.py:6:5:6:22 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
|
||||
| test_subscript.py:6:5:6:28 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
|
||||
| test_subscript.py:7:5:7:22 | Use moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
|
||||
| test_subscript.py:7:5:7:28 | Def moduleImport("mypkg").getMember("foo").getReturn().getASubscript() |
|
|
@ -0,0 +1,4 @@
|
|||
import python
|
||||
import semmle.python.ApiGraphs
|
||||
|
||||
select API::moduleImport("mypkg").getMember("foo").getReturn().getSubscript(["bar", "baz", "qux"])
|
|
@ -0,0 +1,8 @@
|
|||
import mypkg
|
||||
|
||||
def test_subscript():
|
||||
bar = mypkg.foo()["bar"] #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
|
||||
mypkg.foo()["baz"] = 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
|
||||
mypkg.foo()["qux"] += 42 #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
|
||||
mypkg.foo()["qux"] += 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
|
||||
mypkg.foo()[mypkg.index] = mypkg.value #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
|
Загрузка…
Ссылка в новой задаче