Merge pull request #17888 from paldepind/rust-data-flow-consistency

Rust: Data flow additions
This commit is contained in:
Simon Friis Vindum 2024-11-05 21:29:18 +01:00 коммит произвёл GitHub
Родитель bb5ee525fc 262a9f0cfa
Коммит 6054855edb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
7 изменённых файлов: 297 добавлений и 15 удалений

Просмотреть файл

@ -0,0 +1,22 @@
/**
* Provides subclasses of `CfgNode` that represent different types of nodes in
* the control flow graph.
*/
private import rust
private import ControlFlowGraph
/**
* A CFG node that corresponds to an element in the AST.
*
* The characteristic predicate binds the `node` field to `this.getAstNode()`,
* so only CFG nodes for which `getAstNode()` has a result belong to this class.
*/
class AstCfgNode extends CfgNode {
// The AST element underlying this CFG node.
AstNode node;
AstCfgNode() { node = this.getAstNode() }
}
/**
* A CFG node that corresponds to an expression in the AST.
*
* The inherited `node` field is overridden with type `Expr`, which restricts
* this class to those `AstCfgNode`s whose AST node is an expression.
*/
class ExprCfgNode extends AstCfgNode {
override Expr node;
/** Gets the underlying expression. */
Expr getExpr() { result = node }
}

Просмотреть файл

@ -7,7 +7,9 @@ private import codeql.util.Unit
private import codeql.dataflow.DataFlow
private import codeql.dataflow.internal.DataFlowImpl
private import rust
private import SsaImpl as SsaImpl
private import codeql.rust.controlflow.ControlFlowGraph
private import codeql.rust.controlflow.CfgNodes
private import codeql.rust.dataflow.Ssa
module Node {
@ -52,22 +54,67 @@ module Node {
override Location getLocation() { none() }
}
/**
* A node in the data flow graph that corresponds to an expression in the
* AST.
*
* Note that because of control-flow splitting, one `Expr` may correspond
* to multiple `ExprNode`s, just like it may correspond to multiple
* `ExprCfgNode`s.
*/
final class ExprNode extends Node, TExprNode {
// The expression CFG node that this data flow node wraps.
ExprCfgNode n;
ExprNode() { this = TExprNode(n) }
// Location and string representation are taken from the underlying expression.
override Location getLocation() { result = n.getExpr().getLocation() }
override string toString() { result = n.getExpr().toString() }
/** Gets the expression underlying the wrapped CFG node. */
override Expr asExpr() { result = n.getExpr() }
/** Gets the wrapped CFG node. */
override CfgNode getCfgNode() { result = n }
}
/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
final class ParameterNode extends Node {
Param param;
final class ParameterNode extends Node, TParameterNode {
Param parameter;
ParameterNode() { this = TSourceParameterNode(param) }
ParameterNode() { this = TParameterNode(parameter) }
override Location getLocation() { result = param.getLocation() }
override Location getLocation() { result = parameter.getLocation() }
override string toString() { result = param.toString() }
override string toString() { result = parameter.toString() }
/** Gets the parameter in the AST that this node corresponds to. */
Param getParameter() { result = parameter }
}
final class ArgumentNode = NaNode;
/**
* An SSA node.
*
* Wraps a node from `SsaImpl::DataFlowIntegration` and records the SSA
* definition that the wrapped node reports through `getDefinitionExt()`.
*/
class SsaNode extends Node, TSsaNode {
// The wrapped node from the SSA data flow integration.
SsaImpl::DataFlowIntegration::SsaNode node;
// The SSA definition associated with `node`.
SsaImpl::DefinitionExt def;
SsaNode() {
this = TSsaNode(node) and
def = node.getDefinitionExt()
}
/** Gets the SSA definition associated with this node. */
SsaImpl::DefinitionExt getDefinitionExt() { result = def }
/** Holds if this node should be hidden from path explanations. */
abstract predicate isHidden();
// Location and string representation are delegated to the wrapped node.
override Location getLocation() { result = node.getLocation() }
override string toString() { result = node.toString() }
}
/**
* A return node.
*
* `getKind` currently has no results (`none()`).
* NOTE(review): `NaNode` is declared outside this view; presumably it is a
* placeholder for node kinds that are not modeled yet. Confirm.
*/
final class ReturnNode extends NaNode {
/** Gets the kind of this return node. Currently never holds. */
RustDataFlow::ReturnKind getKind() { none() }
}
@ -93,6 +140,64 @@ module Node {
final class CastNode = NaNode;
}
final class Node = Node::Node;
/** Provides logic related to SSA. */
module SsaFlow {
private module Impl = SsaImpl::DataFlowIntegration;
/** Gets the data flow node for the parameter `p`. */
private Node::ParameterNode toParameterNode(Param p) { result = TParameterNode(p) }
/**
* Gets the node of the SSA data flow integration (`Impl::Node`) that
* corresponds to the data flow node `n`.
*
* Three cases are covered: `n` directly wraps the SSA node, `n` is an
* expression node whose CFG node is the expression of an `Impl::ExprNode`,
* or `n` is the parameter node for the parameter of an
* `Impl::ParameterNode`.
*/
Impl::Node asNode(Node n) {
n = TSsaNode(result)
or
result.(Impl::ExprNode).getExpr() = n.(Node::ExprNode).getCfgNode()
or
n = toParameterNode(result.(Impl::ParameterNode).getParameter())
}
/**
* Holds if `Impl::localFlowStep` holds for `def` between the SSA integration
* nodes corresponding to `nodeFrom` and `nodeTo`. `isUseStep` is passed
* through unchanged.
*/
predicate localFlowStep(SsaImpl::DefinitionExt def, Node nodeFrom, Node nodeTo, boolean isUseStep) {
Impl::localFlowStep(def, asNode(nodeFrom), asNode(nodeTo), isUseStep)
}
/**
* Holds if `Impl::localMustFlowStep` holds for `def` between the SSA
* integration nodes corresponding to `nodeFrom` and `nodeTo`.
*/
predicate localMustFlowStep(SsaImpl::DefinitionExt def, Node nodeFrom, Node nodeTo) {
Impl::localMustFlowStep(def, asNode(nodeFrom), asNode(nodeTo))
}
}
/**
* Holds for expressions `e` that evaluate to the value of any last (in
* evaluation order) subexpression within them, that is, expressions that
* propagate a value from a subexpression.
*
* For instance, the predicate holds for `if` expressions, as `if b { e1 } else {
* e2 }` evaluates to the value of one of the subexpressions `e1` or `e2`.
*/
private predicate propagatesValue(Expr e) {
e instanceof IfExpr or
e instanceof LoopExpr or
e instanceof ReturnExpr or
e instanceof BreakExpr or
// Block expressions are only included when they have a tail expression.
e.(BlockExpr).getStmtList().hasTailExpr() or
e instanceof MatchExpr
}
/**
* Gets a node that may execute last in `n`, and which, when it executes last,
* will be the value of `n`.
*
* This is implemented as any expression CFG node that has `n` as an immediate
* CFG successor, provided that the expression of `n` satisfies
* `propagatesValue`.
*/
private ExprCfgNode getALastEvalNode(ExprCfgNode n) {
propagatesValue(n.getExpr()) and result.getASuccessor() = n
}
/** Provides local flow steps that are derived from the control flow graph. */
module LocalFlow {
/**
* Holds if the CFG node of `nodeFrom` is a node that may evaluate last in
* the CFG node of `nodeTo` (see `getALastEvalNode`), so that the value of
* `nodeFrom` becomes the value of `nodeTo`.
*/
pragma[nomagic]
predicate localFlowStepCommon(Node nodeFrom, Node nodeTo) {
nodeFrom.getCfgNode() = getALastEvalNode(nodeTo.getCfgNode())
}
}
module RustDataFlow implements InputSig<Location> {
/**
* An element, viewed as a node in a data flow graph. Either an expression
@ -122,10 +227,10 @@ module RustDataFlow implements InputSig<Location> {
predicate nodeIsHidden(Node node) { none() }
class DataFlowExpr = Void;
class DataFlowExpr = ExprCfgNode;
/** Gets the node corresponding to `e`. */
Node exprNode(DataFlowExpr e) { none() }
Node exprNode(DataFlowExpr e) { result.getCfgNode() = e }
final class DataFlowCall extends TNormalCall {
private CallExpr c;
@ -191,7 +296,7 @@ module RustDataFlow implements InputSig<Location> {
* Holds if there is a simple local flow step from `nodeFrom` to `nodeTo`. These
* are the value-preserving intra-callable flow steps.
*/
predicate simpleLocalFlowStep(Node node1, Node node2, string model) { none() }
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) { none() }
/**
* Holds if data can flow from `node1` to `node2` through a non-local step
@ -256,7 +361,9 @@ module RustDataFlow implements InputSig<Location> {
* `node2` must be visited along a flow path, then any type known for `node2`
* must also apply to `node1`.
*/
predicate localMustFlowStep(Node node1, Node node2) { none() }
predicate localMustFlowStep(Node node1, Node node2) {
SsaFlow::localMustFlowStep(_, node1, node2)
}
class LambdaCallKind = Void;
@ -267,7 +374,7 @@ module RustDataFlow implements InputSig<Location> {
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
/** Extra data-flow steps needed for lambda flow analysis. */
/** Extra data flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
predicate knownSourceModel(Node source, string model) { none() }
@ -286,8 +393,9 @@ cached
private module Cached {
cached
newtype TNode =
TExprNode(CfgNode n, Expr e) { n.getAstNode() = e } or
TSourceParameterNode(Param param)
TExprNode(ExprCfgNode n) or
TParameterNode(Param p) or
TSsaNode(SsaImpl::DataFlowIntegration::SsaNode node)
cached
newtype TDataFlowCall = TNormalCall(CallExpr c)
@ -302,7 +410,11 @@ private module Cached {
/** This is the local flow predicate that is exposed. */
cached
predicate localFlowStepImpl(Node::Node nodeFrom, Node::Node nodeTo) { none() }
predicate localFlowStepImpl(Node::Node nodeFrom, Node::Node nodeTo) {
LocalFlow::localFlowStepCommon(nodeFrom, nodeTo)
or
SsaFlow::localFlowStep(_, nodeFrom, nodeTo, _)
}
}
import Cached

Просмотреть файл

@ -2,6 +2,7 @@ private import rust
private import codeql.rust.controlflow.BasicBlocks as BasicBlocks
private import BasicBlocks
private import codeql.rust.controlflow.ControlFlowGraph as Cfg
private import codeql.rust.controlflow.CfgNodes as CfgNodes
private import Cfg
private import codeql.rust.controlflow.internal.ControlFlowGraphImpl as ControlFlowGraphImpl
private import codeql.ssa.Ssa as SsaImplCommon
@ -395,6 +396,38 @@ private module Cached {
Definition uncertainWriteDefinitionInput(UncertainWriteDefinition def) {
Impl::uncertainWriteDefinitionInput(def, result)
}
/**
* Exposes the SSA data flow integration (`DataFlowIntegrationImpl`) together
* with cached wrappers for its local flow predicates and a barrier guard
* module that is phrased in terms of CFG nodes.
*/
cached
module DataFlowIntegration {
import DataFlowIntegrationImpl
/** Cached wrapper around `DataFlowIntegrationImpl::localFlowStep`. */
cached
predicate localFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo, boolean isUseStep) {
DataFlowIntegrationImpl::localFlowStep(def, nodeFrom, nodeTo, isUseStep)
}
/** Cached wrapper around `DataFlowIntegrationImpl::localMustFlowStep`. */
cached
predicate localMustFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo) {
DataFlowIntegrationImpl::localMustFlowStep(def, nodeFrom, nodeTo)
}
/** The signature of the guard predicate accepted by `BarrierGuard`. */
signature predicate guardChecksSig(CfgNodes::AstCfgNode g, Cfg::CfgNode e, boolean branch);
/**
* Provides the barrier nodes induced by the guard predicate `guardChecks`.
*/
cached // nothing is actually cached
module BarrierGuard<guardChecksSig/3 guardChecks> {
// Restates `guardChecks` with the parameter types declared by
// `DataFlowIntegrationInput`, so that it can be passed to
// `DataFlowIntegrationImpl::BarrierGuard` below.
private predicate guardChecksAdjTypes(
DataFlowIntegrationInput::Guard g, DataFlowIntegrationInput::Expr e, boolean branch
) {
guardChecks(g, e, branch)
}
private Node getABarrierNodeImpl() {
result = DataFlowIntegrationImpl::BarrierGuard<guardChecksAdjTypes/3>::getABarrierNode()
}
/** Gets a barrier node, as computed by `DataFlowIntegrationImpl::BarrierGuard`. */
predicate getABarrierNode = getABarrierNodeImpl/0;
}
}
}
import Cached
@ -426,3 +459,46 @@ class PhiReadNode extends DefinitionExt, Impl::PhiReadNode {
override Location getLocation() { result = Impl::PhiReadNode.super.getLocation() }
}
/**
* An implementation of `Impl::DataFlowIntegrationInputSig` in terms of CFG
* nodes and basic blocks.
*/
private module DataFlowIntegrationInput implements Impl::DataFlowIntegrationInputSig {
/** A CFG node with an underlying AST node, used as the expression type. */
class Expr extends CfgNodes::AstCfgNode {
/** Holds if this node is the `i`th node of basic block `bb`. */
predicate hasCfgNode(SsaInput::BasicBlock bb, int i) { this = bb.getNode(i) }
}
/** Gets a read of the SSA definition `def`, as given by `Cached::getARead`. */
Expr getARead(Definition def) { result = Cached::getARead(def) }
/** Holds if SSA definition `def` assigns `value` to the underlying variable. */
predicate ssaDefAssigns(WriteDefinition def, Expr value) {
// `value` is the CFG node at the position where `def` is defined.
exists(BasicBlock bb, int i | def.definesAt(_, bb, i) and value = bb.getNode(i))
}
class Parameter = Param;
/** Holds if SSA definition `def` initializes parameter `p` at function entry. */
predicate ssaDefInitializesParam(WriteDefinition def, Parameter p) {
// `def` is defined at the position of the CFG node whose AST node is `p`.
exists(BasicBlock bb, int i | bb.getNode(i).getAstNode() = p and def.definesAt(_, bb, i))
}
/** A CFG node with an underlying AST node, used as the guard type. */
class Guard extends CfgNodes::AstCfgNode {
/** Holds if this node is the `i`th node of basic block `bb`. */
predicate hasCfgNode(SsaInput::BasicBlock bb, int i) { this = bb.getNode(i) }
}
/** Holds if the guard `guard` controls block `bb` upon evaluating to `branch`. */
predicate guardControlsBlock(Guard guard, SsaInput::BasicBlock bb, boolean branch) {
// `guard` must be the last node of a condition block that controls `bb`
// via a conditional successor whose value is `branch`.
exists(ConditionBlock conditionBlock, ConditionalSuccessor s |
guard = conditionBlock.getLastNode() and
s.getValue() = branch and
conditionBlock.controls(bb, s)
)
}
/** Gets an immediate conditional successor of basic block `bb`, if any. */
SsaInput::BasicBlock getAConditionalBasicBlockSuccessor(SsaInput::BasicBlock bb, boolean branch) {
exists(Cfg::ConditionalSuccessor s |
result = bb.getASuccessor(s) and
s.getValue() = branch
)
}
}
private module DataFlowIntegrationImpl = Impl::DataFlowIntegration<DataFlowIntegrationInput>;

Просмотреть файл

@ -5,6 +5,10 @@ uniqueType
| common_definitions.rs:3:15:3:25 | Param | Node should have one type but has 0. |
| file://:0:0:0:0 | Param | Node should have one type but has 0. |
uniqueNodeLocation
| file://:0:0:0:0 | BlockExpr | Node should have one location but has 0. |
| file://:0:0:0:0 | MethodCallExpr | Node should have one location but has 0. |
| file://:0:0:0:0 | MethodCallExpr | Node should have one location but has 0. |
| file://:0:0:0:0 | Param | Node should have one location but has 0. |
| file://:0:0:0:0 | PathExpr | Node should have one location but has 0. |
missingLocation
| Nodes without location: 1 |
| Nodes without location: 5 |

Просмотреть файл

@ -1,3 +1,14 @@
uniqueEnclosingCallable
| main.rs:6:18:6:27 | Param | Node should have one enclosing callable but has 0. |
| main.rs:31:21:31:26 | Param | Node should have one enclosing callable but has 0. |
| main.rs:31:29:31:34 | Param | Node should have one enclosing callable but has 0. |
| main.rs:31:37:31:50 | Param | Node should have one enclosing callable but has 0. |
uniqueCallEnclosingCallable
| main.rs:3:14:3:33 | CallExpr | Call should have one enclosing callable but has 0. |
| main.rs:7:5:7:14 | CallExpr | Call should have one enclosing callable but has 0. |
| main.rs:39:5:39:14 | CallExpr | Call should have one enclosing callable but has 0. |
| main.rs:40:5:40:23 | CallExpr | Call should have one enclosing callable but has 0. |
uniqueType
| main.rs:6:18:6:27 | Param | Node should have one type but has 0. |
| main.rs:31:21:31:26 | Param | Node should have one type but has 0. |
| main.rs:31:29:31:34 | Param | Node should have one type but has 0. |
| main.rs:31:37:31:50 | Param | Node should have one type but has 0. |

Просмотреть файл

@ -0,0 +1,24 @@
| main.rs:2:9:2:9 | s | main.rs:3:33:3:33 | s |
| main.rs:6:18:6:21 | cond | main.rs:9:16:9:19 | cond |
| main.rs:7:9:7:9 | a | main.rs:10:9:10:9 | a |
| main.rs:8:9:8:9 | b | main.rs:12:9:12:9 | b |
| main.rs:9:9:9:9 | c | main.rs:14:5:14:5 | c |
| main.rs:9:21:11:5 | BlockExpr | main.rs:9:13:13:5 | IfExpr |
| main.rs:10:9:10:9 | a | main.rs:9:21:11:5 | BlockExpr |
| main.rs:11:12:13:5 | BlockExpr | main.rs:9:13:13:5 | IfExpr |
| main.rs:12:9:12:9 | b | main.rs:11:12:13:5 | BlockExpr |
| main.rs:14:5:14:5 | c | main.rs:6:37:15:1 | BlockExpr |
| main.rs:18:9:18:9 | a | main.rs:20:15:20:15 | a |
| main.rs:19:9:19:9 | b | main.rs:22:5:22:5 | b |
| main.rs:20:9:20:15 | BreakExpr | main.rs:19:13:21:5 | LoopExpr |
| main.rs:20:15:20:15 | a | main.rs:20:9:20:15 | BreakExpr |
| main.rs:22:5:22:5 | b | main.rs:17:29:23:1 | BlockExpr |
| main.rs:27:5:27:5 | i | main.rs:27:5:27:5 | i |
| main.rs:27:5:27:5 | i | main.rs:28:5:28:5 | i |
| main.rs:28:5:28:5 | i | main.rs:25:24:29:1 | BlockExpr |
| main.rs:31:21:31:21 | a | main.rs:33:20:33:20 | a |
| main.rs:31:29:31:29 | b | main.rs:34:17:34:17 | b |
| main.rs:31:37:31:37 | c | main.rs:32:11:32:11 | c |
| main.rs:32:5:35:5 | MatchExpr | main.rs:31:60:36:1 | BlockExpr |
| main.rs:33:20:33:20 | a | main.rs:32:5:35:5 | MatchExpr |
| main.rs:34:17:34:17 | b | main.rs:32:5:35:5 | MatchExpr |

Просмотреть файл

@ -3,6 +3,39 @@ fn variable() {
println!("{:?} data flow!", s);
}
fn if_expression(cond: bool) -> i64 { // data flow test input: value of an `if`/`else` expression
let a = 1;
let b = 2;
let c = if cond { // `c` is initialized from the tail expression of the taken branch
a
} else {
b
};
c
}
fn loop_expression() -> i64 { // data flow test input: value of a `loop` given by `break`
let a = 1;
let b = loop {
break a; // the operand of `break` becomes the value of the `loop` expression
};
b
}
fn assignment() -> i64 { // data flow test input: reassignment of a mutable variable
let mut i = 1;
i = 2; // overwrites the initial value before it is ever read
i
}
fn match_expression(a: i64, b: i64, c: Option<i64>) -> i64 { // data flow test input: value of a `match`
match c { // evaluates to the expression of whichever arm matches
Some(_) => a,
None => b,
}
}
fn main() { // entry point; only `variable` and `if_expression` are invoked here
variable();
if_expression(true);
}