From 63bb772ef9a67b4c2993cb370de4775b44c5f8b3 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Mon, 4 Mar 2024 13:25:15 +0100 Subject: [PATCH] Variable capture: Avoid overlapping and false-positive data flow paths --- .../codeql/dataflow/VariableCapture.qll | 80 +++++++++++++++++-- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/VariableCapture.qll b/shared/dataflow/codeql/dataflow/VariableCapture.qll index e90bf481442..9fd385d4458 100644 --- a/shared/dataflow/codeql/dataflow/VariableCapture.qll +++ b/shared/dataflow/codeql/dataflow/VariableCapture.qll @@ -601,16 +601,22 @@ module Flow Input> implements OutputSig * observed in a similarly synthesized post-update node for this read of `v`. */ private predicate synthRead( - CapturedVariable v, BasicBlock bb, int i, boolean topScope, Expr closure + CapturedVariable v, BasicBlock bb, int i, boolean topScope, Expr closure, boolean alias ) { exists(ClosureExpr ce | closureCaptures(ce, v) | - ce.hasCfgNode(bb, i) and ce = closure + ce.hasCfgNode(bb, i) and ce = closure and alias = false or - localOrNestedClosureAccess(ce, closure, bb, i) + localOrNestedClosureAccess(ce, closure, bb, i) and alias = true ) and if v.getCallable() != bb.getEnclosingCallable() then topScope = false else topScope = true } + private predicate synthRead( + CapturedVariable v, BasicBlock bb, int i, boolean topScope, Expr closure + ) { + synthRead(v, bb, i, topScope, closure, _) + } + /** * Holds if there is an access of a captured variable inside a closure in the * `i`th node of `bb`, such that we need to synthesize a `this.` qualifier. @@ -919,16 +925,22 @@ module Flow Input> implements OutputSig ) } - predicate storeStep(ClosureNode node1, CapturedVariable v, ClosureNode node2) { - // store v in the closure or in the malloc in case of a relevant constructor call + private predicate storeStepClosure( + ClosureNode node1, CapturedVariable v, ClosureNode node2, boolean alias + ) { exists(BasicBlock bb, int i, Expr closure | - synthRead(v, bb, i, _, closure) and + synthRead(v, bb, i, _, closure, alias) and node1 = TSynthRead(v, bb, i, false) | node2 = TExprNode(closure, false) or node2 = TMallocNode(closure) and hasConstructorCapture(closure, v) ) + } + + predicate storeStep(ClosureNode node1, CapturedVariable v, ClosureNode node2) { + // store v in the closure or in the malloc in case of a relevant constructor call + storeStepClosure(node1, v, node2, _) or // write to v inside the closure body exists(BasicBlock bb, int i, VariableWrite vw | @@ -964,6 +976,62 @@ module Flow Input> implements OutputSig } predicate clearsContent(ClosureNode node, CapturedVariable v) { + /* + * Stores into closure aliases block flow from previous stores, both to + * avoid overlapping data flow paths, but also to avoid false positive + * flow. + * + * Example 1 (overlapping paths): + * + * ```rb + * def m + * x = taint + * + * fn = -> { # (1) + * sink x + * } + * + * fn.call # (2) + * ``` + * + * If we don't clear `x` at `fn` (2), we will have two overlapping paths: + * + * ``` + * taint -> fn (2) [captured x] + * taint -> fn (1) [captured x] -> fn (2) [captured x] + * ``` + * + * where the step `fn (1) [captured x] -> fn [captured x]` arises from normal + * use-use flow for `fn`. Clearing `x` at `fn` (2) removes the second path above. + * + * Example 2 (false positive flow): + * + * ```rb + * def m + * x = taint + * + * fn = -> { # (1) + * sink x + * } + * + * x = nil # (2) + * + * fn.call # (3) + * end + * ``` + * + * If we don't clear `x` at `fn` (3), we will have the following false positive + * flow path: + * + * ``` + * taint -> fn (1) [captured x] -> fn (3) [captured x] + * ``` + * + * since normal use-use flow for `fn` does not take the overwrite at (2) into account. + */ + + storeStepClosure(_, v, node, true) + or exists(BasicBlock bb, int i | captureWrite(v, bb, i, false, _) and node = TSynthThisQualifier(bb, i, false)