From e7a3dc83bcbdbec27bf697ceb2a8e7c7c65a9e08 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Tue, 19 Sep 2023 12:20:17 +0200 Subject: [PATCH] Data flow: Performance improvements --- .../codeql/dataflow/internal/DataFlowImpl.qll | 312 +++++++++++++----- .../dataflow/internal/DataFlowImplCommon.qll | 37 ++- 2 files changed, 250 insertions(+), 99 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index e4182a32a4b..171f943a25a 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1316,7 +1316,7 @@ module MakeImpl { ) or // flow into a callable - fwdFlowIn(_, _, node, state, _, cc, _, _, _, t, ap, apa, _) and + fwdFlowIn(node, apa, state, cc, t, ap) and if PrevStage::parameterMayFlowThrough(node, apa) then ( summaryCtx = TParamNodeSome(node.asNode()) and @@ -1327,7 +1327,7 @@ module MakeImpl { ) or // flow out of a callable - fwdFlowOut(_, _, node, state, cc, summaryCtx, argT, argAp, t, ap, apa) + fwdFlowOut(node, state, cc, summaryCtx, argT, argAp, t, ap, apa) or // flow through a callable exists( @@ -1415,75 +1415,148 @@ module MakeImpl { ) } - bindingset[call, ctx] - pragma[inline_late] - private DataFlowCallable viableImplCallContextReducedInlineLate( - DataFlowCall call, CcCall ctx - ) { - result = viableImplCallContextReduced(call, ctx) - } - - bindingset[arg, ctx] - pragma[inline_late] - private DataFlowCallable viableImplCallContextReducedInlineLate( - DataFlowCall call, ArgNodeEx arg, CcCall ctx - ) { - call = arg.getCall() and - result = viableImplCallContextReducedInlineLate(call, ctx) - } - - bindingset[call] - pragma[inline_late] - private predicate flowIntoCallApaInlineLate( - DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, - boolean allowsFieldFlow, ApApprox apa - ) { - PrevStage::callEdgeArgParam(call, c, arg, p, allowsFieldFlow, apa) - } - - 
bindingset[call, ctx] - pragma[inline_late] - private predicate viableImplNotCallContextReducedInlineLate(DataFlowCall call, Cc ctx) { - viableImplNotCallContextReduced(call, ctx) - } - - bindingset[arg, outercc] - pragma[inline_late] - private predicate viableImplArgNotCallContextReduced( - DataFlowCall call, ArgNodeEx arg, Cc outercc - ) { - call = arg.getCall() and - viableImplNotCallContextReducedInlineLate(call, outercc) - } - pragma[nomagic] - private predicate fwdFlowInCand( - DataFlowCall call, DataFlowCallable inner, ParamNodeEx p, FlowState state, Cc outercc, - ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa + private predicate fwdFlowIntoArg( + ArgNodeEx arg, FlowState state, Cc outercc, ParamNodeOption summaryCtx, TypOption argT, + ApOption argAp, Typ t, Ap ap, ApApprox apa, boolean cc ) { - exists(ArgNodeEx arg, boolean allowsFieldFlow | - fwdFlow(arg, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and + fwdFlow(arg, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and + if outercc instanceof CcCall then cc = true else cc = false + } + + private signature module FwdFlowInInputSig { + default predicate callRestriction(DataFlowCall call) { any() } + + bindingset[p, apa] + default predicate parameterRestriction(ParamNodeEx p, ApApprox apa) { any() } + } + + /** + * Exposes the inlined predicate `fwdFlowIn`, which is used to calculate both + * flow in and flow through. + * + * For flow in, only a subset of the columns are needed, specifically we don't + * need to record the argument that flows into the parameter. + * + * For flow through, we do need to record the argument, however, we can restrict + * this to arguments that may actually flow through, using `callRestriction` and + * `parameterRestriction`, which reduces the argument-to-parameter fan-in + * significantly. 
+ */ + private module FwdFlowIn<FwdFlowInInputSig I> { + pragma[nomagic] + private predicate callEdgeArgParamRestricted( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, + boolean allowsFieldFlow, ApApprox apa + ) { + PrevStage::callEdgeArgParam(call, c, arg, p, allowsFieldFlow, apa) and + I::callRestriction(call) and + I::parameterRestriction(p, apa) + } + + pragma[nomagic] + private DataFlowCallable viableImplCallContextReducedRestricted( + DataFlowCall call, CcCall ctx + ) { + result = viableImplCallContextReduced(call, ctx) and + callEdgeArgParamRestricted(call, result, _, _, _, _) + } + + bindingset[call, ctx] + pragma[inline_late] + private DataFlowCallable viableImplCallContextReducedInlineLate( + DataFlowCall call, CcCall ctx + ) { + result = viableImplCallContextReducedRestricted(call, ctx) + } + + bindingset[arg, ctx] + pragma[inline_late] + private DataFlowCallable viableImplCallContextReducedInlineLate( + DataFlowCall call, ArgNodeEx arg, CcCall ctx + ) { + callEdgeArgParamRestricted(call, _, arg, _, _, _) and + result = viableImplCallContextReducedInlineLate(call, ctx) + } + + bindingset[call] + pragma[inline_late] + private predicate callEdgeArgParamRestrictedInlineLate( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, + boolean allowsFieldFlow, ApApprox apa + ) { + callEdgeArgParamRestricted(call, c, arg, p, allowsFieldFlow, apa) + } + + bindingset[call, ctx] + pragma[inline_late] + private predicate viableImplNotCallContextReducedInlineLate(DataFlowCall call, Cc ctx) { + viableImplNotCallContextReduced(call, ctx) + } + + bindingset[arg, outercc] + pragma[inline_late] + private predicate viableImplArgNotCallContextReduced( + DataFlowCall call, ArgNodeEx arg, Cc outercc + ) { + callEdgeArgParamRestricted(call, _, arg, _, _, _) and + viableImplNotCallContextReducedInlineLate(call, outercc) + } + + pragma[nomagic] + private predicate fwdFlowInCand( + DataFlowCall call, ArgNodeEx arg, Cc outercc, DataFlowCallable inner, ParamNodeEx 
p, + ApApprox apa, boolean allowsFieldFlow, boolean cc + ) { + fwdFlowIntoArg(arg, _, outercc, _, _, _, _, _, apa, cc) and ( inner = viableImplCallContextReducedInlineLate(call, arg, outercc) or viableImplArgNotCallContextReduced(call, arg, outercc) ) and - flowIntoCallApaInlineLate(call, inner, arg, p, allowsFieldFlow, apa) - | - if allowsFieldFlow = false then ap instanceof ApNil else any() - ) + callEdgeArgParamRestrictedInlineLate(call, inner, arg, p, allowsFieldFlow, apa) + } + + pragma[nomagic] + private predicate fwdFlowInValidEdge( + DataFlowCall call, ArgNodeEx arg, Cc outercc, DataFlowCallable inner, ParamNodeEx p, + CcCall innercc, ApApprox apa, boolean allowsFieldFlow, boolean cc + ) { + fwdFlowInCand(call, arg, outercc, inner, p, apa, allowsFieldFlow, cc) and + FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and + innercc = getCallContextCall(call, inner) + } + + pragma[inline] + predicate fwdFlowIn( + DataFlowCall call, DataFlowCallable inner, ParamNodeEx p, FlowState state, Cc outercc, + CcCall innercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, + Ap ap, ApApprox apa, boolean cc + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlowIntoArg(arg, state, outercc, summaryCtx, argT, argAp, t, ap, apa, cc) and + fwdFlowInValidEdge(call, arg, outercc, inner, p, innercc, apa, allowsFieldFlow, cc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } } + private module FwdFlowInNoRestriction implements FwdFlowInInputSig { } + pragma[nomagic] private predicate fwdFlowIn( - DataFlowCall call, DataFlowCallable inner, ParamNodeEx p, FlowState state, Cc outercc, - CcCall innercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, - ApApprox apa, boolean cc + ParamNodeEx p, ApApprox apa, FlowState state, CcCall innercc, Typ t, Ap ap ) { - fwdFlowInCand(call, inner, p, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and - FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and - 
innercc = getCallContextCall(call, inner) and - if outercc instanceof CcCall then cc = true else cc = false + FwdFlowIn<FwdFlowInNoRestriction>::fwdFlowIn(_, _, p, state, _, innercc, _, _, _, t, ap, + apa, _) + } + + pragma[nomagic] + private DataFlowCallable viableImplCallContextReducedReverseRestricted( + DataFlowCall call, CcNoCall ctx + ) { + result = viableImplCallContextReducedReverse(call, ctx) and + PrevStage::callEdgeReturn(call, result, _, _, _, _, _) } bindingset[ctx, result] @@ -1491,7 +1564,7 @@ module MakeImpl { private DataFlowCallable viableImplCallContextReducedReverseInlineLate( DataFlowCall call, CcNoCall ctx ) { - result = viableImplCallContextReducedReverse(call, ctx) + result = viableImplCallContextReducedReverseRestricted(call, ctx) } bindingset[call] @@ -1514,34 +1587,58 @@ module MakeImpl { PrevStage::callEdgeReturn(call, c, ret, _, out, allowsFieldFlow, apa) } + pragma[nomagic] + private predicate fwdFlowIntoRet( + RetNodeEx ret, FlowState state, CcNoCall cc, ParamNodeOption summaryCtx, TypOption argT, + ApOption argAp, Typ t, Ap ap, ApApprox apa + ) { + fwdFlow(ret, state, cc, summaryCtx, argT, argAp, t, ap, apa) + } + pragma[nomagic] private predicate fwdFlowOutCand( - DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, + DataFlowCall call, RetNodeEx ret, CcNoCall innercc, DataFlowCallable inner, NodeEx out, + ApApprox apa, boolean allowsFieldFlow + ) { + fwdFlowIntoRet(ret, _, innercc, _, _, _, _, _, apa) and + inner = ret.getEnclosingCallable() and + ( + inner = viableImplCallContextReducedReverseInlineLate(call, innercc) and + flowOutOfCallApaInlineLate(call, inner, ret, out, allowsFieldFlow, apa) + or + flowOutOfCallApaNotCallContextReduced(call, inner, ret, out, allowsFieldFlow, apa, + innercc) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutValidEdge( + DataFlowCall call, RetNodeEx ret, CcNoCall innercc, DataFlowCallable inner, NodeEx out, + CcNoCall outercc, ApApprox apa, boolean allowsFieldFlow + ) { + fwdFlowOutCand(call, 
ret, innercc, inner, out, apa, allowsFieldFlow) and + FwdTypeFlow::typeFlowValidEdgeOut(call, inner) and + outercc = getCallContextReturn(inner, call) + } + + pragma[inline] + private predicate fwdFlowOut( + DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, CcNoCall outercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa ) { - exists(RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc | - fwdFlow(ret, state, innercc, summaryCtx, argT, argAp, t, ap, apa) and - inner = ret.getEnclosingCallable() and - ( - inner = viableImplCallContextReducedReverseInlineLate(call, innercc) and - flowOutOfCallApaInlineLate(call, inner, ret, out, allowsFieldFlow, apa) - or - flowOutOfCallApaNotCallContextReduced(call, inner, ret, out, allowsFieldFlow, apa, - innercc) - ) - | + exists(RetNodeEx ret, CcNoCall innercc, boolean allowsFieldFlow | + fwdFlowIntoRet(ret, state, innercc, summaryCtx, argT, argAp, t, ap, apa) and + fwdFlowOutValidEdge(call, ret, innercc, inner, out, outercc, apa, allowsFieldFlow) and if allowsFieldFlow = false then ap instanceof ApNil else any() ) } pragma[nomagic] private predicate fwdFlowOut( - DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, CcNoCall outercc, - ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa + NodeEx out, FlowState state, CcNoCall outercc, ParamNodeOption summaryCtx, TypOption argT, + ApOption argAp, Typ t, Ap ap, ApApprox apa ) { - fwdFlowOutCand(call, inner, out, state, summaryCtx, argT, argAp, t, ap, apa) and - FwdTypeFlow::typeFlowValidEdgeOut(call, inner) and - outercc = getCallContextReturn(inner, call) + fwdFlowOut(_, _, out, state, outercc, summaryCtx, argT, argAp, t, ap, apa) } private module FwdTypeFlowInput implements TypeFlowInput { @@ -1555,19 +1652,48 @@ module MakeImpl { PrevStage::callEdgeReturn(call, c, _, _, _, _, _) } + pragma[nomagic] + private predicate dataFlowTakenCallEdgeIn0( + DataFlowCall 
call, DataFlowCallable c, ParamNodeEx p, FlowState state, Cc innercc, + Typ t, Ap ap, boolean cc + ) { + FwdFlowIn<FwdFlowInNoRestriction>::fwdFlowIn(call, c, p, state, _, innercc, _, _, _, t, + ap, _, cc) + } + + pragma[nomagic] + private predicate fwdFlow1Param(ParamNodeEx p, FlowState state, CcCall cc, Typ t0, Ap ap) { + fwdFlow1(p, state, cc, _, _, _, t0, _, ap, _) + } + pragma[nomagic] predicate dataFlowTakenCallEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { - exists(ParamNodeEx p, FlowState state, Cc innercc, Typ t, Ap ap | - fwdFlowIn(call, c, p, state, _, innercc, _, _, _, t, ap, _, cc) and - fwdFlow1(p, state, innercc, _, _, _, t, _, ap, _) + exists(ParamNodeEx p, FlowState state, CcCall innercc, Typ t, Ap ap | + dataFlowTakenCallEdgeIn0(call, c, p, state, innercc, t, ap, cc) and + fwdFlow1Param(p, state, innercc, t, ap) + ) + } + + pragma[nomagic] + private predicate dataFlowTakenCallEdgeOut0( + DataFlowCall call, DataFlowCallable c, NodeEx node, FlowState state, Cc cc, Typ t, Ap ap + ) { + fwdFlowOut(call, c, node, state, cc, _, _, _, t, ap, _) + } + + pragma[nomagic] + private predicate fwdFlow1Out(NodeEx node, FlowState state, Cc cc, Typ t0, Ap ap) { + exists(ApApprox apa | + fwdFlow1(node, state, cc, _, _, _, t0, _, ap, apa) and + PrevStage::callEdgeReturn(_, _, _, _, node, _, apa) ) } pragma[nomagic] predicate dataFlowTakenCallEdgeOut(DataFlowCall call, DataFlowCallable c) { exists(NodeEx node, FlowState state, Cc cc, Typ t, Ap ap | - fwdFlowOut(call, c, node, state, cc, _, _, _, t, ap, _) and - fwdFlow1(node, state, cc, _, _, _, t, _, ap, _) + dataFlowTakenCallEdgeOut0(call, c, node, state, cc, t, ap) and + fwdFlow1Out(node, state, cc, t, ap) ) } @@ -1635,6 +1761,12 @@ module MakeImpl { innerArgApa) } + private module FwdFlowThroughRestriction implements FwdFlowInInputSig { + predicate callRestriction = PrevStage::callMayFlowThroughRev/1; + + predicate parameterRestriction = PrevStage::parameterMayFlowThrough/2; + } + /** * Holds if an argument to `call` is 
reached in the flow covered by `fwdFlow` * and data might flow through the target callable and back out at `call`. @@ -1644,12 +1776,8 @@ module MakeImpl { DataFlowCall call, Cc cc, CcCall innerCc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, ParamNodeEx p, Typ t, Ap ap ) { - exists(ApApprox apa | - fwdFlowIn(call, _, pragma[only_bind_into](p), _, cc, innerCc, summaryCtx, argT, argAp, - t, ap, pragma[only_bind_into](apa), _) and - PrevStage::parameterMayFlowThrough(p, apa) and - PrevStage::callMayFlowThroughRev(call) - ) + FwdFlowIn<FwdFlowThroughRestriction>::fwdFlowIn(call, _, p, _, cc, innerCc, summaryCtx, + argT, argAp, t, ap, _, _) } pragma[nomagic] @@ -3860,6 +3988,8 @@ module MakeImpl { ) } + private predicate parameterCandProj(DataFlowCallable c) { parameterCand(c, _, _) } + pragma[nomagic] private predicate pathIntoCallable0( PathNodeMid mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, @@ -3868,7 +3998,7 @@ module MakeImpl { exists(AccessPathApprox apa | pathIntoArg(mid, pragma[only_bind_into](pos), state, outercc, call, t, ap, pragma[only_bind_into](apa)) and - callable = resolveCall(call, outercc) and + callable = ResolveCall<parameterCandProj/1>::resolveCall(call, outercc) and parameterCand(callable, pragma[only_bind_into](pos), pragma[only_bind_into](apa)) ) } @@ -4791,13 +4921,15 @@ module MakeImpl { ) } + private predicate anyCallable(DataFlowCallable c) { any() } + pragma[nomagic] private predicate partialPathIntoCallable0( PartialPathNodeFwd mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, CallContext outercc, DataFlowCall call, DataFlowType t, PartialAccessPath ap ) { partialPathIntoArg(mid, pos, state, outercc, call, t, ap) and - callable = resolveCall(call, outercc) + callable = ResolveCall<anyCallable/1>::resolveCall(call, outercc) } private predicate partialPathIntoCallable( diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index e234de2e913..ab0562e17cb 
100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -1374,6 +1374,7 @@ module MakeImplCommon { * Holds if the edge `call`-to-`c` is valid in the in-going direction in the * call context `cc`. */ + pragma[nomagic] predicate typeFlowValidEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { Input::relevantCallEdgeIn(call, c) and cc = [true, false] and @@ -1416,6 +1417,7 @@ module MakeImplCommon { /** * Holds if the edge `call`-to-`c` is valid in the out-going direction. */ + pragma[nomagic] predicate typeFlowValidEdgeOut(DataFlowCall call, DataFlowCallable c) { Input::relevantCallEdgeOut(call, c) and ( @@ -1779,15 +1781,32 @@ module MakeImplCommon { call = prunedViableImplInCallContextReverse(callable, cc) } - /** - * Resolves a call from `call` in `cc` to `result`. - */ - bindingset[call, cc] - DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) { - result = prunedViableImplInCallContext(call, cc) - or - noPrunedViableImplInCallContext(call, cc) and - result = viableCallableExt(call) + signature predicate relevantResolveTargetSig(DataFlowCallable c); + + module ResolveCall<relevantResolveTargetSig/1 relevantResolveTarget> { + pragma[nomagic] + private DataFlowCallable prunedRelevantViableImplInCallContext(DataFlowCall call, CallContext cc) { + result = prunedViableImplInCallContext(call, cc) and + relevantResolveTarget(result) + } + + pragma[nomagic] + private DataFlowCallable viableRelevantCallableExt(DataFlowCall call) { + result = viableCallableExt(call) and + relevantResolveTarget(result) + } + + /** + * Resolves a call from `call` in `cc` to `result`, where `result` is + * restricted by `relevantResolveTarget`. + */ + bindingset[call, cc] + DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) { + result = prunedRelevantViableImplInCallContext(call, cc) + or + noPrunedViableImplInCallContext(call, cc) and + result = viableRelevantCallableExt(call) + } } /** An optional Boolean value. */