зеркало из https://github.com/github/codeql.git
Python: Fix bad join in `syntactic_call_count`
On certain databases, the evaluation of this predicate was running out of memory due to the way the `count` aggregate was being used. Here's an example of the tuple counts involved:

```
Tuple counts for PointsToContext::syntactic_call_count#cf3039a0#ff#antijoin_rhs/1@d2199bb8 after 1m27s:
595518502 ~521250% {1} r1 = JOIN PointsToContext::syntactic_call_count#cf3039a0#ff#shared#3 WITH Flow::CallNode::getFunction#dispred#f0820431#ff_1#join_rhs ON FIRST 1 OUTPUT Lhs.1 'arg0'
26518709 ~111513% {1} r2 = JOIN PointsToContext::syntactic_call_count#cf3039a0#ff#shared#2 WITH Flow::CallNode::getFunction#dispred#f0820431#ff_1#join_rhs ON FIRST 1 OUTPUT Lhs.1 'arg0'
622037211 ~498045% {1} r3 = r1 UNION r2
return r3
```

and a timing report that looked like this:

```
time | evals | max @ iter | predicate
------|-------|--------------|----------
5m8s | | | PointsToContext::syntactic_call_count#cf3039a0#ff#shared#2@6d98d1nd
4m38s | | | PointsToContext::syntactic_call_count#cf3039a0#ff#count_range@f5df1do4
3m51s | | | PointsToContext::syntactic_call_count#cf3039a0#ff#shared#3@da3b4abf
1m58s | 7613 | 37ms @ 4609 | MRO::ClassListList::removedClassParts#f0820431#fffff#reorder_2_3_4_0_1@8155axyi
1m37s | 7613 | 33ms @ 3904 | MRO::ClassListList::bestMergeCandidate#f0820431#2#fff@8155a83w
1m27s | | | PointsToContext::syntactic_call_count#cf3039a0#ff#antijoin_rhs@d2199bb8
1m8s | 1825 | 63ms @ 404 | PointsTo::Expressions::equalityEvaluatesTo#741b54e2#fffff@8155aw7w
37.6s | | | PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs@e348fc1p
...
```

To make optimising this easier for the compiler, I moved the bodies of the `count` aggregate into their own helper predicates (with size linear in the number of `CallNode`s), and also factored out the many calls to `f.getName()`.

The astute reader will notice that in writing this as a sum of `count`s rather than a count of a disjunction, the intersection (if it exists) will be counted twice, and so the semantics may be different.
However, since `method_call` and `function_call` require `AttrNode` and `NameNode` functions respectively, and as these two types are disjoint, there is no intersection, and so the semantics should be preserved.

After the change, the evaluation of `syntactic_call_count` now looks as follows:

```
Tuple counts for PointsToContext::syntactic_call_count#cf3039a0#ff/2@662dd8s0 after 216ms:
23960 ~0% {1} r1 = @py_scope#f AND NOT py_Functions_0#antijoin_rhs(Lhs.0 's')
23960 ~0% {2} r2 = SCAN r1 OUTPUT In.0 's', 0
276309 ~7% {2} r3 = SCAN @py_scope#f OUTPUT In.0 's', "__init__"
11763 ~0% {2} r4 = JOIN r3 WITH Scope::Scope::getName#dispred#f0820431#fb ON FIRST 2 OUTPUT Lhs.0 's', 1
35723 ~0% {2} r5 = r2 UNION r4
252349 ~0% {2} r6 = JOIN @py_scope#f WITH Function::Function::getName#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.0 's', Rhs.1
240586 ~0% {2} r7 = SELECT r6 ON In.1 != "__init__"
131727 ~4% {2} r8 = r7 AND NOT project#PointsToContext::method_call#cf3039a0#ff(Lhs.1)
131727 ~0% {3} r9 = SCAN r8 OUTPUT In.1, In.0 's', 0
240586 ~0% {2} r10 = SCAN r7 OUTPUT In.1, In.0 's'
108859 ~0% {3} r11 = JOIN r10 WITH PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs ON FIRST 1 OUTPUT Lhs.0, Lhs.1 's', Rhs.1
240586 ~0% {3} r12 = r9 UNION r11
24100 ~0% {2} r13 = JOIN r12 WITH PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs#1 ON FIRST 1 OUTPUT Lhs.1 's', (Rhs.1 + Lhs.2)
240586 ~0% {2} r14 = SELECT r6 ON In.1 != "__init__"
131727 ~4% {2} r15 = r14 AND NOT project#PointsToContext::method_call#cf3039a0#ff(Lhs.1)
131727 ~0% {3} r16 = SCAN r15 OUTPUT In.0 's', In.1, 0
108859 ~4% {3} r17 = JOIN r10 WITH PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs ON FIRST 1 OUTPUT Lhs.1 's', Lhs.0, Rhs.1
240586 ~4% {3} r18 = r16 UNION r17
216486 ~2% {3} r19 = r18 AND NOT project#PointsToContext::function_call#cf3039a0#ff(Lhs.1)
216486 ~0% {2} r20 = SCAN r19 OUTPUT In.0 's', (0 + In.2)
240586 ~0% {2} r21 = r13 UNION r20
276309 ~0% {2} r22 = r5 UNION r21
return r22
```
This commit is contained in:
Родитель
410167671f
Коммит
bdd771989f
|
@ -23,13 +23,8 @@ private int max_context_cost() {
|
|||
}
|
||||
|
||||
private int syntactic_call_count(Scope s) {
|
||||
exists(Function f | f = s and f.getName() != "__init__" |
|
||||
result =
|
||||
count(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = f.getName()
|
||||
or
|
||||
call.getFunction().(AttrNode).getName() = f.getName()
|
||||
)
|
||||
exists(Function f, string name | f = s and name = f.getName() and name != "__init__" |
|
||||
result = count(function_call(name)) + count(method_call(name))
|
||||
)
|
||||
or
|
||||
s.getName() = "__init__" and result = 1
|
||||
|
@ -37,6 +32,12 @@ private int syntactic_call_count(Scope s) {
|
|||
not s instanceof Function and result = 0
|
||||
}
|
||||
|
||||
/**
 * Gets a `CallNode` whose `getFunction()` is a `NameNode` with identifier `name`.
 *
 * Holds the name-based half of the body that `syntactic_call_count` counts,
 * kept as a separate predicate so it can be evaluated on its own.
 * NOTE(review): `pragma[nomagic]` is presumably what stops the optimiser from
 * folding this back into the caller's join - confirm against the CodeQL
 * annotation documentation before removing it.
 */
pragma[nomagic]
|
||||
private CallNode function_call(string name) { result.getFunction().(NameNode).getId() = name }
|
||||
|
||||
/**
 * Gets a `CallNode` whose `getFunction()` is an `AttrNode` with attribute name `name`.
 *
 * Holds the attribute-based half of the body that `syntactic_call_count` counts,
 * kept as a separate predicate so it can be evaluated on its own.
 * NOTE(review): `pragma[nomagic]` is presumably what stops the optimiser from
 * folding this back into the caller's join - confirm against the CodeQL
 * annotation documentation before removing it.
 */
pragma[nomagic]
|
||||
private CallNode method_call(string name) { result.getFunction().(AttrNode).getName() = name }
|
||||
|
||||
private int incoming_call_cost(Scope s) {
|
||||
/*
|
||||
* Syntactic call count will often be a considerable overestimate
|
||||
|
|
Загрузка…
Ссылка в новой задаче