Python: Fix bad join in `syntactic_call_count`

On certain databases, the evaluation of this predicate was running out
of memory due to the way the `count` aggregate was being used. Here's
an example of the tuple counts involved:

```
Tuple counts for PointsToContext::syntactic_call_count#cf3039a0#ff#antijoin_rhs/1@d2199bb8 after 1m27s:
595518502 ~521250%     {1} r1 = JOIN PointsToContext::syntactic_call_count#cf3039a0#ff#shared#3 WITH Flow::CallNode::getFunction#dispred#f0820431#ff_1#join_rhs ON FIRST 1 OUTPUT Lhs.1 'arg0'

26518709  ~111513%     {1} r2 = JOIN PointsToContext::syntactic_call_count#cf3039a0#ff#shared#2 WITH Flow::CallNode::getFunction#dispred#f0820431#ff_1#join_rhs ON FIRST 1 OUTPUT Lhs.1 'arg0'

622037211 ~498045%     {1} r3 = r1 UNION r2
                        return r3
```

and a timing report that looked like this:
```
time  | evals |   max @ iter | predicate
------|-------|--------------|----------
 5m8s |       |              | PointsToContext::syntactic_call_count#cf3039a0#ff#shared#2@6d98d1nd
4m38s |       |              | PointsToContext::syntactic_call_count#cf3039a0#ff#count_range@f5df1do4
3m51s |       |              | PointsToContext::syntactic_call_count#cf3039a0#ff#shared#3@da3b4abf
1m58s |  7613 |  37ms @ 4609 | MRO::ClassListList::removedClassParts#f0820431#fffff#reorder_2_3_4_0_1@8155axyi
1m37s |  7613 |  33ms @ 3904 | MRO::ClassListList::bestMergeCandidate#f0820431#2#fff@8155a83w
1m27s |       |              | PointsToContext::syntactic_call_count#cf3039a0#ff#antijoin_rhs@d2199bb8
 1m8s |  1825 |  63ms @ 404  | PointsTo::Expressions::equalityEvaluatesTo#741b54e2#fffff@8155aw7w
37.6s |       |              | PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs@e348fc1p
...
```

To make optimising this easier for the compiler, I moved the bodies of
the `count` aggregate into their own helper predicates (with size
linear in the number of `CallNode`s), and also factored out the many
calls to `f.getName()`.

The astute reader will notice that in writing this as a sum of `count`s
rather than a count of a disjunction, the intersection (if it exists)
will be counted twice, and so the semantics may be different. However,
since `method_call` and `function_call` require `AttrNode` and
`NameNode` functions respectively, and as these two types are disjoint,
there is no intersection, and so the semantics should be preserved.

After the change, the evaluation of `syntactic_call_count` now looks as
follows:
```
Tuple counts for PointsToContext::syntactic_call_count#cf3039a0#ff/2@662dd8s0 after 216ms:
23960  ~0%     {1} r1 = @py_scope#f AND NOT py_Functions_0#antijoin_rhs(Lhs.0 's')
23960  ~0%     {2} r2 = SCAN r1 OUTPUT In.0 's', 0

276309 ~7%     {2} r3 = SCAN @py_scope#f OUTPUT In.0 's', "__init__"
11763  ~0%     {2} r4 = JOIN r3 WITH Scope::Scope::getName#dispred#f0820431#fb ON FIRST 2 OUTPUT Lhs.0 's', 1

35723  ~0%     {2} r5 = r2 UNION r4

252349 ~0%     {2} r6 = JOIN @py_scope#f WITH Function::Function::getName#dispred#f0820431#ff ON FIRST 1 OUTPUT Lhs.0 's', Rhs.1

240586 ~0%     {2} r7 = SELECT r6 ON In.1 != "__init__"

131727 ~4%     {2} r8 = r7 AND NOT project#PointsToContext::method_call#cf3039a0#ff(Lhs.1)
131727 ~0%     {3} r9 = SCAN r8 OUTPUT In.1, In.0 's', 0

240586 ~0%     {2} r10 = SCAN r7 OUTPUT In.1, In.0 's'

108859 ~0%     {3} r11 = JOIN r10 WITH PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs ON FIRST 1 OUTPUT Lhs.0, Lhs.1 's', Rhs.1

240586 ~0%     {3} r12 = r9 UNION r11
24100  ~0%     {2} r13 = JOIN r12 WITH PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs#1 ON FIRST 1 OUTPUT Lhs.1 's', (Rhs.1 + Lhs.2)

240586 ~0%     {2} r14 = SELECT r6 ON In.1 != "__init__"
131727 ~4%     {2} r15 = r14 AND NOT project#PointsToContext::method_call#cf3039a0#ff(Lhs.1)
131727 ~0%     {3} r16 = SCAN r15 OUTPUT In.0 's', In.1, 0

108859 ~4%     {3} r17 = JOIN r10 WITH PointsToContext::syntactic_call_count#cf3039a0#ff#join_rhs ON FIRST 1 OUTPUT Lhs.1 's', Lhs.0, Rhs.1

240586 ~4%     {3} r18 = r16 UNION r17
216486 ~2%     {3} r19 = r18 AND NOT project#PointsToContext::function_call#cf3039a0#ff(Lhs.1)
216486 ~0%     {2} r20 = SCAN r19 OUTPUT In.0 's', (0 + In.2)

240586 ~0%     {2} r21 = r13 UNION r20
276309 ~0%     {2} r22 = r5 UNION r21
                return r22
```
This commit is contained in:
Taus 2022-07-18 13:58:00 +00:00 коммит произвёл GitHub
Родитель 410167671f
Коммит bdd771989f
1 изменённых файлов: 8 добавлений и 7 удалений

Просмотреть файл

@ -23,13 +23,8 @@ private int max_context_cost() {
}
private int syntactic_call_count(Scope s) {
exists(Function f | f = s and f.getName() != "__init__" |
result =
count(CallNode call |
call.getFunction().(NameNode).getId() = f.getName()
or
call.getFunction().(AttrNode).getName() = f.getName()
)
exists(Function f, string name | f = s and name = f.getName() and name != "__init__" |
result = count(function_call(name)) + count(method_call(name))
)
or
s.getName() = "__init__" and result = 1
@ -37,6 +32,12 @@ private int syntactic_call_count(Scope s) {
not s instanceof Function and result = 0
}
pragma[nomagic]
private CallNode function_call(string name) { result.getFunction().(NameNode).getId() = name }
pragma[nomagic]
private CallNode method_call(string name) { result.getFunction().(AttrNode).getName() = name }
private int incoming_call_cost(Scope s) {
/*
* Syntactic call count will often be a considerable overestimate