Python regex: Fix handling of character sets where first character in set is '['.

This commit is contained in:
Mark Shannon 2019-06-26 10:55:47 +01:00
Родитель 927d72414b
Коммит 347e3f3bd0
8 изменённых файлов: 34 добавлений и 3 удалений

Просмотреть файл

@@ -68,7 +68,8 @@ abstract class RegexString extends Expr {
/** Whether there is a character class, between start (inclusive) and end (exclusive) */
predicate charSet(int start, int end) {
exists(int inner_start, int inner_end |
this.char_set_start(start, inner_start) |
this.char_set_start(start, inner_start) and
not this.char_set_start(_, start) |
end = inner_end + 1 and inner_end > inner_start and
this.nonEscapedCharAt(inner_end) = "]" and
not exists(int mid | this.nonEscapedCharAt(mid) = "]" |

Просмотреть файл

@@ -56,6 +56,11 @@
| \\A[+-]?\\d+ | 3 | 4 |
| \\A[+-]?\\d+ | 4 | 5 |
| \\A[+-]?\\d+ | 7 | 9 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 0 | 2 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 12 | 13 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 16 | 18 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 18 | 20 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 30 | 31 |
| \\\|\\[\\][123]\|\\{\\} | 0 | 2 |
| \\\|\\[\\][123]\|\\{\\} | 2 | 4 |
| \\\|\\[\\][123]\|\\{\\} | 4 | 6 |

Просмотреть файл

@@ -45,6 +45,9 @@
| \\A[+-]?\\d+ | first | 0 | 2 |
| \\A[+-]?\\d+ | last | 7 | 9 |
| \\A[+-]?\\d+ | last | 7 | 10 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | first | 0 | 2 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 32 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 33 |
| \\\|\\[\\][123]\|\\{\\} | first | 0 | 2 |
| \\\|\\[\\][123]\|\\{\\} | first | 12 | 14 |
| \\\|\\[\\][123]\|\\{\\} | last | 6 | 11 |

Просмотреть файл

@@ -10,7 +10,9 @@
| (?P<name>[\\w]+)\| | 0 | 15 | (?P<name>[\\w]+) | 9 | 14 | [\\w]+ |
| (?m)^(?!$) | 5 | 10 | (?!$) | 8 | 9 | $ |
| (\\033\|~{) | 0 | 9 | (\\033\|~{) | 1 | 8 | \\033\|~{ |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 2 | 16 | (?P<txt>[^[]*) | 10 | 15 | [^[]* |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 20 | 34 | (?P<uri>[^)]*) | 28 | 33 | [^)]* |
| ^(^y\|^z)(u$\|v$)$ | 1 | 8 | (^y\|^z) | 2 | 7 | ^y\|^z |
| ^(^y\|^z)(u$\|v$)$ | 8 | 15 | (u$\|v$) | 9 | 14 | u$\|v$ |
| ^[A-Z_]+$(?<!not-this) | 9 | 22 | (?<!not-this) | 13 | 21 | not-this |
| x\|(?<!\\w)l | 2 | 9 | (?<!\\w) | 6 | 8 | \\w |
| x\|(?<!\\w)l | 2 | 9 | (?<!\\w) | 6 | 8 | \\w |

Просмотреть файл

@@ -6,7 +6,9 @@
| (?P<name>[\\w]+)\| | 9 | 14 | false |
| \\A[+-]?\\d+ | 2 | 7 | true |
| \\A[+-]?\\d+ | 7 | 10 | false |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 28 | 33 | true |
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
| ax{01,3} | 1 | 8 | false |
| ax{3,} | 1 | 6 | false |
| ax{,3} | 1 | 6 | true |
| ax{,3} | 1 | 6 | true |

Просмотреть файл

@@ -121,6 +121,18 @@
| \\A[+-]?\\d+ | qualified | 2 | 7 |
| \\A[+-]?\\d+ | qualified | 7 | 10 |
| \\A[+-]?\\d+ | sequence | 0 | 10 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 0 | 2 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 12 | 13 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 16 | 18 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 18 | 20 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 30 | 31 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char-set | 10 | 14 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char-set | 28 | 32 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | non-empty group | 2 | 16 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | non-empty group | 20 | 34 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | qualified | 10 | 15 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | qualified | 28 | 33 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | sequence | 0 | 34 |
| \\\|\\[\\][123]\|\\{\\} | char | 0 | 2 |
| \\\|\\[\\][123]\|\\{\\} | char | 2 | 4 |
| \\\|\\[\\][123]\|\\{\\} | char | 4 | 6 |

Просмотреть файл

@@ -57,3 +57,6 @@ re.compile(r'x|')
#Named group with caret and empty choice.
re.compile(r'(?:(?P<n1>^(?:|x)))')
#Misparsed on LGTM
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)")

Просмотреть файл

@@ -136,3 +136,6 @@ ODASA_6786 = re.compile(VERBOSE_REGEX, re.VERBOSE)
#Named group with caret and empty choice.
re.compile(r'(?:(?P<n1>^(?:|x)))')
#Potentially mis-parsed character set
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)")