Merge pull request #12550 from erik-krogh/useNumberUtil

Java/Python: use Number.qll to parse hex numbers in regex parsing
This commit is contained in:
Erik Krogh Kristensen 2023-03-20 15:50:31 +01:00 коммит произвёл GitHub
Родитель 0f813ce2e8 ef498020c2
Коммит a9d40d39d9
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 7 добавлений и 72 удалений

Просмотреть файл

@ -470,6 +470,8 @@ module Impl implements RegexTreeViewSig {
override string getPrimaryQLClass() { result = "RegExpAlt" }
}
private import codeql.util.Numbers as Numbers
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
@ -531,11 +533,7 @@ module Impl implements RegexTreeViewSig {
* Gets the unicode char for this escape.
* E.g. for `\u0061` this returns "a".
*/
private string getUnicode() {
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
result = codepoint.toUnicode()
)
}
private string getUnicode() { result = Numbers::parseHexInt(this.getHexString()).toUnicode() }
/** Gets the part of this escape that is a hexadecimal string */
private string getHexString() {
@ -547,18 +545,6 @@ module Impl implements RegexTreeViewSig {
then result = this.getText().substring(3, this.getText().length() - 1)
else result = this.getText().suffix(2) // \xhh
}
/**
* Gets the value contributed by the `index`th char in the hex number of the unicode escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the digit `6` in the sixteens place, i.e. 6 * 16).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getHexString() |
char = hex.charAt(index) and
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
}
}
/**
@ -586,25 +572,6 @@ module Impl implements RegexTreeViewSig {
RegExpNonWordBoundary() { this.getChar() = "\\B" }
}
/**
* Gets the integer value (0 to 15) of the single hexadecimal digit `hex`, in either upper or lower case.
*/
private int toHex(string hex) {
result = [0 .. 9] and hex = result.toString()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
}
/**
* A character class escape in a regular expression.
* That is, an escaped character that denotes multiple characters.

Просмотреть файл

@ -7,6 +7,7 @@ library: true
upgrades: upgrades
dependencies:
codeql/regex: ${workspace}
codeql/util: ${workspace}
codeql/tutorial: ${workspace}
dataExtensions:
- semmle/python/frameworks/**/model.yml

Просмотреть файл

@ -468,6 +468,8 @@ module Impl implements RegexTreeViewSig {
*/
class RegExpCharEscape = RegExpEscape;
private import codeql.util.Numbers as Numbers
/**
* An escaped regular expression term, that is, a regular expression
* term starting with a backslash, which is not a backreference.
@ -528,42 +530,8 @@ module Impl implements RegexTreeViewSig {
* E.g. for `\u0061` this returns "a".
*/
private string getUnicode() {
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
result = codepoint.toUnicode()
)
result = Numbers::parseHexInt(this.getText().suffix(2)).toUnicode()
}
/**
* Gets the value contributed by the `index`th char in the hex number of the unicode escape.
* E.g. for `\u0061` and `index = 2` this returns 96 (the digit `6` in the sixteens place, i.e. 6 * 16).
*/
private int getHexValueFromUnicode(int index) {
this.isUnicode() and
exists(string hex, string char | hex = this.getText().suffix(2) |
char = hex.charAt(index) and
result = 16.pow(hex.length() - index - 1) * toHex(char)
)
}
}
/**
* Gets the integer value (0 to 15) of the single hexadecimal digit `hex`, in either upper or lower case.
*/
private int toHex(string hex) {
hex = [0 .. 9].toString() and
result = hex.toInt()
or
result = 10 and hex = ["a", "A"]
or
result = 11 and hex = ["b", "B"]
or
result = 12 and hex = ["c", "C"]
or
result = 13 and hex = ["d", "D"]
or
result = 14 and hex = ["e", "E"]
or
result = 15 and hex = ["f", "F"]
}
/**

Просмотреть файл

@ -6,7 +6,6 @@ groups:
dependencies:
codeql/python-all: ${workspace}
codeql/suite-helpers: ${workspace}
codeql/util: ${workspace}
suites: codeql-suites
extractor: python
defaultSuiteFile: codeql-suites/python-code-scanning.qls