fix getCanonicalCharClass in NfaUtils

This commit is contained in:
erik-krogh 2022-11-01 21:35:07 +01:00
Родитель 78e35e2f29
Коммит 15416a9c86
Не найден ключ, соответствующий данной подписи
3 изменённых файлов: 25 добавлений и 20 удалений

Просмотреть файл

@ -129,19 +129,20 @@ private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
min(RelevantRegExpTerm t, Location loc, File file |
loc = t.getLocation() and
file = t.getFile() and
str = t.getRawValue() + "|" + getCanonicalizationFlags(t.getRootTerm())
str = getCanonicalizationString(t)
|
t order by t.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn()
)
}
/**
* Gets a string representation of the flags used with the regular expression.
* Only the flags that are relevant for the canonicalization are included.
* Gets a string representation of `term` that is used for canonicalization.
*/
string getCanonicalizationFlags(RegExpTerm root) {
root.isRootTerm() and
(if RegExpFlags::isIgnoreCase(root) then result = "i" else result = "")
private string getCanonicalizationString(RelevantRegExpTerm term) {
exists(string ignoreCase |
// `ignoreCase` is "i" when the root term of `term` is case-insensitive, and "" otherwise.
(if RegExpFlags::isIgnoreCase(term.getRootTerm()) then ignoreCase = "i" else ignoreCase = "") and
// The result is the raw text of `term`, a "|" separator, and then the case-insensitivity marker.
result = term.getRawValue() + "|" + ignoreCase
)
}
/**
@ -186,12 +187,19 @@ private newtype TInputSymbol =
Epsilon()
/**
* Gets the canonical CharClass for `term`.
* Gets the CharClass corresponding to the canonical representative `term`.
*/
CharClass getCanonicalCharClass(RegExpTerm term) {
private CharClass getCharClassForCanonicalTerm(RegExpTerm term) {
exists(string str | isCanonicalTerm(term, str) | result = CharClass(str))
}
/**
* Gets a char class that represents `term`, even when `term` is not the canonical representative.
*
* The result is the `CharClass` constructed from the string that
* `getCanonicalizationString` yields for `term`.
*/
CharacterClass getCanonicalCharClass(RegExpTerm term) {
exists(string str | str = getCanonicalizationString(term) and result = CharClass(str))
}
/**
* Holds if `a` and `b` are input symbols from the same regexp.
*/
@ -284,7 +292,7 @@ private module CharacterClasses {
*/
pragma[noinline]
predicate hasChildThatMatchesIgnoringCasingFlags(RegExpCharacterClass cc, string char) {
exists(getCanonicalCharClass(cc)) and
exists(getCharClassForCanonicalTerm(cc)) and
exists(RegExpTerm child | child = cc.getAChild() |
char = child.(RegexpCharacterConstant).getValue()
or
@ -387,7 +395,7 @@ private module CharacterClasses {
private class PositiveCharacterClass extends CharacterClass {
RegExpCharacterClass cc;
PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
PositiveCharacterClass() { this = getCharClassForCanonicalTerm(cc) and not cc.isInverted() }
override string getARelevantChar() { result = caseNormalize(getAMentionedChar(cc), cc) }
@ -400,7 +408,7 @@ private module CharacterClasses {
private class InvertedCharacterClass extends CharacterClass {
RegExpCharacterClass cc;
InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
InvertedCharacterClass() { this = getCharClassForCanonicalTerm(cc) and cc.isInverted() }
override string getARelevantChar() {
result = nextChar(caseNormalize(getAMentionedChar(cc), cc)) or
@ -435,7 +443,7 @@ private module CharacterClasses {
PositiveCharacterClassEscape() {
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["d", "s", "w"]
}
@ -475,7 +483,7 @@ private module CharacterClasses {
NegativeCharacterClassEscape() {
exists(RegExpTerm cc |
isEscapeClass(cc, charClass) and
this = getCanonicalCharClass(cc) and
this = getCharClassForCanonicalTerm(cc) and
charClass = ["D", "S", "W"]
)
}
@ -652,17 +660,13 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
or
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or
exists(RegExpTerm cc | isEscapeClass(cc, _) |
q1 = before(cc) and
lbl =
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
getCanonicalizationFlags(cc.getRootTerm()))) and
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
q2 = after(cc)
)
or

Просмотреть файл

@ -38,3 +38,4 @@
| tst-multi-character-sanitization.js:144:13:144:91 | content ... /g, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:144:30:144:30 | < | <script |
| tst-multi-character-sanitization.js:145:13:145:90 | content ... /g, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:145:30:145:30 | < | <script |
| tst-multi-character-sanitization.js:148:3:148:99 | n.clone ... gi, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:148:41:148:41 | < | <script |
| tst-multi-character-sanitization.js:152:3:152:99 | n.clone ... gi, '') | This string may still contain $@, which may cause an HTML element injection vulnerability. | tst-multi-character-sanitization.js:152:41:152:41 | < | <script |

Просмотреть файл

@ -149,7 +149,7 @@
o.push({specified : 1, nodeName : a});
});
n.cloneNode(false).outerHTML.replace(/<\/?[\w:\-]+ ?|=[\"][^\"]+\"|=\'[^\']+\'|=[\w\-]+|>/gi, '').replace(/[\w:\-]+/gi, function(a) { // NOT OK - but not flagged
n.cloneNode(false).outerHTML.replace(/<\/?[\w:\-]+ ?|=[\"][^\"]+\"|=\'[^\']+\'|=[\w\-]+|>/gi, '').replace(/[\w:\-]+/gi, function(a) { // NOT OK
o.push({specified : 1, nodeName : a});
});
});