html: properly handle exclamation marks in comments

Properly handle the case where HTML comments begin with exclamation
marks and have no other content, i.e. "<!--!-->". Previously these
comments would cause the tokenizer to consider everything following to
also be considered part of the comment.

Fixes golang/go#37771

Change-Id: I78ea310debc3846f145d62cba017055abc7fa4e0
Reviewed-on: https://go-review.googlesource.com/c/net/+/442496
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Damien Neil <dneil@google.com>
This commit is contained in:
Roland Shoemaker 2022-10-12 12:38:14 -07:00
Родитель da05058a03
Коммит 430a433969
2 изменённых файлов: 34 добавлений и 19 удалений

Просмотреть файл

@ -605,7 +605,10 @@ func (z *Tokenizer) readComment() {
z.data.end = z.data.start
}
}()
for dashCount := 2; ; {
var dashCount int
beginning := true
for {
c := z.readByte()
if z.err != nil {
// Ignore up to two dashes at EOF.
@ -620,7 +623,7 @@ func (z *Tokenizer) readComment() {
dashCount++
continue
case '>':
if dashCount >= 2 {
if dashCount >= 2 || beginning {
z.data.end = z.raw.end - len("-->")
return
}
@ -638,6 +641,7 @@ func (z *Tokenizer) readComment() {
}
}
dashCount = 0
beginning = false
}
}

Просмотреть файл

@ -366,6 +366,16 @@ var tokenTests = []tokenTest{
"a<!--x--!>z",
"a$<!--x-->$z",
},
{
"comment14",
"a<!--!-->z",
"a$<!--!-->$z",
},
{
"comment15",
"a<!-- !-->z",
"a$<!-- !-->$z",
},
// An attribute with a backslash.
{
"backslash",
@ -456,26 +466,27 @@ var tokenTests = []tokenTest{
}
func TestTokenizer(t *testing.T) {
loop:
for _, tt := range tokenTests {
z := NewTokenizer(strings.NewReader(tt.html))
if tt.golden != "" {
for i, s := range strings.Split(tt.golden, "$") {
if z.Next() == ErrorToken {
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
continue loop
}
actual := z.Token().String()
if s != actual {
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
continue loop
t.Run(tt.desc, func(t *testing.T) {
z := NewTokenizer(strings.NewReader(tt.html))
if tt.golden != "" {
for i, s := range strings.Split(tt.golden, "$") {
if z.Next() == ErrorToken {
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
return
}
actual := z.Token().String()
if s != actual {
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
return
}
}
}
}
z.Next()
if z.Err() != io.EOF {
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
}
z.Next()
if z.Err() != io.EOF {
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
}
})
}
}