From 430a433969d01010c5b12aae4698bcb6d4de8377 Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Wed, 12 Oct 2022 12:38:14 -0700 Subject: [PATCH] html: properly handle exclamation marks in comments Properly handle the case where HTML comments begin with exclamation marks and have no other content, i.e. "<!--!-->". Previously these comments would cause the tokenizer to treat everything following as part of the comment. Fixes golang/go#37771 Change-Id: I78ea310debc3846f145d62cba017055abc7fa4e0 Reviewed-on: https://go-review.googlesource.com/c/net/+/442496 Run-TryBot: Roland Shoemaker TryBot-Result: Gopher Robot Reviewed-by: Damien Neil --- html/token.go | 8 ++++++-- html/token_test.go | 45 ++++++++++++++++++++++++++++----------------- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/html/token.go b/html/token.go index be3c7541..ae24a6fd 100644 --- a/html/token.go +++ b/html/token.go @@ -605,7 +605,10 @@ func (z *Tokenizer) readComment() { z.data.end = z.data.start } }() - for dashCount := 2; ; { + + var dashCount int + beginning := true + for { c := z.readByte() if z.err != nil { // Ignore up to two dashes at EOF. @@ -620,7 +623,7 @@ func (z *Tokenizer) readComment() { dashCount++ continue case '>': - if dashCount >= 2 { + if dashCount >= 2 || beginning { z.data.end = z.raw.end - len("-->") return } @@ -638,6 +641,7 @@ func (z *Tokenizer) readComment() { } } dashCount = 0 + beginning = false } } diff --git a/html/token_test.go b/html/token_test.go index ee33caf8..0b9a9470 100644 --- a/html/token_test.go +++ b/html/token_test.go @@ -366,6 +366,16 @@ var tokenTests = []tokenTest{ "az", "a$$z", }, + { + "comment14", + "az", + "a$$z", + }, + { + "comment15", + "az", + "a$$z", + }, // An attribute with a backslash. 
{ "backslash", @@ -456,26 +466,27 @@ var tokenTests = []tokenTest{ } func TestTokenizer(t *testing.T) { -loop: for _, tt := range tokenTests { - z := NewTokenizer(strings.NewReader(tt.html)) - if tt.golden != "" { - for i, s := range strings.Split(tt.golden, "$") { - if z.Next() == ErrorToken { - t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err()) - continue loop - } - actual := z.Token().String() - if s != actual { - t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual) - continue loop + t.Run(tt.desc, func(t *testing.T) { + z := NewTokenizer(strings.NewReader(tt.html)) + if tt.golden != "" { + for i, s := range strings.Split(tt.golden, "$") { + if z.Next() == ErrorToken { + t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err()) + return + } + actual := z.Token().String() + if s != actual { + t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual) + return + } } } - } - z.Next() - if z.Err() != io.EOF { - t.Errorf("%s: want EOF got %q", tt.desc, z.Err()) - } + z.Next() + if z.Err() != io.EOF { + t.Errorf("%s: want EOF got %q", tt.desc, z.Err()) + } + }) } }