From 430a433969d01010c5b12aae4698bcb6d4de8377 Mon Sep 17 00:00:00 2001
From: Roland Shoemaker <roland@golang.org>
Date: Wed, 12 Oct 2022 12:38:14 -0700
Subject: [PATCH] html: properly handle exclamation marks in comments

Properly handle the case where HTML comments begin with exclamation
marks and have no other content, i.e. "<!--!-->". Previously these
comments would cause the tokenizer to treat everything following them
as part of the comment.

Fixes golang/go#37771

Change-Id: I78ea310debc3846f145d62cba017055abc7fa4e0
Reviewed-on: https://go-review.googlesource.com/c/net/+/442496
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Damien Neil <dneil@google.com>
---
 html/token.go      |  8 ++++++--
 html/token_test.go | 45 ++++++++++++++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/html/token.go b/html/token.go
index be3c7541..ae24a6fd 100644
--- a/html/token.go
+++ b/html/token.go
@@ -605,7 +605,10 @@ func (z *Tokenizer) readComment() {
 			z.data.end = z.data.start
 		}
 	}()
-	for dashCount := 2; ; {
+
+	var dashCount int
+	beginning := true
+	for {
 		c := z.readByte()
 		if z.err != nil {
 			// Ignore up to two dashes at EOF.
@@ -620,7 +623,7 @@ func (z *Tokenizer) readComment() {
 			dashCount++
 			continue
 		case '>':
-			if dashCount >= 2 {
+			if dashCount >= 2 || beginning {
 				z.data.end = z.raw.end - len("-->")
 				return
 			}
@@ -638,6 +641,7 @@ func (z *Tokenizer) readComment() {
 			}
 		}
 		dashCount = 0
+		beginning = false
 	}
 }
 
diff --git a/html/token_test.go b/html/token_test.go
index ee33caf8..0b9a9470 100644
--- a/html/token_test.go
+++ b/html/token_test.go
@@ -366,6 +366,16 @@ var tokenTests = []tokenTest{
 		"a<!--x--!>z",
 		"a$<!--x-->$z",
 	},
+	{
+		"comment14",
+		"a<!--!-->z",
+		"a$<!--!-->$z",
+	},
+	{
+		"comment15",
+		"a<!-- !-->z",
+		"a$<!-- !-->$z",
+	},
 	// An attribute with a backslash.
 	{
 		"backslash",
@@ -456,26 +466,27 @@ var tokenTests = []tokenTest{
 }
 
 func TestTokenizer(t *testing.T) {
-loop:
 	for _, tt := range tokenTests {
-		z := NewTokenizer(strings.NewReader(tt.html))
-		if tt.golden != "" {
-			for i, s := range strings.Split(tt.golden, "$") {
-				if z.Next() == ErrorToken {
-					t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
-					continue loop
-				}
-				actual := z.Token().String()
-				if s != actual {
-					t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
-					continue loop
+		t.Run(tt.desc, func(t *testing.T) {
+			z := NewTokenizer(strings.NewReader(tt.html))
+			if tt.golden != "" {
+				for i, s := range strings.Split(tt.golden, "$") {
+					if z.Next() == ErrorToken {
+						t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
+						return
+					}
+					actual := z.Token().String()
+					if s != actual {
+						t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
+						return
+					}
 				}
 			}
-		}
-		z.Next()
-		if z.Err() != io.EOF {
-			t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
-		}
+			z.Next()
+			if z.Err() != io.EOF {
+				t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
+			}
+		})
 	}
 }