* Support code.google.com

* Support LINK_TYPE_GOOGLE_DOCS

* Support LINK_TYPE_MOZILLA_BUG and LINK_TYPE_WEBKIT_BUG

* Support LINK_TYPE_SPECS

* Fix extract url ending with slash

* Update specs feature link title

* Fix

* Remove flaky test

* Fix typo in regex
This commit is contained in:
Ping 2023-08-22 09:37:53 +08:00 коммит произвёл GitHub
Родитель ccdf6ee280
Коммит 17f9886d06
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 180 добавлений и 41 удалений

Просмотреть файл

@ -5,6 +5,10 @@ const LINK_TYPE_GITHUB_ISSUE = 'github_issue';
// Link type identifiers. The `switch` in `_enhanceLink` compares against
// these values to choose which `enhance*Link` renderer to call.
const LINK_TYPE_GITHUB_PULL_REQUEST = 'github_pull_request';
const LINK_TYPE_GITHUB_MARKDOWN = 'github_markdown';
const LINK_TYPE_MDN_DOCS = 'mdn_docs';
const LINK_TYPE_GOOGLE_DOCS = 'google_docs';
const LINK_TYPE_MOZILLA_BUG = 'mozilla_bug';
const LINK_TYPE_WEBKIT_BUG = 'webkit_bug';
const LINK_TYPE_SPECS = 'specs';
function _formatLongText(text, maxLength = 50) {
if (text.length > maxLength) {
@ -214,16 +218,11 @@ function enhanceGithubMarkdownLink(featureLink, text) {
</a>`;
}
function enhanceMDNDocsLink(featureLink, text) {
function _enhanceLinkWithTitleAndDescription(featureLink, iconUrl) {
const information = featureLink.information;
const title = information.title;
const description = information.description;
if (!text) {
text = title;
}
function renderTooltipContent() {
return html`<div class="feature-link-tooltip">
${title && html`
@ -244,13 +243,82 @@ function enhanceMDNDocsLink(featureLink, text) {
<sl-tooltip style="--sl-tooltip-arrow-size: 0;--max-width: 50vw;">
<div slot="content">${renderTooltipContent()}</div>
<sl-tag>
<img src="https://developer.mozilla.org/favicon-48x48.png" alt="icon" class="icon" />
<img src="${iconUrl}" alt="icon" class="icon" />
${_formatLongText(title)}
</sl-tag>
</sl-tooltip>
</a>`;
}
/**
 * Renders a link to a web specification as a tag with a tooltip.
 *
 * The tag shows the site's favicon (fetched through Google's favicon service)
 * and "Spec: <title>". The tooltip lists the title, the description and, when
 * the URL has a fragment, the fragment as a "Hashtag" row.
 *
 * @param {Object} featureLink - Link object; reads `url` and
 *     `information.title` / `information.description`.
 * @return {*} The lit template result for the enhanced link.
 */
function enhanceSpecsLink(featureLink) {
  const url = featureLink.url;
  // Encode the page URL so that any `&` or `#` it contains stays inside the
  // `domain_url` parameter instead of terminating it.
  const iconUrl = `https://www.google.com/s2/favicons?domain_url=${encodeURIComponent(url)}`;
  const hashtag = url.split('#')[1];
  const information = featureLink.information;
  const title = information.title;
  const description = information.description;

  // Every optional row is a complete <div>...</div>, and all rows live inside
  // the single wrapper <div>, so the markup stays balanced whichever rows
  // are present.
  function renderTooltipContent() {
    return html`<div class="feature-link-tooltip">
      ${title ? html`
        <div>
          <strong>Title:</strong>
          <span>${title}</span>
        </div>
      ` : ''}
      ${description ? html`
        <div>
          <strong>Description:</strong>
          <span>${description}</span>
        </div>
      ` : ''}
      ${hashtag ? html`
        <div>
          <strong>Hashtag:</strong>
          <span>#${hashtag}</span>
        </div>
      ` : ''}
    </div>`;
  }

  return html`<a class="feature-link" href="${featureLink.url}" target="_blank" rel="noopener noreferrer">
    <sl-tooltip style="--sl-tooltip-arrow-size: 0;--max-width: 50vw;">
      <div slot="content">${renderTooltipContent()}</div>
      <sl-tag>
        <img src="${iconUrl}" alt="icon" class="icon" />
        Spec: ${_formatLongText(title)}
      </sl-tag>
    </sl-tooltip>
  </a>`;
}
/**
 * Renders an MDN documentation link using the shared title/description
 * renderer with the MDN favicon.
 *
 * @param {Object} featureLink - Link object, forwarded unchanged.
 * @return {*} Whatever `_enhanceLinkWithTitleAndDescription` returns.
 */
function enhanceMDNDocsLink(featureLink) {
  const mdnIconUrl = 'https://developer.mozilla.org/favicon-48x48.png';
  return _enhanceLinkWithTitleAndDescription(featureLink, mdnIconUrl);
}
/**
 * Renders a Mozilla Bugzilla link using the shared title/description
 * renderer with the Bugzilla favicon.
 *
 * @param {Object} featureLink - Link object, forwarded unchanged.
 * @return {*} Whatever `_enhanceLinkWithTitleAndDescription` returns.
 */
function enhanceMozillaBugLink(featureLink) {
  const bugzillaIconUrl = 'https://bugzilla.mozilla.org/favicon.ico';
  return _enhanceLinkWithTitleAndDescription(featureLink, bugzillaIconUrl);
}
/**
 * Renders a WebKit bug tracker link using the shared title/description
 * renderer with the WebKit Bugzilla favicon.
 *
 * @param {Object} featureLink - Link object, forwarded unchanged.
 * @return {*} Whatever `_enhanceLinkWithTitleAndDescription` returns.
 */
function enhanceWebKitBugLink(featureLink) {
  const webkitIconUrl = 'https://bugs.webkit.org/images/favicon.ico';
  return _enhanceLinkWithTitleAndDescription(featureLink, webkitIconUrl);
}
/**
 * Renders a Google Docs link using the shared title/description renderer,
 * choosing the favicon from the editor named in the URL path.
 *
 * The fourth '/'-separated piece of the URL (e.g. `document`, `spreadsheets`)
 * selects the icon; anything unrecognized falls back to the Docs icon.
 *
 * @param {Object} featureLink - Link object; reads `url`.
 * @return {*} Whatever `_enhanceLinkWithTitleAndDescription` returns.
 */
function enhanceGoogleDocsLink(featureLink) {
  const defaultIconUrl = 'https://ssl.gstatic.com/docs/documents/images/kix-favicon7.ico';
  const iconUrlByEditor = new Map([
    ['spreadsheets', 'https://ssl.gstatic.com/docs/spreadsheets/favicon3.ico'],
    ['presentation', 'https://ssl.gstatic.com/docs/presentations/images/favicon5.ico'],
    ['forms', 'https://ssl.gstatic.com/docs/spreadsheets/forms/favicon_qp2.png'],
  ]);

  // 'https:' / '' / host / editor / ... -> the editor name is at index 3.
  const editor = featureLink.url.split('/')[3];
  const resolvedIconUrl = iconUrlByEditor.get(editor) || defaultIconUrl;
  return _enhanceLinkWithTitleAndDescription(featureLink, resolvedIconUrl);
}
function _enhanceLink(featureLink, fallback, text, ignoreHttpErrorCodes = []) {
if (!fallback) {
throw new Error('fallback html is required');
@ -290,6 +358,14 @@ function _enhanceLink(featureLink, fallback, text, ignoreHttpErrorCodes = []) {
return enhanceGithubMarkdownLink(featureLink);
case LINK_TYPE_MDN_DOCS:
return enhanceMDNDocsLink(featureLink);
case LINK_TYPE_GOOGLE_DOCS:
return enhanceGoogleDocsLink(featureLink);
case LINK_TYPE_MOZILLA_BUG:
return enhanceMozillaBugLink(featureLink);
case LINK_TYPE_WEBKIT_BUG:
return enhanceWebKitBugLink(featureLink);
case LINK_TYPE_SPECS:
return enhanceSpecsLink(featureLink);
default:
return fallback;
}

Просмотреть файл

@ -18,12 +18,12 @@ import requests
import json
import logging
from typing import Any, Optional
from string import punctuation
from ghapi.core import GhApi
from urllib.error import HTTPError
from urllib.parse import urlparse
import base64
import validators
import html
from framework import secrets
@ -35,11 +35,16 @@ LINK_TYPE_GITHUB_ISSUE = 'github_issue'
# Link type identifiers; each one is a key of LINK_TYPES_REGEX, whose pattern
# decides which URLs belong to that type.
LINK_TYPE_GITHUB_MARKDOWN = 'github_markdown'
LINK_TYPE_GITHUB_PULL_REQUEST = 'github_pull_request'
LINK_TYPE_MDN_DOCS = 'mdn_docs'
LINK_TYPE_GOOGLE_DOCS = 'google_docs'
LINK_TYPE_MOZILLA_BUG = 'mozilla_bug'
LINK_TYPE_WEBKIT_BUG = 'webkit_bug'
LINK_TYPE_SPECS = 'specs'
# Generic type: its pattern in LINK_TYPES_REGEX accepts any http(s) URL.
LINK_TYPE_WEB = 'web'
LINK_TYPES_REGEX = {
# https://bugs.chromium.org/p/chromium/issues/detail?id=
# https://crbug.com/
LINK_TYPE_CHROMIUM_BUG: re.compile(r'https?://bugs\.chromium\.org/p/chromium/issues/detail\?.*|https?://crbug\.com/\d+'),
# https://code.google.com/p/chromium/issues/detail?id=
LINK_TYPE_CHROMIUM_BUG: re.compile(r'https?://bugs\.chromium\.org/p/chromium/issues/detail\?id=\d+|https?://crbug\.com/\d+|https?://code\.google\.com/p/chromium/issues/detail\?id=\d+'),
# https://github.com/GoogleChrome/chromium-dashboard/issues/999
LINK_TYPE_GITHUB_ISSUE: re.compile(r'https?://(www\.)?github\.com/.*issues/\d+'),
# https://github.com/GoogleChrome/chromium-dashboard/pull/3044
@ -48,6 +53,18 @@ LINK_TYPES_REGEX = {
LINK_TYPE_GITHUB_MARKDOWN: re.compile(r'https?://(www\.)?github\.com/.*\.md.*'),
# https://developer.mozilla.org/en-US/docs/Web/API/DOMException
LINK_TYPE_MDN_DOCS: re.compile(r'https?://(www\.)?developer\.mozilla\.org/.*'),
# https://docs.google.com/document/d/1-M_o-il38aW64Gyk4R23Yaxy1p2Uy7D0i6J5qTWzypU
LINK_TYPE_GOOGLE_DOCS: re.compile(r'https?://docs\.google\.com/(document|spreadsheets|presentation|forms)/.*'),
# https://bugzilla.mozilla.org/show_bug.cgi?id=1314686
LINK_TYPE_MOZILLA_BUG: re.compile(r'https?://bugzilla\.mozilla\.org/show_bug\.cgi\?id=\d+'),
# https://bugs.webkit.org/show_bug.cgi?id=128456
LINK_TYPE_WEBKIT_BUG: re.compile(r'https?://bugs\.webkit\.org/show_bug\.cgi\?id=\d+'),
# https://w3c.github.io/
# https://w3.org/
# https://drafts.csswg.org/
# https://whatwg.org/
# https://wicg.github.io/
LINK_TYPE_SPECS: re.compile(r'https?://w3c\.github\.io/.*|https?://[a-z]+\.?w3\.org/.*|https?://drafts\.csswg\.org/.*|https?://[a-z\.]*whatwg\.org/.*|https?://wicg\.github\.io/.*'),
LINK_TYPE_WEB: re.compile(r'https?://.*'),
}
@ -86,7 +103,11 @@ class Link():
"""Extract the urls from the given value."""
if isinstance(value, str):
urls = URL_REGEX.findall(value)
# remove trailing punctuation
# Same characters as string.punctuation, except that "/" is excluded,
# so that URLs ending with "/" keep their trailing slash.
punctuation = r"""!"#$%&'()*+,-.:;<=>?@[\]^_`{|}~"""
urls = [url.rstrip(punctuation) for url in urls]
elif isinstance(value, list):
urls = [url for url in value if isinstance(url, str) and URL_REGEX.match(url)]
@ -213,6 +234,8 @@ class Link():
issue_id = parsed_url.query.split('id=')[-1].split('&')[0]
elif parsed_url.netloc == 'crbug.com':
issue_id = parsed_url.path.lstrip('/')
elif parsed_url.netloc == 'code.google.com':
issue_id = parsed_url.query.split('id=')[-1].split('&')[0]
# A CSRF token is required; it expires after about 2 hours according to the tokenExpiresSec field.
# Technically, we could cache the CSRF token and reuse it for 2 hours.
@ -248,15 +271,19 @@ class Link():
return information.get('issue', None)
def _parse_html_head(self):
html_str = requests.get(self.url).text
response = requests.get(self.url)
# unescape html, e.g. &amp; -> &
html_str = html.unescape(response.text)
title = re.search(r'<title>(.*?)</title>', html_str)
title_og = re.search(r'<meta property="og:title" content="(.*?)"', html_str)
description = re.search(r'<meta name="description" content="(.*?)"', html_str)
description_og = re.search(r'<meta property="og:description" content="(.*?)"', html_str)
# use \s+ instead of whitespace, to match multiple whitespaces or newlines
title_og = re.search(r'<meta property="og:title"\s+content="(.*?)"', html_str)
description = re.search(r'<meta name="description"\s+content="(.*?)"', html_str)
description_og = re.search(r'<meta property="og:description"\s+content="(.*?)"', html_str)
return {
'title': title.group(1) if title else (title_og.group(1) if title_og else None),
'description': description.group(1) if description else (description_og.group(1) if description_og else None),
'title': title_og.group(1) if title_og else (title.group(1) if title else None),
'description': description_og.group(1) if description_og else (description.group(1) if description else None),
}
def _validate_url(self) -> bool:
@ -288,7 +315,13 @@ class Link():
self.information = self._parse_github_issue()
elif self.type == LINK_TYPE_GITHUB_MARKDOWN:
self.information = self._parse_github_markdown()
elif self.type == LINK_TYPE_MDN_DOCS:
elif self.type in [
LINK_TYPE_MDN_DOCS,
LINK_TYPE_GOOGLE_DOCS,
LINK_TYPE_MOZILLA_BUG,
LINK_TYPE_WEBKIT_BUG,
LINK_TYPE_SPECS,
]:
self.information = self._parse_html_head()
elif self.type == LINK_TYPE_WEB:
self.information = None

Просмотреть файл

@ -22,11 +22,48 @@ from internals.link_helpers import (
LINK_TYPE_GITHUB_MARKDOWN,
LINK_TYPE_WEB,
LINK_TYPE_MDN_DOCS,
LINK_TYPE_GOOGLE_DOCS,
LINK_TYPE_MOZILLA_BUG,
LINK_TYPE_SPECS,
valid_url
)
class LinkHelperTest(testing_config.CustomTestCase):
def test_specs_url(self):
  """Every sample spec URL is typed as LINK_TYPE_SPECS and keeps its URL unchanged."""
  spec_urls = (
    "https://w3c.github.io/presentation-api/",
    "https://www.w3.org/TR/css-pseudo-4/#highlight-pseudos",
    "https://dev.w3.org/html5/spec-LC/the-button-element.html",
    "https://drafts.csswg.org/css-conditional-4/#support-definition-ext",
    "https://drafts.csswg.org/css-values-3/#position",
    "https://dom.spec.whatwg.org/#validate",
    "https://html.spec.whatwg.org/multipage/webappapis.html",
    "https://wicg.github.io/keyboard-map/#layoutchange-event",
  )
  for spec_url in spec_urls:
    # One sub-test per URL so a single failure does not hide the others.
    with self.subTest(url=spec_url):
      spec_link = Link(spec_url)
      self.assertEqual(spec_link.type, LINK_TYPE_SPECS)
      self.assertEqual(spec_link.url, spec_url)
def test_mozilla_bug(self):
  """A Bugzilla bug URL is typed as a Mozilla bug and parses to a title and description."""
  bug_link = Link("https://bugzilla.mozilla.org/show_bug.cgi?id=1314686")
  bug_link.parse()

  self.assertEqual(bug_link.type, LINK_TYPE_MOZILLA_BUG)
  self.assertTrue(bug_link.is_parsed)
  self.assertFalse(bug_link.is_error)
  for field in ('title', 'description'):
    self.assertIsNotNone(bug_link.information.get(field))
def test_google_docs_url(self):
  """A Google Docs document URL is typed as google_docs and parses to a title and description."""
  doc_url = "https://docs.google.com/document/d/1-M_o-il38aW64Gyk4R23Yaxy1p2Uy7D0i6J5qTWzypU"
  doc_link = Link(doc_url)
  doc_link.parse()

  self.assertEqual(doc_link.type, LINK_TYPE_GOOGLE_DOCS)
  self.assertTrue(doc_link.is_parsed)
  self.assertFalse(doc_link.is_error)
  for field in ('title', 'description'):
    self.assertIsNotNone(doc_link.information.get(field))
def test_mdn_docs_url(self):
link = Link("https://developer.mozilla.org/en-US/docs/Web/HTML")
@ -39,38 +76,23 @@ class LinkHelperTest(testing_config.CustomTestCase):
self.assertIsNotNone(link.information.get('description'))
def test_valid_url(self):
invalid_urls = [
invalid_urls = [
'http://',
'http://.',
'https://invalid',
]
valid_urls = [
]
valid_urls = [
'http://www.google.com/',
'https://www.google.com/',
'http://www.google.com',
'https://www.google.com',
]
for url in invalid_urls:
with self.subTest(url=url):
self.assertFalse(valid_url(url))
for url in valid_urls:
with self.subTest(url=url):
self.assertTrue(valid_url(url))
def test_real_server_error_url(self):
link = Link("http://httpstat.us/503")
link.parse()
self.assertEqual(link.type, LINK_TYPE_WEB)
self.assertEqual(link.is_error, True)
self.assertEqual(link.http_error_code, 503)
link = Link("https://httpstat.us/400")
link.parse()
self.assertEqual(link.type, LINK_TYPE_WEB)
self.assertEqual(link.is_error, True)
self.assertEqual(link.http_error_code, 400)
]
for url in invalid_urls:
with self.subTest(url=url):
self.assertFalse(valid_url(url))
for url in valid_urls:
with self.subTest(url=url):
self.assertTrue(valid_url(url))
@mock.patch('requests.get')
def test_mock_not_found_url(self, mock_requests_get):
@ -88,6 +110,10 @@ class LinkHelperTest(testing_config.CustomTestCase):
urls = Link.extract_urls_from_value(field_value)
self.assertEqual(urls, [field_value])
field_value = "https://w3c.github.io/presentation-api/"
urls = Link.extract_urls_from_value(field_value)
self.assertEqual(urls, [field_value])
field_value = "leadinghttps:https://www.chromestatus.com/feature/1234');, https://www.chromestatus.com/feature/5678 is valid"
urls = Link.extract_urls_from_value(field_value)
self.assertEqual(urls, ["https://www.chromestatus.com/feature/1234", "https://www.chromestatus.com/feature/5678"])
@ -176,6 +202,10 @@ class LinkHelperTest(testing_config.CustomTestCase):
self.assertEqual(link.is_error, True)
self.assertEqual(link.http_error_code, 404)
def test_link_code_google(self):
  """A code.google.com chromium issue URL is typed as a Chromium bug."""
  code_google_url = "https://code.google.com/p/chromium/issues/detail?id=515786"
  code_google_link = Link(code_google_url)
  self.assertEqual(code_google_link.type, LINK_TYPE_CHROMIUM_BUG)
def test_link_crbug(self):
link = Link("https://crbug.com/1352598")
self.assertEqual(link.type, LINK_TYPE_CHROMIUM_BUG)