Support more link types (#3268)
* Support code.google.com * Support LINK_TYPE_GOOGLE_DOCS * Support LINK_TYPE_MOZILLA_BUG and LINK_TYPE_WEBKIT_BUG * Support LINK_TYPE_SPECS * Fix extract url ending with slash * Update specs feature link title * Fix * Remove flaky test * Fix typo in regex
This commit is contained in:
Родитель
ccdf6ee280
Коммит
17f9886d06
|
@ -5,6 +5,10 @@ const LINK_TYPE_GITHUB_ISSUE = 'github_issue';
|
|||
const LINK_TYPE_GITHUB_PULL_REQUEST = 'github_pull_request';
|
||||
const LINK_TYPE_GITHUB_MARKDOWN = 'github_markdown';
|
||||
const LINK_TYPE_MDN_DOCS = 'mdn_docs';
|
||||
const LINK_TYPE_GOOGLE_DOCS = 'google_docs';
|
||||
const LINK_TYPE_MOZILLA_BUG = 'mozilla_bug';
|
||||
const LINK_TYPE_WEBKIT_BUG = 'webkit_bug';
|
||||
const LINK_TYPE_SPECS = 'specs';
|
||||
|
||||
function _formatLongText(text, maxLength = 50) {
|
||||
if (text.length > maxLength) {
|
||||
|
@ -214,16 +218,11 @@ function enhanceGithubMarkdownLink(featureLink, text) {
|
|||
</a>`;
|
||||
}
|
||||
|
||||
function enhanceMDNDocsLink(featureLink, text) {
|
||||
function _enhanceLinkWithTitleAndDescription(featureLink, iconUrl) {
|
||||
const information = featureLink.information;
|
||||
const title = information.title;
|
||||
const description = information.description;
|
||||
|
||||
|
||||
if (!text) {
|
||||
text = title;
|
||||
}
|
||||
|
||||
function renderTooltipContent() {
|
||||
return html`<div class="feature-link-tooltip">
|
||||
${title && html`
|
||||
|
@ -244,13 +243,82 @@ function enhanceMDNDocsLink(featureLink, text) {
|
|||
<sl-tooltip style="--sl-tooltip-arrow-size: 0;--max-width: 50vw;">
|
||||
<div slot="content">${renderTooltipContent()}</div>
|
||||
<sl-tag>
|
||||
<img src="https://developer.mozilla.org/favicon-48x48.png" alt="icon" class="icon" />
|
||||
<img src="${iconUrl}" alt="icon" class="icon" />
|
||||
${_formatLongText(title)}
|
||||
</sl-tag>
|
||||
</sl-tooltip>
|
||||
</a>`;
|
||||
}
|
||||
|
||||
/**
 * Renders a specification link as a tag with a hover tooltip.
 *
 * The tag shows a favicon and "Spec: <title>". The tooltip lists the title,
 * the description and, when the URL has a fragment, that fragment as a
 * "Hashtag" row. Every row is rendered inside the single
 * `.feature-link-tooltip` container.
 *
 * @param {{url: string, information: {title: ?string, description: ?string}}} featureLink
 *     Link record; `information` must be an object, its fields may be empty.
 * @return {*} The template produced by the `html` tag for the anchor element.
 */
function enhanceSpecsLink(featureLink) {
  const url = featureLink.url;
  const [urlWithoutFragment, hashtag] = url.split('#');
  // Encode the page URL so its own "?" and "&" stay inside the domain_url
  // value instead of being read as extra query parameters of the favicon URL.
  const iconUrl = `https://www.google.com/s2/favicons?domain_url=${encodeURIComponent(urlWithoutFragment)}`;
  const information = featureLink.information;
  const title = information.title;
  const description = information.description;

  function renderTooltipContent() {
    // All optional rows live inside the container, and each row closes its
    // own <div>, so the markup stays balanced whichever rows are present.
    return html`<div class="feature-link-tooltip">
      ${title && html`
        <div>
          <strong>Title:</strong>
          <span>${title}</span>
        </div>
      `}
      ${description && html`
        <div>
          <strong>Description:</strong>
          <span>${description}</span>
        </div>
      `}
      ${hashtag && html`
        <div>
          <strong>Hashtag:</strong>
          <span>#${hashtag}</span>
        </div>
      `}
    </div>`;
  }

  // The page may expose no title at all; fall back to the URL so that
  // _formatLongText never receives a null value.
  const label = _formatLongText(title || url);

  return html`<a class="feature-link" href="${featureLink.url}" target="_blank" rel="noopener noreferrer">
    <sl-tooltip style="--sl-tooltip-arrow-size: 0;--max-width: 50vw;">
      <div slot="content">${renderTooltipContent()}</div>
      <sl-tag>
        <img src="${iconUrl}" alt="icon" class="icon" />
        Spec: ${label}
      </sl-tag>
    </sl-tooltip>
  </a>`;
}
|
||||
|
||||
/**
 * Renders an MDN documentation link using the shared title/description
 * renderer with the MDN favicon.
 *
 * @param {Object} featureLink Link record passed through unchanged.
 * @return {*} Whatever _enhanceLinkWithTitleAndDescription returns.
 */
function enhanceMDNDocsLink(featureLink) {
  const mdnIconUrl = 'https://developer.mozilla.org/favicon-48x48.png';
  return _enhanceLinkWithTitleAndDescription(featureLink, mdnIconUrl);
}
|
||||
|
||||
/**
 * Renders a Mozilla Bugzilla link using the shared title/description
 * renderer with the Bugzilla favicon.
 *
 * @param {Object} featureLink Link record passed through unchanged.
 * @return {*} Whatever _enhanceLinkWithTitleAndDescription returns.
 */
function enhanceMozillaBugLink(featureLink) {
  const bugzillaIconUrl = 'https://bugzilla.mozilla.org/favicon.ico';
  return _enhanceLinkWithTitleAndDescription(featureLink, bugzillaIconUrl);
}
|
||||
|
||||
/**
 * Renders a WebKit Bugzilla link using the shared title/description
 * renderer with the WebKit bug tracker favicon.
 *
 * @param {Object} featureLink Link record passed through unchanged.
 * @return {*} Whatever _enhanceLinkWithTitleAndDescription returns.
 */
function enhanceWebKitBugLink(featureLink) {
  const webkitIconUrl = 'https://bugs.webkit.org/images/favicon.ico';
  return _enhanceLinkWithTitleAndDescription(featureLink, webkitIconUrl);
}
|
||||
|
||||
/**
 * Renders a Google Docs link, choosing the favicon from the product segment
 * of the URL (the path component right after the host).
 *
 * Spreadsheets, presentations and forms get their own icon; every other
 * value falls back to the document icon.
 *
 * @param {{url: string}} featureLink Link record passed through unchanged.
 * @return {*} Whatever _enhanceLinkWithTitleAndDescription returns.
 */
function enhanceGoogleDocsLink(featureLink) {
  // "https://docs.google.com/<product>/..." split on "/" puts <product> at index 3.
  const product = featureLink.url.split('/')[3];

  let iconUrl;
  switch (product) {
    case 'spreadsheets':
      iconUrl = 'https://ssl.gstatic.com/docs/spreadsheets/favicon3.ico';
      break;
    case 'presentation':
      iconUrl = 'https://ssl.gstatic.com/docs/presentations/images/favicon5.ico';
      break;
    case 'forms':
      iconUrl = 'https://ssl.gstatic.com/docs/spreadsheets/forms/favicon_qp2.png';
      break;
    default:
      iconUrl = 'https://ssl.gstatic.com/docs/documents/images/kix-favicon7.ico';
  }

  return _enhanceLinkWithTitleAndDescription(featureLink, iconUrl);
}
|
||||
|
||||
function _enhanceLink(featureLink, fallback, text, ignoreHttpErrorCodes = []) {
|
||||
if (!fallback) {
|
||||
throw new Error('fallback html is required');
|
||||
|
@ -290,6 +358,14 @@ function _enhanceLink(featureLink, fallback, text, ignoreHttpErrorCodes = []) {
|
|||
return enhanceGithubMarkdownLink(featureLink);
|
||||
case LINK_TYPE_MDN_DOCS:
|
||||
return enhanceMDNDocsLink(featureLink);
|
||||
case LINK_TYPE_GOOGLE_DOCS:
|
||||
return enhanceGoogleDocsLink(featureLink);
|
||||
case LINK_TYPE_MOZILLA_BUG:
|
||||
return enhanceMozillaBugLink(featureLink);
|
||||
case LINK_TYPE_WEBKIT_BUG:
|
||||
return enhanceWebKitBugLink(featureLink);
|
||||
case LINK_TYPE_SPECS:
|
||||
return enhanceSpecsLink(featureLink);
|
||||
default:
|
||||
return fallback;
|
||||
}
|
||||
|
|
|
@ -18,12 +18,12 @@ import requests
|
|||
import json
|
||||
import logging
|
||||
from typing import Any, Optional
|
||||
from string import punctuation
|
||||
from ghapi.core import GhApi
|
||||
from urllib.error import HTTPError
|
||||
from urllib.parse import urlparse
|
||||
import base64
|
||||
import validators
|
||||
import html
|
||||
from framework import secrets
|
||||
|
||||
|
||||
|
@ -35,11 +35,16 @@ LINK_TYPE_GITHUB_ISSUE = 'github_issue'
|
|||
LINK_TYPE_GITHUB_MARKDOWN = 'github_markdown'
|
||||
LINK_TYPE_GITHUB_PULL_REQUEST = 'github_pull_request'
|
||||
LINK_TYPE_MDN_DOCS = 'mdn_docs'
|
||||
LINK_TYPE_GOOGLE_DOCS = 'google_docs'
|
||||
LINK_TYPE_MOZILLA_BUG = 'mozilla_bug'
|
||||
LINK_TYPE_WEBKIT_BUG = 'webkit_bug'
|
||||
LINK_TYPE_SPECS = 'specs'
|
||||
LINK_TYPE_WEB = 'web'
|
||||
LINK_TYPES_REGEX = {
|
||||
# https://bugs.chromium.org/p/chromium/issues/detail?id=
|
||||
# https://crbug.com/
|
||||
LINK_TYPE_CHROMIUM_BUG: re.compile(r'https?://bugs\.chromium\.org/p/chromium/issues/detail\?.*|https?://crbug\.com/\d+'),
|
||||
# https://code.google.com/p/chromium/issues/detail?id=
|
||||
LINK_TYPE_CHROMIUM_BUG: re.compile(r'https?://bugs\.chromium\.org/p/chromium/issues/detail\?id=\d+|https?://crbug\.com/\d+|https?://code\.google\.com/p/chromium/issues/detail\?id=\d+'),
|
||||
# https://github.com/GoogleChrome/chromium-dashboard/issues/999
|
||||
LINK_TYPE_GITHUB_ISSUE: re.compile(r'https?://(www\.)?github\.com/.*issues/\d+'),
|
||||
# https://github.com/GoogleChrome/chromium-dashboard/pull/3044
|
||||
|
@ -48,6 +53,18 @@ LINK_TYPES_REGEX = {
|
|||
LINK_TYPE_GITHUB_MARKDOWN: re.compile(r'https?://(www\.)?github\.com/.*\.md.*'),
|
||||
# https://developer.mozilla.org/en-US/docs/Web/API/DOMException
|
||||
LINK_TYPE_MDN_DOCS: re.compile(r'https?://(www\.)?developer\.mozilla\.org/.*'),
|
||||
# https://docs.google.com/document/d/1-M_o-il38aW64Gyk4R23Yaxy1p2Uy7D0i6J5qTWzypU
|
||||
LINK_TYPE_GOOGLE_DOCS: re.compile(r'https?://docs\.google\.com/(document|spreadsheets|presentation|forms)/.*'),
|
||||
# https://bugzilla.mozilla.org/show_bug.cgi?id=1314686
|
||||
LINK_TYPE_MOZILLA_BUG: re.compile(r'https?://bugzilla\.mozilla\.org/show_bug\.cgi\?id=\d+'),
|
||||
# https://bugs.webkit.org/show_bug.cgi?id=128456
|
||||
LINK_TYPE_WEBKIT_BUG: re.compile(r'https?://bugs\.webkit\.org/show_bug\.cgi\?id=\d+'),
|
||||
# https://w3c.github.io/
|
||||
# https://w3.org/
|
||||
# https://drafts.csswg.org/
|
||||
# https://whatwg.org/
|
||||
# https://wicg.github.io/
|
||||
LINK_TYPE_SPECS: re.compile(r'https?://w3c\.github\.io/.*|https?://[a-z]+\.?w3\.org/.*|https?://drafts\.csswg\.org/.*|https?://[a-z\.]*whatwg\.org/.*|https?://wicg\.github\.io/.*'),
|
||||
LINK_TYPE_WEB: re.compile(r'https?://.*'),
|
||||
}
|
||||
|
||||
|
@ -86,7 +103,11 @@ class Link():
|
|||
"""Extract the urls from the given value."""
|
||||
if isinstance(value, str):
|
||||
urls = URL_REGEX.findall(value)
|
||||
|
||||
# remove trailing punctuation
|
||||
# punctuation similar to string.punctuation except that it does not include "/"
|
||||
# so that URLs ending with "/" keep their trailing slash
|
||||
punctuation = r"""!"#$%&'()*+,-.:;<=>?@[\]^_`{|}~"""
|
||||
urls = [url.rstrip(punctuation) for url in urls]
|
||||
elif isinstance(value, list):
|
||||
urls = [url for url in value if isinstance(url, str) and URL_REGEX.match(url)]
|
||||
|
@ -213,6 +234,8 @@ class Link():
|
|||
issue_id = parsed_url.query.split('id=')[-1].split('&')[0]
|
||||
elif parsed_url.netloc == 'crbug.com':
|
||||
issue_id = parsed_url.path.lstrip('/')
|
||||
elif parsed_url.netloc == 'code.google.com':
|
||||
issue_id = parsed_url.query.split('id=')[-1].split('&')[0]
|
||||
|
||||
# csrf token is required, its expiration is about 2 hours according to the tokenExpiresSec field
|
||||
# technically, we could cache the csrf token and reuse it for 2 hours
|
||||
|
@ -248,15 +271,19 @@ class Link():
|
|||
return information.get('issue', None)
|
||||
|
||||
def _parse_html_head(self):
|
||||
html_str = requests.get(self.url).text
|
||||
response = requests.get(self.url)
|
||||
# unescape HTML entities, e.g. &amp; -> &
|
||||
html_str = html.unescape(response.text)
|
||||
|
||||
title = re.search(r'<title>(.*?)</title>', html_str)
|
||||
title_og = re.search(r'<meta property="og:title" content="(.*?)"', html_str)
|
||||
description = re.search(r'<meta name="description" content="(.*?)"', html_str)
|
||||
description_og = re.search(r'<meta property="og:description" content="(.*?)"', html_str)
|
||||
# use \s+ instead of whitespace, to match multiple whitespaces or newlines
|
||||
title_og = re.search(r'<meta property="og:title"\s+content="(.*?)"', html_str)
|
||||
description = re.search(r'<meta name="description"\s+content="(.*?)"', html_str)
|
||||
description_og = re.search(r'<meta property="og:description"\s+content="(.*?)"', html_str)
|
||||
|
||||
return {
|
||||
'title': title.group(1) if title else (title_og.group(1) if title_og else None),
|
||||
'description': description.group(1) if description else (description_og.group(1) if description_og else None),
|
||||
'title': title_og.group(1) if title_og else (title.group(1) if title else None),
|
||||
'description': description_og.group(1) if description_og else (description.group(1) if description else None),
|
||||
}
|
||||
|
||||
def _validate_url(self) -> bool:
|
||||
|
@ -288,7 +315,13 @@ class Link():
|
|||
self.information = self._parse_github_issue()
|
||||
elif self.type == LINK_TYPE_GITHUB_MARKDOWN:
|
||||
self.information = self._parse_github_markdown()
|
||||
elif self.type == LINK_TYPE_MDN_DOCS:
|
||||
elif self.type in [
|
||||
LINK_TYPE_MDN_DOCS,
|
||||
LINK_TYPE_GOOGLE_DOCS,
|
||||
LINK_TYPE_MOZILLA_BUG,
|
||||
LINK_TYPE_WEBKIT_BUG,
|
||||
LINK_TYPE_SPECS,
|
||||
]:
|
||||
self.information = self._parse_html_head()
|
||||
elif self.type == LINK_TYPE_WEB:
|
||||
self.information = None
|
||||
|
|
|
@ -22,11 +22,48 @@ from internals.link_helpers import (
|
|||
LINK_TYPE_GITHUB_MARKDOWN,
|
||||
LINK_TYPE_WEB,
|
||||
LINK_TYPE_MDN_DOCS,
|
||||
LINK_TYPE_GOOGLE_DOCS,
|
||||
LINK_TYPE_MOZILLA_BUG,
|
||||
LINK_TYPE_SPECS,
|
||||
valid_url
|
||||
)
|
||||
|
||||
|
||||
class LinkHelperTest(testing_config.CustomTestCase):
|
||||
def test_specs_url(self):
# Each URL below is expected to be classified as LINK_TYPE_SPECS and to be
# kept unchanged as link.url. Only Link(url) is constructed here; parse()
# is not called in this test.
|
||||
urls = [
|
||||
"https://w3c.github.io/presentation-api/",
|
||||
"https://www.w3.org/TR/css-pseudo-4/#highlight-pseudos",
|
||||
"https://dev.w3.org/html5/spec-LC/the-button-element.html",
|
||||
"https://drafts.csswg.org/css-conditional-4/#support-definition-ext",
|
||||
"https://drafts.csswg.org/css-values-3/#position",
|
||||
"https://dom.spec.whatwg.org/#validate",
|
||||
"https://html.spec.whatwg.org/multipage/webappapis.html",
|
||||
"https://wicg.github.io/keyboard-map/#layoutchange-event",
|
||||
]
|
||||
|
||||
# subTest labels each assertion with the URL it was checking.
for url in urls:
|
||||
with self.subTest(url=url):
|
||||
link = Link(url)
|
||||
self.assertEqual(link.type, LINK_TYPE_SPECS)
|
||||
self.assertEqual(link.url, url)
|
||||
def test_mozilla_bug(self):
# Builds a Link for a Bugzilla bug URL, calls parse(), and expects the
# mozilla_bug type, a parsed, error-free link, and a non-None title and
# description in link.information.
# NOTE(review): no mock is applied here, so parse() presumably fetches the
# live page over HTTP - confirm; if so this test needs network access.
|
||||
link = Link("https://bugzilla.mozilla.org/show_bug.cgi?id=1314686")
|
||||
link.parse()
|
||||
self.assertEqual(link.type, LINK_TYPE_MOZILLA_BUG)
|
||||
self.assertTrue(link.is_parsed)
|
||||
self.assertFalse(link.is_error)
|
||||
self.assertIsNotNone(link.information.get('title'))
|
||||
self.assertIsNotNone(link.information.get('description'))
|
||||
|
||||
def test_google_docs_url(self):
# Builds a Link for a Google Docs document URL, calls parse(), and expects
# the google_docs type, a parsed, error-free link, and a non-None title and
# description in link.information.
# NOTE(review): no mock is applied here, so parse() presumably fetches the
# live document page over HTTP - confirm; if so this test needs network access.
|
||||
link = Link("https://docs.google.com/document/d/1-M_o-il38aW64Gyk4R23Yaxy1p2Uy7D0i6J5qTWzypU")
|
||||
link.parse()
|
||||
self.assertEqual(link.type, LINK_TYPE_GOOGLE_DOCS)
|
||||
self.assertTrue(link.is_parsed)
|
||||
self.assertFalse(link.is_error)
|
||||
self.assertIsNotNone(link.information.get('title'))
|
||||
self.assertIsNotNone(link.information.get('description'))
|
||||
|
||||
def test_mdn_docs_url(self):
|
||||
link = Link("https://developer.mozilla.org/en-US/docs/Web/HTML")
|
||||
|
@ -39,38 +76,23 @@ class LinkHelperTest(testing_config.CustomTestCase):
|
|||
self.assertIsNotNone(link.information.get('description'))
|
||||
|
||||
def test_valid_url(self):
|
||||
invalid_urls = [
|
||||
invalid_urls = [
|
||||
'http://',
|
||||
'http://.',
|
||||
'https://invalid',
|
||||
]
|
||||
valid_urls = [
|
||||
]
|
||||
valid_urls = [
|
||||
'http://www.google.com/',
|
||||
'https://www.google.com/',
|
||||
'http://www.google.com',
|
||||
'https://www.google.com',
|
||||
]
|
||||
for url in invalid_urls:
|
||||
with self.subTest(url=url):
|
||||
self.assertFalse(valid_url(url))
|
||||
for url in valid_urls:
|
||||
with self.subTest(url=url):
|
||||
self.assertTrue(valid_url(url))
|
||||
|
||||
def test_real_server_error_url(self):
|
||||
link = Link("http://httpstat.us/503")
|
||||
|
||||
link.parse()
|
||||
self.assertEqual(link.type, LINK_TYPE_WEB)
|
||||
self.assertEqual(link.is_error, True)
|
||||
self.assertEqual(link.http_error_code, 503)
|
||||
|
||||
link = Link("https://httpstat.us/400")
|
||||
|
||||
link.parse()
|
||||
self.assertEqual(link.type, LINK_TYPE_WEB)
|
||||
self.assertEqual(link.is_error, True)
|
||||
self.assertEqual(link.http_error_code, 400)
|
||||
]
|
||||
for url in invalid_urls:
|
||||
with self.subTest(url=url):
|
||||
self.assertFalse(valid_url(url))
|
||||
for url in valid_urls:
|
||||
with self.subTest(url=url):
|
||||
self.assertTrue(valid_url(url))
|
||||
|
||||
@mock.patch('requests.get')
|
||||
def test_mock_not_found_url(self, mock_requests_get):
|
||||
|
@ -88,6 +110,10 @@ class LinkHelperTest(testing_config.CustomTestCase):
|
|||
urls = Link.extract_urls_from_value(field_value)
|
||||
self.assertEqual(urls, [field_value])
|
||||
|
||||
field_value = "https://w3c.github.io/presentation-api/"
|
||||
urls = Link.extract_urls_from_value(field_value)
|
||||
self.assertEqual(urls, [field_value])
|
||||
|
||||
field_value = "leadinghttps:https://www.chromestatus.com/feature/1234');, https://www.chromestatus.com/feature/5678 is valid"
|
||||
urls = Link.extract_urls_from_value(field_value)
|
||||
self.assertEqual(urls, ["https://www.chromestatus.com/feature/1234", "https://www.chromestatus.com/feature/5678"])
|
||||
|
@ -176,6 +202,10 @@ class LinkHelperTest(testing_config.CustomTestCase):
|
|||
self.assertEqual(link.is_error, True)
|
||||
self.assertEqual(link.http_error_code, 404)
|
||||
|
||||
def test_link_code_google(self):
# A code.google.com Chromium issue URL must be classified as a Chromium
# bug. Only the detected type is checked; parse() is not called.
|
||||
link = Link("https://code.google.com/p/chromium/issues/detail?id=515786")
|
||||
self.assertEqual(link.type, LINK_TYPE_CHROMIUM_BUG)
|
||||
|
||||
def test_link_crbug(self):
# A crbug.com short URL must be classified as a Chromium bug.
# Only the detected type is checked; parse() is not called.
|
||||
link = Link("https://crbug.com/1352598")
|
||||
self.assertEqual(link.type, LINK_TYPE_CHROMIUM_BUG)
|
||||
|
|
Загрузка…
Ссылка в новой задаче