Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1541563 - Sync 131 - Fix string linkification greediness r=loganfsmyth
Differential Revision: https://phabricator.services.mozilla.com/D25994

--HG--
extra : moz-landing-system : lando
Parent: c8835a2c67
Commit: 1578579b3e
@@ -4,7 +4,63 @@
 // Dependencies
 const validProtocols = /(http|https|ftp|data|resource|chrome):/i;
 const tokenSplitRegex = /(\s|\'|\"|\\)+/;
+
+// URL Regex, common idioms:
+//
+// Lead-in (URL):
+// (                     Capture because we need to know if there was a lead-in
+//                       character so we can include it as part of the text
+//                       preceding the match. We lack look-behind matching.
+//  ^|                   The URL can start at the beginning of the string.
+//  [\s(,;'"`]           Or whitespace or some punctuation that does not imply
+//                       a context which would preclude a URL.
+// )
+//
+// We do not need a trailing look-ahead because our regex's will terminate
+// because they run out of characters they can eat.
+
+// What we do not attempt to have the regexp do:
+// - Avoid trailing '.' and ')' characters. We let our greedy match absorb
+//   these, but have a separate regex for extra characters to leave off at the
+//   end.
+//
+// The Regex (apart from lead-in/lead-out):
+// (                     Begin capture of the URL
+//  (?:                  (potential detect beginnings)
+//   https?:\/\/|        Start with "http" or "https"
+//   www\d{0,3}[.][a-z0-9.\-]{2,249}|
+//                       Start with "www", up to 3 numbers, then "." then
+//                       something that looks domain-namey. We differ from the
+//                       next case in that we do not constrain the top-level
+//                       domain as tightly and do not require a trailing path
+//                       indicator of "/". This is IDN root compatible.
+//   [a-z0-9.\-]{2,250}[.][a-z]{2,4}\/
+//                       Detect a non-www domain, but requiring a trailing "/"
+//                       to indicate a path. This only detects IDN domains
+//                       with a non-IDN root. This is reasonable in cases where
+//                       there is no explicit http/https start us out, but
+//                       unreasonable where there is. Our real fix is the bug
+//                       to port the Thunderbird/gecko linkification logic.
+//
+//                       Domain names can be up to 253 characters long, and are
+//                       limited to a-zA-Z0-9 and '-'. The roots don't have
+//                       hyphens unless they are IDN roots. Root zones can be
+//                       found here: http://www.iana.org/domains/root/db
+//  )
+//  [-\w.!~*'();,/?:@&=+$#%]*
+//                       path onwards. We allow the set of characters that
+//                       encodeURI does not escape plus the result of escaping
+//                       (so also '%')
+// )
+// eslint-disable-next-line max-len
+const urlRegex = /(^|[\s(,;'"`])((?:https?:\/\/|www\d{0,3}[.][a-z0-9.\-]{2,249}|[a-z0-9.\-]{2,250}[.][a-z]{2,4}\/)[-\w.!~*'();,/?:@&=+$#%]*)/im;
+
+// Set of terminators that are likely to have been part of the context rather
+// than part of the URL and so should be uneaten. This is '(', ',', ';', plus
+// quotes and question end-ing punctuation and the potential permutations with
+// parentheses (english-specific).
+const uneatLastUrlCharsRegex = /(?:[),;.!?`'"]|[.!?]\)|\)[.!?])$/;

 const ELLIPSIS = "\u2026";
 const dom = require("react-dom-factories");
 const { span } = dom;
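For reference, a minimal sketch of how the two regexes added above work together (plain Node.js; the sample string is invented for illustration and does not come from the patch). urlRegex captures the lead-in character separately from the URL itself, and the URL group greedily absorbs trailing punctuation, which uneatLastUrlCharsRegex then locates so it can be handed back to the surrounding text:

const urlRegex = /(^|[\s(,;'"`])((?:https?:\/\/|www\d{0,3}[.][a-z0-9.\-]{2,249}|[a-z0-9.\-]{2,250}[.][a-z]{2,4}\/)[-\w.!~*'();,/?:@&=+$#%]*)/im;
const uneatLastUrlCharsRegex = /(?:[),;.!?`'"]|[.!?]\)|\)[.!?])$/;

// Hypothetical input: the "(" lead-in lands in match[1], and the greedy
// URL capture in match[2] still carries the closing parenthesis.
const match = urlRegex.exec("global(http://example.com) and more");
// match.index === 6, match[1] === "(", match[2] === "http://example.com)"

// uneatLastUrlCharsRegex points at trailing characters that more likely
// belong to the surrounding prose, so they can be "uneaten" from the link.
let url = match[2];
const uneat = uneatLastUrlCharsRegex.exec(url);
if (uneat) {
  url = url.substring(0, uneat.index); // "http://example.com"
}
console.log(url);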
@@ -382,7 +438,7 @@ function containsURL(grip) {
  * @param string token
  *        The token.
  * @return boolean
- *         Whenther the token is a URL.
+ *         Whether the token is a URL.
  */
 function isURL(token) {
   try {
@@ -439,7 +495,8 @@ module.exports = {
   maybeEscapePropertyName,
   getGripPreviewItems,
   getGripType,
   tokenSplitRegex,
   ellipsisElement,
-  ELLIPSIS
+  ELLIPSIS,
+  uneatLastUrlCharsRegex,
+  urlRegex
 };
@@ -7,15 +7,15 @@ const PropTypes = require("prop-types");

 const {
   containsURL,
-  isURL,
   escapeString,
   getGripType,
   rawCropString,
   sanitizeString,
   wrapRender,
   isGrip,
-  tokenSplitRegex,
-  ELLIPSIS
+  ELLIPSIS,
+  uneatLastUrlCharsRegex,
+  urlRegex
 } = require("./rep-utils");

 const dom = require("react-dom-factories");
@@ -184,75 +184,75 @@ function getLinkifiedElements(text, cropLimit, openLink, isInContentPage) {
   const startCropIndex = cropLimit ? halfLimit : null;
   const endCropIndex = cropLimit ? text.length - halfLimit : null;

-  // As we walk through the tokens of the source string, we make sure to
-  // preserve the original whitespace that separated the tokens.
-  let currentIndex = 0;
   const items = [];
-  for (const token of text.split(tokenSplitRegex)) {
-    if (isURL(token)) {
-      // Let's grab all the non-url strings before the link.
-      const tokenStart = text.indexOf(token, currentIndex);
-      let nonUrlText = text.slice(currentIndex, tokenStart);
-      nonUrlText = getCroppedString(
-        nonUrlText,
-        currentIndex,
-        startCropIndex,
-        endCropIndex
-      );
-      if (nonUrlText) {
-        items.push(nonUrlText);
-      }
-
-      // Update the index to match the beginning of the token.
-      currentIndex = tokenStart;
-
-      const linkText = getCroppedString(
-        token,
-        currentIndex,
-        startCropIndex,
-        endCropIndex
-      );
-      if (linkText) {
-        items.push(
-          a(
-            {
-              className: "url",
-              title: token,
-              draggable: false,
-              // Because we don't want the link to be open in the current
-              // panel's frame, we only render the href attribute if `openLink`
-              // exists (so we can preventDefault) or if the reps will be
-              // displayed in content page (e.g. in the JSONViewer).
-              href: openLink || isInContentPage ? token : null,
-              onClick: openLink
-                ? e => {
-                    e.preventDefault();
-                    openLink(token, e);
-                  }
-                : null
-            },
-            linkText
-          )
-        );
-      }
-
-      currentIndex = tokenStart + token.length;
-    }
-  }
+  let currentIndex = 0;
+  let contentStart;
+  while (true) {
+    const url = urlRegex.exec(text);
+    // Pick the regexp with the earlier content; index will always be zero.
+    if (!url) {
+      break;
+    }
+    contentStart = url.index + url[1].length;
+    if (contentStart > 0) {
+      const nonUrlText = text.substring(0, contentStart);
+      items.push(
+        getCroppedString(nonUrlText, currentIndex, startCropIndex, endCropIndex)
+      );
+    }
+
+    // There are some final characters for a URL that are much more likely
+    // to have been part of the enclosing text rather than the end of the
+    // URL.
+    let useUrl = url[2];
+    const uneat = uneatLastUrlCharsRegex.exec(useUrl);
+    if (uneat) {
+      useUrl = useUrl.substring(0, uneat.index);
+    }
+
+    currentIndex = currentIndex + contentStart;
+    const linkText = getCroppedString(
+      useUrl,
+      currentIndex,
+      startCropIndex,
+      endCropIndex
+    );
+
+    if (linkText) {
+      items.push(
+        a(
+          {
+            className: "url",
+            title: useUrl,
+            draggable: false,
+            // Because we don't want the link to be open in the current
+            // panel's frame, we only render the href attribute if `openLink`
+            // exists (so we can preventDefault) or if the reps will be
+            // displayed in content page (e.g. in the JSONViewer).
+            href: openLink || isInContentPage ? useUrl : null,
+            onClick: openLink
+              ? e => {
+                  e.preventDefault();
+                  openLink(useUrl, e);
+                }
+              : null
+          },
+          linkText
+        )
+      );
+    }
+
+    currentIndex = currentIndex + useUrl.length;
+    text = text.substring(url.index + url[1].length + useUrl.length);
+  }

   // Clean up any non-URL text at the end of the source string,
   // i.e. not handled in the loop.
-  if (currentIndex !== text.length) {
-    let nonUrlText = text.slice(currentIndex, text.length);
+  if (text.length > 0) {
     if (currentIndex < endCropIndex) {
-      nonUrlText = getCroppedString(
-        nonUrlText,
-        currentIndex,
-        startCropIndex,
-        endCropIndex
-      );
+      text = getCroppedString(text, currentIndex, startCropIndex, endCropIndex);
     }
-    items.push(nonUrlText);
+    items.push(text);
   }

   return items;
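To make the new control flow easier to follow, here is a stripped-down sketch of the same scanning loop with the React element creation and string cropping left out; the splitIntoSegments helper and the sample input are illustrative only and not part of the patch:

const urlRegex = /(^|[\s(,;'"`])((?:https?:\/\/|www\d{0,3}[.][a-z0-9.\-]{2,249}|[a-z0-9.\-]{2,250}[.][a-z]{2,4}\/)[-\w.!~*'();,/?:@&=+$#%]*)/im;
const uneatLastUrlCharsRegex = /(?:[),;.!?`'"]|[.!?]\)|\)[.!?])$/;

function splitIntoSegments(text) {
  const segments = [];
  while (true) {
    const url = urlRegex.exec(text);
    if (!url) {
      break;
    }
    // Everything up to and including the lead-in character is plain text.
    const contentStart = url.index + url[1].length;
    if (contentStart > 0) {
      segments.push({ type: "text", value: text.substring(0, contentStart) });
    }
    // Give back trailing punctuation that more likely belongs to the prose.
    let useUrl = url[2];
    const uneat = uneatLastUrlCharsRegex.exec(useUrl);
    if (uneat) {
      useUrl = useUrl.substring(0, uneat.index);
    }
    segments.push({ type: "url", value: useUrl });
    // Rescan only what comes after the part that became a link, so any
    // punctuation that was given back is re-examined as ordinary text.
    text = text.substring(contentStart + useUrl.length);
  }
  if (text.length > 0) {
    segments.push({ type: "text", value: text });
  }
  return segments;
}

console.log(splitIntoSegments("see http://example.com, or https://example.fr."));
// [ { type: "text", value: "see " },
//   { type: "url", value: "http://example.com" },
//   { type: "text", value: ", or " },
//   { type: "url", value: "https://example.fr" },
//   { type: "text", value: "." } ]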
@@ -335,7 +335,7 @@ describe("test String with URL", () => {

   it("renders successive cropped URLs with cropped elements between", () => {
     const text =
-      "- http://example.fr test http://example.fr test http://example.us -";
+      "- http://example.fr test http://example.es test http://example.us -";
     const openLink = jest.fn();
     const element = renderRep(text, {
       openLink,
@@ -383,6 +383,30 @@ describe("test String with URL", () => {
     expect(linkFr.prop("title")).toBe("http://example.fr");
   });

+  it("renders URLs without unrelated characters", () => {
+    const text =
+      "global(http://example.com) and local(http://example.us)" +
+      " and maybe https://example.fr, https://example.es?";
+    const openLink = jest.fn();
+    const element = renderRep(text, {
+      openLink,
+      useQuotes: false
+    });
+
+    expect(element.text()).toEqual(text);
+    const linkCom = element.find("a").at(0);
+    expect(linkCom.prop("href")).toBe("http://example.com");
+
+    const linkUs = element.find("a").at(1);
+    expect(linkUs.prop("href")).toBe("http://example.us");
+
+    const linkFr = element.find("a").at(2);
+    expect(linkFr.prop("href")).toBe("https://example.fr");
+
+    const linkEs = element.find("a").at(3);
+    expect(linkEs.prop("href")).toBe("https://example.es");
+  });
+
   it("does not render a link if the URL has no scheme", () => {
     const url = "example.com";
     const element = renderRep(url, { useQuotes: false });