Prevent URLs from being added to add-on summaries (#22563)

* Prevent URLs from being added to add-on summaries

Also simplify implementation of NoURLsField (already used for Collection
descriptions and UserProfile biographies) to only look for URLs starting
with http/https, to avoid being too aggressive while also preventing
URLs with TLDs bleach's hardcoded regexp doesn't know about.
This commit is contained in:
Mathieu Pillard 2024-08-13 16:11:41 +02:00 коммит произвёл GitHub
Родитель 04e2b03917
Коммит bd8b2a3632
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
13 изменённых файлов: 154 добавлений и 152 удалений

Просмотреть файл

@ -11,7 +11,7 @@ from olympia.amo.templatetags.jinja_helpers import absolutify
from olympia.amo.utils import (
ImageCheck,
clean_nl,
has_links,
has_urls,
subscribe_newsletter,
unsubscribe_newsletter,
urlparams,
@ -128,7 +128,7 @@ class UserProfileSerializer(PublicUserProfileSerializer):
)
def validate_biography(self, value):
if has_links(clean_nl(str(value))):
if has_urls(clean_nl(str(value))):
# There's some links, we don't want them.
raise serializers.ValidationError(gettext('No links are allowed.'))
return value

Просмотреть файл

@ -64,7 +64,7 @@ from olympia.files.utils import extract_translations, resolve_i18n_message
from olympia.ratings.models import Rating
from olympia.tags.models import Tag
from olympia.translations.fields import (
LinkifiedField,
NoURLsField,
PurifiedField,
TranslatedField,
save_signal,
@ -503,7 +503,7 @@ class Addon(OnChangeMixin, ModelBase):
support_url = TranslatedField(db_column='supporturl', max_length=255)
description = PurifiedField(short=False, max_length=15000)
summary = LinkifiedField(max_length=250)
summary = NoURLsField(max_length=250)
developer_comments = PurifiedField(db_column='developercomments', max_length=3000)
eula = PurifiedField(max_length=350000)
privacy_policy = PurifiedField(db_column='privacypolicy', max_length=150000)

Просмотреть файл

@ -85,6 +85,7 @@ from .validators import (
MatchingGuidValidator,
NoFallbackDefaultLocaleValidator,
NoThemesValidator,
NoURLsValidator,
ReviewedSourceFileValidator,
VerifyMozillaTrademark,
VersionAddonMetadataValidator,
@ -1028,7 +1029,7 @@ class AddonSerializer(AMOModelSerializer):
)
summary = TranslationSerializerField(
required=False,
validators=[OneOrMoreLetterOrNumberCharacterValidator()],
validators=[OneOrMoreLetterOrNumberCharacterValidator(), NoURLsValidator()],
)
support_email = EmailTranslationField(required=False)
support_url = OutgoingURLTranslationField(required=False)

Просмотреть файл

@ -10,9 +10,8 @@ from django_statsd.clients import statsd
from olympia import amo, core
from olympia.access.acl import action_allowed_for
from olympia.amo.utils import normalize_string
from olympia.amo.utils import normalize_string, verify_condition_with_locales
from olympia.discovery.utils import call_recommendation_server
from olympia.translations.fields import LocaleErrorMessage
from olympia.translations.models import Translation
@ -23,7 +22,7 @@ def generate_addon_guid():
return '{%s}' % str(uuid.uuid4())
def verify_mozilla_trademark(name, user, form=None):
def verify_mozilla_trademark(name, user, *, form=None):
skip_trademark_check = (
user
and user.is_authenticated
@ -47,21 +46,10 @@ def verify_mozilla_trademark(name, user, form=None):
)
if not skip_trademark_check:
if not isinstance(name, dict):
_check(name)
else:
for locale, localized_name in name.items():
try:
_check(localized_name)
except forms.ValidationError as exc:
if form is not None:
for message in exc.messages:
error_message = LocaleErrorMessage(
message=message, locale=locale
)
form.add_error('name', error_message)
else:
raise
verify_condition_with_locales(
value=name, check_func=_check, form=form, field_name='name'
)
return name

Просмотреть файл

@ -5,7 +5,7 @@ from django.utils.translation import gettext
from rest_framework import exceptions, fields
from olympia import amo
from olympia.amo.utils import find_language
from olympia.amo.utils import find_language, verify_no_urls
from olympia.versions.models import License
from .models import Addon
@ -318,6 +318,14 @@ class CanSetCompatibilityValidator:
)
class NoURLsValidator:
def __call__(self, value):
try:
verify_no_urls(value)
except forms.ValidationError as exc:
raise exceptions.ValidationError(exc.message)
class NoThemesValidator:
requires_context = True

Просмотреть файл

@ -168,18 +168,18 @@ class TestAttachTransDict(TestCase):
)
def test_has_links():
html = 'a text <strong>without</strong> links'
assert not amo.utils.has_links(html)
def test_has_urls():
content = 'a text <strong>without</strong> links'
assert not amo.utils.has_urls(content)
html = 'a <a href="http://example.com">link</a> with markup'
assert amo.utils.has_links(html)
content = 'a <a href="http://example.com">link</a> with markup'
assert amo.utils.has_urls(content)
html = 'a http://example.com text link'
assert amo.utils.has_links(html)
content = 'a http://example.com text link'
assert amo.utils.has_urls(content)
html = 'a badly markuped <a href="http://example.com">link'
assert amo.utils.has_links(html)
content = 'a badly markuped <a href="http://example.com">link'
assert amo.utils.has_urls(content)
def test_walkfiles():

Просмотреть файл

@ -25,6 +25,7 @@ from urllib.parse import (
)
import django.core.mail
from django import forms
from django.conf import settings
from django.core.exceptions import FieldDoesNotExist
from django.core.files.storage import FileSystemStorage, default_storage as storage
@ -43,9 +44,9 @@ from django.utils.http import (
quote_etag,
url_has_allowed_host_and_scheme,
)
from django.utils.translation import gettext
import basket
import bleach
import colorgram
import html5lib
import markupsafe
@ -63,6 +64,7 @@ from olympia.amo.urlresolvers import linkify_with_outgoing
from olympia.constants.abuse import REPORTED_MEDIA_BACKUP_EXPIRATION_DAYS
from olympia.core.logger import getLogger
from olympia.lib import unicodehelper
from olympia.translations.fields import LocaleErrorMessage
from olympia.translations.models import Translation
from olympia.users.utils import UnsubscribeCode
@ -70,6 +72,10 @@ from olympia.users.utils import UnsubscribeCode
log = getLogger('z.amo')
# Basic regexp to detect links.
URL_RE = re.compile(r'https?://\S+', re.IGNORECASE)
def from_string(string):
return engines['jinja2'].from_string(string)
@ -1067,23 +1073,49 @@ def find_language(locale):
return None
def has_links(html):
"""Return True if links (text or markup) are found in the given html."""
def has_urls(content):
"""Return True if URLs are found in the given html."""
# Call bleach.linkify to transform text links to real links, and add some
# content to the ``href`` attribute. If the result is different from the
# initial string, links were found.
class LinkFound(Exception):
pass
return URL_RE.search(content) if content else False
def raise_on_link(attrs, new):
raise LinkFound
try:
bleach.linkify(html, callbacks=[raise_on_link])
except LinkFound:
return True
return False
def verify_condition_with_locales(*, value, check_func, form=None, field_name=None):
"""
Check that the given `check_func` function does not raise a ValidationError
for the given `value`. If `value` is a `dict`, it assumes the keys are
locales and the values are translations in those locales.
`form` and `field_name` can be passed to transform to ValidationError that
would be raised into a locale-aware error message that is added to the
form.
"""
if not isinstance(value, dict):
check_func(value)
else:
for locale, localized_name in value.items():
try:
check_func(localized_name)
except forms.ValidationError as exc:
if form is not None and field_name is not None:
for message in exc.messages:
error_message = LocaleErrorMessage(
message=message, locale=locale
)
form.add_error(field_name, error_message)
else:
raise
def verify_no_urls(value, *, form=None, field_name=None):
def _check(value):
if has_urls(value):
raise forms.ValidationError(gettext('URLs are not allowed.'))
verify_condition_with_locales(
value=value, check_func=_check, form=form, field_name=field_name
)
return value
def walkfiles(folder, suffix=''):

Просмотреть файл

@ -9,7 +9,7 @@ from olympia.accounts.serializers import BaseUserSerializer
from olympia.addons.models import Addon
from olympia.addons.serializers import AddonSerializer
from olympia.amo.templatetags.jinja_helpers import absolutify
from olympia.amo.utils import clean_nl, has_links, slug_validator
from olympia.amo.utils import clean_nl, has_urls, slug_validator
from olympia.api.fields import (
SlugOrPrimaryKeyRelatedField,
SplitField,
@ -93,7 +93,7 @@ class CollectionSerializer(AMOModelSerializer):
return value
def validate_description(self, value):
if has_links(clean_nl(str(value))):
if has_urls(clean_nl(str(value))):
# There's some links, we don't want them.
raise serializers.ValidationError(gettext('No links are allowed.'))
return value

Просмотреть файл

@ -37,9 +37,13 @@ class TestCollections(TestCase):
description='<a href="http://example.com">example.com</a> '
'http://example.com <b>foo</b> some text lol.com'
)
# All markup kept (since this is a text field, not parsing HTML, clients will
# escape it), but URLs are removed.
assert str(collection.description) == '<a href=""></a> <b>foo</b> some text'
# Collection description is a text field, _not_ meant to contain HTML,
# clients are expected to escape it.
# We're removing URLs with a http/https protocol, that may produce
# broken HTML.
# Note that validation will typically happen at the API level to
# prevent the insertion of URLs anyway.
assert str(collection.description) == '<a href=" <b>foo</b> some text lol.com'
def test_translation_default(self):
"""Make sure we're getting strings from the default locale."""

Просмотреть файл

@ -40,7 +40,7 @@ from olympia.addons.utils import (
from olympia.amo.fields import HttpHttpsOnlyURLField, ReCaptchaField
from olympia.amo.forms import AMOModelForm
from olympia.amo.messages import DoubleSafe
from olympia.amo.utils import slug_validator
from olympia.amo.utils import slug_validator, verify_no_urls
from olympia.amo.validators import OneOrMoreLetterOrNumberCharacterValidator
from olympia.api.throttling import CheckThrottlesFormMixin, addon_submission_throttles
from olympia.applications.models import AppVersion
@ -108,6 +108,12 @@ class AddonFormBase(TranslationFormMixin, AMOModelForm):
def clean_slug(self):
return clean_addon_slug(self.cleaned_data['slug'], self.instance)
def clean_summary(self):
summary = verify_no_urls(
self.cleaned_data['summary'], form=self, field_name='summary'
)
return summary
def clean_name(self):
user = getattr(self.request, 'user', None)

Просмотреть файл

@ -346,8 +346,7 @@ class TestRankingScenarios(ESTestCase):
slug='rapidshare-downloadhelper',
summary=(
'Note from Mozilla: This add-on has been discontinued. Try '
'<a rel="nofollow" href="https://addons.mozilla.org/firefox/'
'addon/rapidshare-helper/">Rapidshare Helper</a> instead.\n\n'
'Rapidshare Helper instead.\n'
'RapidShare Download Helper will start your download once '
'ready.'
),
@ -375,10 +374,7 @@ class TestRankingScenarios(ESTestCase):
'Replace Youtube, Vimeo and Dailymotion Flash video players '
'embedded on third-party website by the HTML5 counterpart '
'when the content author still use the old style embed '
'(Flash).\n\nSource code at <a rel="nofollow" href="https://'
'outgoing.prod.mozaws.net/v1/14b404a3c05779fa94b24e0bffc0d710'
'6836f1d6b771367b065fb96e9c8656b9/https%3A//github.com/hfigui'
'ere/no-flash">https://github.com/hfiguiere/no-flash</a>'
'(Flash).\n\nSource code at github.com/hfiguiere/no-flash.'
),
weekly_downloads=77,
)
@ -736,8 +732,8 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario(
'Open Image in New Tab',
(
['Open Image in New Tab', 5582],
['Open image in a new tab', 1745],
['Open Image in New Tab', 5577],
['Open image in a new tab', 1740],
),
)
@ -746,8 +742,8 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario(
'CoinHive',
(
['Coinhive Blocker', 1524],
['NoMiners', 69], # via description
['Coinhive Blocker', 1523],
['NoMiners', 68], # via description
# ['CoinBlock', 0], # via prefix search
),
)
@ -756,12 +752,12 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario(
'Privacy',
(
['Privacy Badger', 2111],
['Google Privacy', 1530], # More users, summary
['Privacy Settings', 1509],
['Privacy Badger', 2108],
['Google Privacy', 1528], # More users, summary
['Privacy Settings', 1507],
['Privacy Pass', 1439],
['Ghostery', 184],
['Blur', 175],
['Ghostery', 182],
['Blur', 173],
),
)
@ -829,16 +825,16 @@ class TestRankingScenarios(ESTestCase):
)
def test_scenario_megaupload(self):
self._check_scenario('MegaUpload', (['MegaUpload DownloadHelper', 1221],))
self._check_scenario('MegaUpload', (['MegaUpload DownloadHelper', 1219],))
def test_scenario_no_flash(self):
self._check_scenario(
'No Flash',
(
['No Flash', 7263],
['Download Flash and Video', 1575],
['YouTube Flash Player', 1379],
['YouTube Flash Video Player', 1268],
['No Flash', 7270],
['Download Flash and Video', 1571],
['YouTube Flash Player', 1376],
['YouTube Flash Video Player', 1265],
),
)
@ -846,10 +842,10 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario(
'no flash',
(
['No Flash', 7263],
['Download Flash and Video', 1575],
['YouTube Flash Player', 1379],
['YouTube Flash Video Player', 1268],
['No Flash', 7270],
['Download Flash and Video', 1571],
['YouTube Flash Player', 1376],
['YouTube Flash Video Player', 1265],
),
)
@ -859,8 +855,8 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario(
'Youtube html5 Player',
(
['YouTube Flash Player', 475],
['No Flash', 193],
['YouTube Flash Player', 464],
['No Flash', 256],
),
)
@ -1014,14 +1010,14 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario(
'tab',
(
['Tabby Cat', 2432],
['Tab Mix Plus', 996],
['OneTab', 736],
['Tab Center Redux', 692],
['Open Bookmarks in New Tab', 638],
['FoxyTab', 551],
['Open image in a new tab', 524],
['Open Image in New Tab', 410],
['Tabby Cat', 2427],
['Tab Mix Plus', 994],
['OneTab', 734],
['Tab Center Redux', 690],
['Open Bookmarks in New Tab', 636],
['FoxyTab', 550],
['Open image in a new tab', 523],
['Open Image in New Tab', 409],
),
)
@ -1043,11 +1039,11 @@ class TestRankingScenarios(ESTestCase):
self._check_scenario(
'download',
(
['Download Flash and Video', 1997],
['Download Flash and Video', 1994],
['1-Click YouTube Video Download', 1456],
['RapidShare DownloadHelper', 834],
['MegaUpload DownloadHelper', 642],
['DownThemAll!', 414],
['RapidShare DownloadHelper', 850],
['MegaUpload DownloadHelper', 641],
['DownThemAll!', 413],
['All Downloader Professional', 129],
),
)

Просмотреть файл

@ -4,7 +4,6 @@ from django.db import connections, models, router
from django.db.models.deletion import Collector
import bleach
from bleach.linkifier import URL_RE # build_url_re() with good defaults.
import olympia.core.logger
from olympia.amo.fields import PositiveAutoField
@ -251,11 +250,13 @@ class NoURLsTranslation(Translation):
return str(self.localized_string_clean)
def clean(self):
# URL_RE is the regexp used by bleach to detect URLs to linkify them,
# in our case we use it to find them and replace them with nothing.
# It's more effective/aggressive than something like r'http\S+', it can
# also detect things like foo.com.
self.localized_string_clean = URL_RE.sub('', self.localized_string).strip()
from olympia.amo.utils import URL_RE
self.localized_string_clean = (
URL_RE.sub('', self.localized_string).strip()
if self.localized_string
else self.localized_string
)
class TranslationSequence(models.Model):

Просмотреть файл

@ -727,68 +727,34 @@ class NoURLsTranslationTest(TestCase):
assert str(translation) == str(value)
def test_urls_stripped(self):
# Link with markup. Link text is preserved because it doesn't contain
# an URL.
value = 'a <a href="http://example.com">link</a> with markup'
# Basic URL in text.
value = 'An URL http://example.com with some text'
translation = NoURLsTranslation(localized_string=value)
assert str(translation) == 'a <a href="">link</a> with markup'
assert str(translation) == 'An URL with some text'
# Link with markup but this time the text is also an URL. It's stripped
value = 'a <a href="http://example.com">example.com</a> with markup'
# More complex URL in text.
value = 'An URL https://example.com?foo=bar with some text'
translation = NoURLsTranslation(localized_string=value)
assert str(translation) == 'a <a href=""></a> with markup'
assert str(translation) == 'An URL with some text'
# Text link.
s = 'a text http://example.com link'
translation = NoURLsTranslation(localized_string=s)
assert str(translation) == 'a text link'
# Even more complex case with multiple URLs in text.
value = (
'An URL https://example.com?foo=bar with some text and another'
'http://example.com/path/to/somewhere/?alice=flop&bob.'
)
translation = NoURLsTranslation(localized_string=value)
assert str(translation) == 'An URL with some text and another'
# Test less obvious link
s = 'a text foo.is link'
translation = NoURLsTranslation(localized_string=s)
assert str(translation) == 'a text link'
# Another.
s = 'a text t.to link'
translation = NoURLsTranslation(localized_string=s)
assert str(translation) == 'a text link'
# Non-URL, just a domain in text.
value = 'A domain example.com with some text'
translation = NoURLsTranslation(localized_string=value)
assert str(translation) == 'A domain example.com with some text'
def test_uppercase(self):
value = (
'a <a href="http://example.com">link</a> with markup, a text '
'http://example.com link, and some raw <b>html://</b>. I <3 HTTP!'
)
# Basic URL in text.
value = 'An URL HTTP://EXAMPLE.COM with some text'
translation = NoURLsTranslation(localized_string=value)
assert str(translation) == (
'a <a href="">link</a> with markup, a text '
' link, and some raw <b>html://</b>. I <3 HTTP!'
)
def test_with_newlines(self):
value = (
'a <a href="http://example.com">link</a> with markup \n, a text '
'http://example.com link, and some raw <b>HTML</b>. I <3 http!'
)
translation = NoURLsTranslation(localized_string=value)
assert str(translation) == (
'a <a href="">link</a> with markup \n, a text '
' link, and some raw <b>HTML</b>. I <3 http!'
)
def test_bit_of_everything(self):
value = (
'a <a href="http://example.com">link</a> with markup,\n'
'a newline with a more complex link <a href="http://foo.is">lol.com</a>,\n'
'another http://example.com link, <b>with forbidden tags</b>,\n'
'and <script>forbidden tags</script> as well as <http:/bad.markup.com'
)
translation = NoURLsTranslation(localized_string=value)
assert str(translation) == (
'a <a href="">link</a> with markup,\n'
'a newline with a more complex link <a href=""></a>,\n'
'another link, <b>with forbidden tags</b>,\n'
'and <script>forbidden tags</script> as well as <'
)
assert str(translation) == 'An URL with some text'
def test_clean_on_save(self):
translation = NoURLsTranslation.objects.create(id=1001, localized_string='foo')