replace xml parsing in encode_header with regex; drop lxml in prod (#17859)

* replace xml parsing in encode_header with regex; drop lxml in prod

* add svg tests; catch more exceptions
This commit is contained in:
Andrew Williamson 2021-09-10 16:34:34 +01:00 коммит произвёл GitHub
Родитель 0dc8a8efc0
Коммит 8a841dd461
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 73 добавлений и 126 удалений

Просмотреть файл

@ -301,41 +301,6 @@ jmespath==0.10.0 \
kombu==5.1.0 \
--hash=sha256:01481d99f4606f6939cdc9b637264ed353ee9e3e4f62cfb582324142c41a572d \
--hash=sha256:e2dedd8a86c9077c350555153825a31e456a0dc20c15d5751f00137ec9c75f0a
# lxml is required by pyquery
lxml==4.6.3 \
--hash=sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2 \
--hash=sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f \
--hash=sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d \
--hash=sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f \
--hash=sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4 \
--hash=sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106 \
--hash=sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee \
--hash=sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2 \
--hash=sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4 \
--hash=sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4 \
--hash=sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3 \
--hash=sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d \
--hash=sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec \
--hash=sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04 \
--hash=sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a \
--hash=sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654 \
--hash=sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0 \
--hash=sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3 \
--hash=sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2 \
--hash=sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade \
--hash=sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b \
--hash=sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa \
--hash=sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a \
--hash=sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927 \
--hash=sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791 \
--hash=sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28 \
--hash=sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7 \
--hash=sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0 \
--hash=sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1 \
--hash=sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23 \
--hash=sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969 \
--hash=sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f \
--hash=sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83
# m2secret is required by django-aesfield
m2secret-py3==1.3 \
--hash=sha256:94dfae937e8a61bf48812fab6a0b8de300781c4e1dbee0cee3aa57cb01915864

Просмотреть файл

@ -112,3 +112,38 @@ wcwidth==0.2.5 \
toml==0.10.2 \
--hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \
--hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f
# lxml is required by pyquery
lxml==4.6.3 \
--hash=sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2 \
--hash=sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f \
--hash=sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d \
--hash=sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f \
--hash=sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4 \
--hash=sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106 \
--hash=sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee \
--hash=sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2 \
--hash=sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4 \
--hash=sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4 \
--hash=sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3 \
--hash=sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d \
--hash=sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec \
--hash=sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04 \
--hash=sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a \
--hash=sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654 \
--hash=sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0 \
--hash=sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3 \
--hash=sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2 \
--hash=sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade \
--hash=sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b \
--hash=sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa \
--hash=sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a \
--hash=sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927 \
--hash=sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791 \
--hash=sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28 \
--hash=sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7 \
--hash=sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0 \
--hash=sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1 \
--hash=sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23 \
--hash=sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969 \
--hash=sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f \
--hash=sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83

Просмотреть файл

@ -1,2 +0,0 @@
# Make sure we import lib.safe_xml to monkeypatch all libraries.
import olympia.lib.safe_xml # noqa

Просмотреть файл

@ -16,11 +16,8 @@ from django.core.files.uploadedfile import SimpleUploadedFile
from django.forms import ValidationError
from django.test.utils import override_settings
import lxml
import pytest
from defusedxml.common import NotSupportedError
from olympia import amo
from olympia.amo.tests import TestCase, user_factory
from olympia.amo.tests.test_helpers import get_addon_file
@ -1009,24 +1006,6 @@ class TestResolvei18nMessage:
assert result == '__MSG_foo__'
class TestXMLVulnerabilities(TestCase):
"""Test a few known vulnerabilities to make sure
our defusedxml patching is applied automatically.
This doesn't replicate all defusedxml tests.
"""
def test_lxml_XMLParser_no_resolve_entities(self):
with pytest.raises(NotSupportedError):
lxml.etree.XMLParser(resolve_entities=True)
# not setting it works
lxml.etree.XMLParser()
# Setting it explicitly to `False` is fine too.
lxml.etree.XMLParser(resolve_entities=False)
class TestGetBackgroundImages(TestCase):
file_obj = os.path.join(
settings.ROOT, 'src/olympia/devhub/tests/addons/static_theme.zip'

Просмотреть файл

@ -1,24 +0,0 @@
from lxml.etree import * # noqa
from lxml.etree import XMLParser as _XMLParser #
from lxml.etree import _ElementTree, _Comment, _Element # noqa
# This should be imported after lxml.etree so that it overrides the
# following attributes.
from defusedxml.lxml import parse, fromstring, XML # noqa
from defusedxml.common import NotSupportedError
class XMLParser(_XMLParser):
"""
A safer version of XMLParser which deosn't allow entity resolution.
"""
def __init__(self, *args, **kwargs):
resolve_entities = kwargs.get('resolve_entities', False)
if resolve_entities:
raise NotSupportedError(
'resolve_entities is forbidden for security reasons.'
)
super().__init__(*args, **kwargs)

Просмотреть файл

@ -1,39 +0,0 @@
"""
Monkey patch and defuse all stdlib xml packages and lxml.
"""
import sys
patched_modules = (
'lxml',
'ElementTree',
'minidom',
'pulldom',
'sax',
'expatbuilder',
'expatreader',
'xmlrpc',
)
if any(module in sys.modules for module in patched_modules):
existing_modules = [(module, module in sys.modules) for module in patched_modules]
raise ImportError(
f'this monkey patch was not applied early enough. {existing_modules}'
)
from defusedxml import defuse_stdlib # noqa
defuse_stdlib()
import lxml # noqa
import lxml.etree # noqa
from xml.sax.handler import ( # noqa
feature_external_ges,
feature_external_pes,
)
from olympia.lib import safe_lxml_etree # noqa
lxml.etree = safe_lxml_etree

Просмотреть файл

@ -3,8 +3,11 @@ import os
import shutil
import tempfile
from base64 import b64encode
from django.conf import settings
from django.core.files.storage import default_storage as storage
from django.utils.encoding import force_str
from unittest import mock
import pytest
@ -13,6 +16,7 @@ from PIL import Image, ImageChops
from olympia import amo
from olympia.versions.utils import (
AdditionalBackground,
encode_header,
process_color_value,
write_svg_to_png,
)
@ -141,3 +145,29 @@ def test_process_color_value(
assert (firefox_prop, css_color) == (
process_color_value(manifest_property, manifest_color)
)
def test_encode_header():
svg_encoded = 'data:image/{};base64,{}'
svg_blob = b"""
<svg id="preview-svg-root" width="680" height="92" xmlns="http://www.w3.org/2000/svg"
"""
assert encode_header(svg_blob, '.svg') == (
svg_encoded.format('svg+xml', force_str(b64encode(svg_blob))),
680,
92,
)
svg_blob_rev = b'<svg id="preview-svg-root" height="92" width="680" xmlns="'
assert encode_header(svg_blob_rev, '.svg') == (
svg_encoded.format('svg+xml', force_str(b64encode(svg_blob_rev))),
680,
92,
)
svg_blob_missing_height = b'<svg id="preview-svg-root" width="680" xmlns="'
assert encode_header(svg_blob_missing_height, '.svg') == (None, 0, 0)
svg_blob_missing_width = b'<svg id="preview-svg-root" height="92" xmlns="'
assert encode_header(svg_blob_missing_width, '.svg') == (None, 0, 0)

Просмотреть файл

@ -2,6 +2,7 @@ import copy
import json
import os
import io
import re
import tempfile
import zipfile
@ -14,7 +15,6 @@ from PIL import Image
from olympia.amo.utils import convert_svg_to_png
from olympia.core import logger
from olympia.lib.safe_xml import lxml
from . import compare
from .models import Version
@ -52,12 +52,15 @@ def write_svg_to_png(svg_content, out):
return convert_svg_to_png(temporary_svg.name, out)
SVG_DIMENSIONS_REGEX = rb'(?=.* width="(?P<width>\d+)")(?=.* height="(?P<height>\d+)")'
def encode_header(header_blob, file_ext):
try:
if file_ext == '.svg':
tree = lxml.etree.fromstring(header_blob)
width = int(tree.get('width'))
height = int(tree.get('height'))
dimensions = re.search(SVG_DIMENSIONS_REGEX, header_blob).groupdict()
width = int(dimensions['width'])
height = int(dimensions['height'])
img_format = 'svg+xml'
else:
with Image.open(io.BytesIO(header_blob)) as header_image:
@ -67,7 +70,7 @@ def encode_header(header_blob, file_ext):
img_format,
force_str(b64encode(header_blob)),
)
except (OSError, ValueError, TypeError, lxml.etree.XMLSyntaxError) as err:
except (OSError, ValueError, TypeError, AttributeError) as err:
log.info(err)
return (None, 0, 0)
return (src, width, height)