Add a method to save utf8 safely to XML

Change the sanitize method for package description
This commit is contained in:
Piotr Zalewa 2012-03-06 15:06:01 +01:00
Родитель f6d2b42d94
Коммит c341a037e7
4 изменённых файлов: 52 добавлений и 5 удалений

Просмотреть файл

@ -46,7 +46,8 @@ from jetpack.errors import (SelfDependencyException, FilenameExistException,
from jetpack.managers import SDKManager, PackageManager
from utils import validator
from utils.helpers import pathify, alphanum, alphanum_plus, get_random_string
from utils.helpers import (pathify, alphanum, alphanum_plus, get_random_string,
sanitize_for_frontend)
from utils.os_utils import make_path
from utils.amo import AMOOAuth
from xpi import xpi_utils
@ -1953,11 +1954,10 @@ class Package(BaseModel, SearchMixin):
# Normalize the package's text fields in place: full_name is always filtered,
# description and version_name only when they are truthy.
# NOTE(review): this listing is a diff whose +/- markers were stripped, so two
# assignments to self.description appear below. The alphanum_plus() one looks
# like the removed line and the sanitize_for_frontend() one like its
# replacement -- confirm against the real file before relying on this.
def clean(self):
self.full_name = alphanum_plus(self.full_name)
if self.description:
self.description = alphanum_plus(self.description)
self.description = sanitize_for_frontend(self.description)
if self.version_name:
self.version_name = alphanum_plus(self.version_name)
def calc_activity_rating(self):
"""
Build a weighted average based on package revisions

Просмотреть файл

@ -88,8 +88,9 @@ class PackageTest(TestCase):
eq_(package.full_name, 'Samuel-lib')
def test_package_sanitization(self):
bad_text = 'Te$t"><script src="google.com"></script>!#'
bad_text = u'Te$tąć"><script src="google.com"></script>!#'
good_text = 'Te$tscript srcgoogle.com/script!#'
good_text_utf8 = u'Te$tąćscript srcgoogle.com/script!#'
package = Package(
author=self.author,
@ -101,7 +102,7 @@ class PackageTest(TestCase):
package.save()
eq_(package.full_name, good_text)
eq_(package.description, good_text)
eq_(package.description, good_text_utf8)
eq_(package.version_name, good_text)
def test_automatic_numbering(self):
@ -458,3 +459,4 @@ class PackageTest(TestCase):
addon2.save()
mod2 = addon2.latest.modules.all()[0]
assert 'id: "second-addon-widget' not in mod2.code

Просмотреть файл

@ -454,6 +454,15 @@ require('b');
self.library.latest.get_lib_dir(),
'test_module')))
def test_utf8_description(self):
    """Build an XPI for an add-on whose description has a non-ASCII char."""
    description = 'utf8 ą utf8'
    self.addon.description = description
    self.addon.save()
    build_result = self.addon.latest.build_xpi(hashtag=self.hashtag)
    # presumably index 1 carries the build's error output -- verify against
    # build_xpi; the test only requires it to be falsy.
    assert not build_result[1]
    xpi_path = '%s.xpi' % self.target_basename
    assert os.path.isfile(xpi_path)
def test_package_included_multiple_times(self):
""" If separate dependencies require the same library, it shouldn't error """
pack = Package.objects.create(type='l', author=self.author)

Просмотреть файл

@ -1,6 +1,7 @@
import re
import mimetypes
import os
import sys
from random import choice
@ -21,6 +22,41 @@ def alphanum(text):
def alphanum_plus(text):
    """Strip ``text`` and remove every character outside the allowed set.

    Allowed characters are ASCII letters and digits, whitespace and the
    punctuation ``. , _ - * & % $ # @ : ( ) ! { } [ ] ^ ' / ?``.  Anything
    else (including backslashes and non-ASCII letters) is dropped.

    :param text: string to clean; leading/trailing whitespace is stripped
    :returns: the cleaned string
    """
    # Raw string so the regex escapes are not (invalid) Python string escapes;
    # the compiled pattern is identical to the previous non-raw literal.
    return re.sub(
        r"[^a-zA-Z0-9\s\.,_\-\*&%\$#@:\(\)!\{\}\[\]\^'\/\?]+",
        '',
        text.strip())
# Inclusive (low, high) code point ranges that must not appear in XML output.
_ILLEGAL_XML_RANGES = (
    # NOTE(review): (0x0B, 0x1F) also removes CR (0x0D), which XML 1.0
    # permits; kept unchanged because callers may rely on it.
    (0x00, 0x08), (0x0B, 0x1F), (0x7F, 0x84), (0x86, 0x9F),
    # The noncharacter block runs up to 0xFDEF (it previously stopped at
    # 0xFDDF, letting 0xFDE0-0xFDEF through).
    (0xD800, 0xDFFF), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF),
    (0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), (0x3FFFE, 0x3FFFF),
    (0x4FFFE, 0x4FFFF), (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF),
    (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), (0x9FFFE, 0x9FFFF),
    (0xAFFFE, 0xAFFFF), (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF),
    (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), (0xFFFFE, 0xFFFFF),
    (0x10FFFE, 0x10FFFF),
)


def _build_illegal_xml_re():
    """Compile one character class covering every range listed above."""
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3: chr() already returns text
    # Ranges starting at or above sys.maxunicode cannot be expressed on this
    # interpreter build, so they are left out of the pattern.
    pieces = [u'%s-%s' % (to_char(low), to_char(high))
              for (low, high) in _ILLEGAL_XML_RANGES
              if low < sys.maxunicode]
    return re.compile(u'[%s]' % u''.join(pieces))


# Built once at import time instead of on every call.
_ILLEGAL_XML_RE = _build_illegal_xml_re()


def filter_illegal_utf8(dirty):
    """Remove characters that are illegal or discouraged in XML.

    via StackOverflow:
    http://stackoverflow.com/questions/1707890/fast-way-to-filter-illegal-xml-unicode-chars-in-python
    The XML specification lists a number of Unicode characters that are
    either illegal or "discouraged"; all of them are removed.

    :param dirty: text to clean.  A byte string is treated as UTF-8: it is
        decoded (undecodable bytes are dropped), filtered and re-encoded, so
        multi-byte characters are never cut apart by the filter.
    :returns: the cleaned value, of the same type as ``dirty``
    """
    if isinstance(dirty, bytes):
        # Filtering raw bytes would match UTF-8 continuation bytes such as
        # 0x87 against the 0x86-0x9F range and corrupt the character.
        decoded = dirty.decode('utf-8', 'ignore')
        return _ILLEGAL_XML_RE.sub(u'', decoded).encode('utf-8')
    return _ILLEGAL_XML_RE.sub(u'', dirty)
def filter_illegal_chars(dirty):
    """Return ``dirty`` with every ``<``, ``>``, ``=`` and ``"`` removed.

    These are the characters that would allow injecting code that runs when
    the value is displayed on a page.
    """
    return re.sub('[<>="]', '', dirty)
def sanitize_for_frontend(dirty):
    """Clean ``dirty`` for display: drop the characters removed by
    filter_illegal_chars, then those removed by filter_illegal_utf8.
    """
    without_markup = filter_illegal_chars(dirty)
    return filter_illegal_utf8(without_markup)
def pathify(path):
""" Replaces all characters except alpanum, dash, underscore, and slash with a dash """
cleaned = re.sub('[^a-zA-Z0-9_\-\/\.]+', '-', path.strip())