Bug 1292720 - Remove support for UCS-2 mode Python

Heroku is now using a UCS-4 mode Python, matching Vagrant/Travis, so support for UCS-2 mode is no longer needed.
This commit is contained in:
Ed Morley 2016-12-26 02:06:55 +00:00
Родитель 3990b40450
Коммит 02428ca8f7
2 изменённых файлов: 4 добавлений и 34 удалений

Просмотреть файл

@ -3,7 +3,6 @@ import responses
from django.conf import settings
from requests.exceptions import HTTPError
from treeherder.etl.text import char_to_codepoint_ucs2
from treeherder.log_parser.failureline import (store_failure_lines,
write_failure_lines)
from treeherder.model.models import (FailureLine,
@ -131,20 +130,6 @@ def test_store_error_summary_500(activate_responses, test_repository, jm, eleven
assert log_obj.status == JobLog.FAILED
def test_char_data_to_codepoint_ucs2():
    """Check that surrogate pairs decode to the expected astral codepoints."""
    # Unbelievably, putting the two codepoints in a string seems to cause them
    # to be interpreted as a single character, but only in unit tests, and
    # only sometimes. Since we only use indexing operations, a tuple holding
    # the two codepoints is equivalent to a length-2 string.
    cases = (
        ((u"\ud800", u"\udc00"), 0x010000),
        ((u"\udbff", u"\udfff"), 0x10FFFF),
        ((u"\uda00", u"\uddff"), 0x0901ff),
    )
    for surrogate_pair, codepoint in cases:
        assert char_to_codepoint_ucs2(surrogate_pair) == codepoint
def test_store_error_summary_duplicate(activate_responses, test_repository, jm, eleven_jobs_stored):
log_url = 'http://my-log.mozilla.org'
job = Job.objects.get(guid=jm.get_job(1)[0]['job_guid'])

Просмотреть файл

@ -1,28 +1,13 @@
import re
def char_to_codepoint_ucs4(x):
    """Return the integer codepoint of ``x`` by passing it straight to ``ord``."""
    codepoint = ord(x)
    return codepoint
def char_to_codepoint_ucs2(x):
    """Combine a surrogate pair into the single codepoint it encodes.

    ``x`` is only indexed, so any sequence works: ``x[0]`` is treated as the
    high surrogate (offset from 0xD800) and ``x[1]`` as the low surrogate
    (offset from 0xDC00). No range validation is performed.
    """
    high_offset = ord(x[0]) - 0xD800
    low_offset = ord(x[1]) - 0xDC00
    # Each high-surrogate step spans 0x400 low-surrogate values; the result
    # is then shifted past the BMP by adding 0x10000.
    return 0x10000 + high_offset * 0x400 + low_offset
# Fail fast at import time on a narrow (UCS-2) build: there a non-BMP
# character such as U+10FFFF is stored as a surrogate pair, so len() is not 1.
if len(u"\U0010FFFF") != 1:
raise Exception('Python has been compiled in UCS-2 mode which is not supported.')
# NOTE(review): this listing comes from a rendered diff whose +/- markers were
# stripped. The guard above and the final `filter_re` assignment appear to be
# the added lines, and the if/else below the removed ones -- confirm against
# the real file before relying on this listing.
# Regexp that matches all non-BMP unicode characters.
if len(u"\U0010FFFF") == 1:
# One code unit per character: a single character class covering
# U+10000..U+10FFFF matches every non-BMP character.
filter_re = re.compile(ur"([\U00010000-\U0010FFFF])", re.U)
char_to_codepoint = char_to_codepoint_ucs4
else:
# Python is compiled as the UCS2 variant so we have to match the two
# code units of a surrogate pair (high U+D800..U+DBFF followed by low
# U+DC00..U+DFFF). Then we have to decode the pair according to UTF-16
# rules to get a single codepoint.
filter_re = re.compile(ur"([\uD800-\uDBFF][\uDC00-\uDFFF])", re.U)
char_to_codepoint = char_to_codepoint_ucs2
# Unconditional definition of the non-BMP pattern; it rebinds `filter_re`
# regardless of which branch ran above.
filter_re = re.compile(ur"([\U00010000-\U0010FFFF])", re.U)
# Replace every match of `filter_re` in `text` with a "<U+XXXXXX>" marker,
# where XXXXXX is the matched character's codepoint in uppercase hex,
# zero-padded to at least six digits. A `text` of None is returned unchanged.
def astral_filter(text):
if text is None:
return text
# hex() yields "0x..."; the [2:] slice drops that prefix before the value is
# zero-padded and upper-cased.
return filter_re.sub(lambda x: "<U+%s>" % hex(char_to_codepoint(x.group(0)))[2:].zfill(6).upper(), text)
# NOTE(review): the line below differs from the one above only in calling
# ord() directly instead of `char_to_codepoint`. The two look like the removed
# and added versions of the same line from a diff; read literally, this second
# return can never execute -- confirm which one the real file contains.
return filter_re.sub(lambda x: "<U+%s>" % hex(ord(x.group(0)))[2:].zfill(6).upper(), text)