зеркало из https://github.com/mozilla/treeherder.git
Bug 1292720 - Remove support for UCS-2 mode Python
Since Heroku is now using a UCS-4 mode Python, matching Vagrant/Travis.
This commit is contained in:
Родитель
3990b40450
Коммит
02428ca8f7
|
@ -3,7 +3,6 @@ import responses
|
|||
from django.conf import settings
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from treeherder.etl.text import char_to_codepoint_ucs2
|
||||
from treeherder.log_parser.failureline import (store_failure_lines,
|
||||
write_failure_lines)
|
||||
from treeherder.model.models import (FailureLine,
|
||||
|
@ -131,20 +130,6 @@ def test_store_error_summary_500(activate_responses, test_repository, jm, eleven
|
|||
assert log_obj.status == JobLog.FAILED
|
||||
|
||||
|
||||
def test_char_data_to_codepoint_ucs2():
|
||||
# Unbelievably, putting the two codepoints in a string seems to cause them to be
|
||||
# interpreted as a single character, but only in unit tests, and only sometimes.
|
||||
# Since we only use indexing operations, putting the codepoints in a tuple is
|
||||
# equivalent to a length-2 string.
|
||||
data = [
|
||||
((u"\ud800", u"\udc00"), 0x010000),
|
||||
((u"\udbff", u"\udfff"), 0x10FFFF),
|
||||
((u"\uda00", u"\uddff"), 0x0901ff),
|
||||
]
|
||||
for value, expected in data:
|
||||
assert char_to_codepoint_ucs2(value) == expected
|
||||
|
||||
|
||||
def test_store_error_summary_duplicate(activate_responses, test_repository, jm, eleven_jobs_stored):
|
||||
log_url = 'http://my-log.mozilla.org'
|
||||
job = Job.objects.get(guid=jm.get_job(1)[0]['job_guid'])
|
||||
|
|
|
@ -1,28 +1,13 @@
|
|||
import re
|
||||
|
||||
|
||||
def char_to_codepoint_ucs4(x):
|
||||
return ord(x)
|
||||
|
||||
|
||||
def char_to_codepoint_ucs2(x):
|
||||
return (0x10000 + (ord(x[0]) - 0xD800) * 0x400 +
|
||||
(ord(x[1]) - 0xDC00))
|
||||
|
||||
if len(u"\U0010FFFF") != 1:
|
||||
raise Exception('Python has been compiled in UCS-2 mode which is not supported.')
|
||||
|
||||
# Regexp that matches all non-BMP unicode characters.
|
||||
if len(u"\U0010FFFF") == 1:
|
||||
filter_re = re.compile(ur"([\U00010000-\U0010FFFF])", re.U)
|
||||
char_to_codepoint = char_to_codepoint_ucs4
|
||||
else:
|
||||
# Python is compiled as the UCS2 variant so we have to match two
|
||||
# code units in a surrogate pair. Then we have to decode the two code units
|
||||
# according to UTF16 rules to get a single codepoint
|
||||
filter_re = re.compile(ur"([\uD800-\uDBFF][\uDC00-\uDFFF])", re.U)
|
||||
char_to_codepoint = char_to_codepoint_ucs2
|
||||
filter_re = re.compile(ur"([\U00010000-\U0010FFFF])", re.U)
|
||||
|
||||
|
||||
def astral_filter(text):
|
||||
if text is None:
|
||||
return text
|
||||
return filter_re.sub(lambda x: "<U+%s>" % hex(char_to_codepoint(x.group(0)))[2:].zfill(6).upper(), text)
|
||||
return filter_re.sub(lambda x: "<U+%s>" % hex(ord(x.group(0)))[2:].zfill(6).upper(), text)
|
||||
|
|
Загрузка…
Ссылка в новой задаче