decode unidecode output
This commit is contained in:
Родитель
296e0d7f2e
Коммит
af54cd8c61
|
@@ -25,6 +25,7 @@ def smart_text(s, encoding='utf-8', errors='strict'):
|
|||
s = six.text_type(s)
|
||||
return s
|
||||
|
||||
|
||||
def _sanitize(text, ok):
|
||||
rv = []
|
||||
for c in text:
|
||||
|
@@ -35,6 +36,7 @@ def _sanitize(text, ok):
|
|||
rv.append(' ')
|
||||
return ''.join(rv).strip()
|
||||
|
||||
|
||||
# Extra characters outside of alphanumerics that we'll allow.
|
||||
SLUG_OK = '-_~'
|
||||
|
||||
|
@@ -75,7 +77,7 @@ def slugify(s, ok=SLUG_OK, lower=True, spaces=False, only_ascii=False, space_rep
|
|||
|
||||
new = _sanitize(unicodedata.normalize('NFKC', smart_text(s)), ok)
|
||||
if only_ascii:
|
||||
- new = _sanitize(unidecode(new), ok)
|
||||
+ new = _sanitize(smart_text(unidecode(new)), ok)
|
||||
if not spaces:
|
||||
if space_replacement and space_replacement not in ok:
|
||||
space_replacement = ok[0] if ok else ''
|
||||
|
|
|
@@ -54,8 +54,8 @@ def test_slugify():
|
|||
# forms:
|
||||
('\N{LATIN SMALL LIGATURE FI}lms', 'films'),
|
||||
# I don't really care what slugify returns. Just don't crash.
|
||||
- ('x𘍿', 'x𘍿'),
|
||||
- ('ϧ𘒬𘓣', 'ϧ𘒬𘓣'),
|
||||
+ ('x𘍿', 'x'),
|
||||
+ ('ϧ𘒬𘓣', 'ϧ'),
|
||||
('¿x', 'x'),
|
||||
('Bakıcı geldi', 'bak\u0131c\u0131-geldi'),
|
||||
('Bäuma means tree', 'b\xe4uma-means-tree')]
|
||||
|
|
Загрузка…
Ссылка в новой задаче