зеркало из https://github.com/mozilla/kitsune.git
Management command to merge tags (#6339)
This commit is contained in:
Родитель
0e07070d27
Коммит
474d8a4bd1
|
@ -0,0 +1,55 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
from fuzzywuzzy import fuzz
|
||||
from taggit.models import Tag, TaggedItem
|
||||
|
||||
# Minimum fuzz.ratio() score at which two tag names are considered the same
# tag and get merged (compared with ">=" in the merge command below).
SIMILARITY_THRESHOLD = 75
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Merge tags whose names are nearly identical.

    Two tags are considered similar when ``fuzz.ratio`` of their names is at
    least ``SIMILARITY_THRESHOLD``. Tags are visited in descending id order;
    the tag visited first (higher id) is kept and the similar tag found after
    it is merged into it and deleted.
    """

    help = "Merge similar tags"

    def handle(self, *args, **kwargs):
        """Run merge passes until a complete pass merges nothing."""
        # A merge can make further merges possible, so rescan from a fresh
        # snapshot after every pass that changed something. A loop (rather
        # than self-recursion) keeps the pass count independent of the
        # interpreter's recursion limit.
        while self._merge_pass():
            pass

    def _merge_pass(self):
        """Scan all tags once, merging at most one tag into each kept tag.

        Returns:
            True if at least one merge happened during this pass.
        """
        # Load every tag once per pass instead of issuing a query per pair.
        tags_by_id = Tag.objects.in_bulk()
        tag_ids = sorted(tags_by_id, reverse=True)

        deleted_tags = set()
        merged_any = False

        for i, primary_tag_id in enumerate(tag_ids):
            if primary_tag_id in deleted_tags:
                continue
            primary_tag = tags_by_id[primary_tag_id]

            for secondary_tag_id in tag_ids[i + 1:]:
                if secondary_tag_id in deleted_tags:
                    continue
                secondary_tag = tags_by_id[secondary_tag_id]

                similarity = fuzz.ratio(primary_tag.name, secondary_tag.name)
                if similarity < SIMILARITY_THRESHOLD:
                    continue

                self._merge(primary_tag, secondary_tag)
                # Track by the id captured before deletion: Django clears the
                # instance's pk once delete() has run.
                deleted_tags.add(secondary_tag_id)
                self.stdout.write(f"Merged '{secondary_tag.name}' into '{primary_tag.name}'")
                merged_any = True
                # Only one merge per primary tag per pass; remaining
                # candidates are picked up by the next pass.
                break

        return merged_any

    def _merge(self, primary_tag, secondary_tag):
        """Move every tagged item from ``secondary_tag`` to ``primary_tag``.

        Items that already carry ``primary_tag`` are removed from
        ``secondary_tag`` first so the re-pointing update cannot create
        duplicate rows; ``secondary_tag`` is deleted afterwards.
        """
        primary_items = TaggedItem.objects.filter(tag=primary_tag)
        secondary_items = TaggedItem.objects.filter(tag=secondary_tag)

        # A tagged object is identified by (content_type, object_id), not by
        # object_id alone, so conflicts must be resolved per content type.
        # Otherwise an unrelated object of another model that happens to
        # share the numeric id would lose its tag.
        content_type_ids = list(
            primary_items.order_by().values_list("content_type_id", flat=True).distinct()
        )
        for content_type_id in content_type_ids:
            secondary_items.filter(
                content_type_id=content_type_id,
                object_id__in=primary_items.filter(
                    content_type_id=content_type_id
                ).values_list("object_id", flat=True),
            ).delete()

        secondary_items.update(tag=primary_tag)
        secondary_tag.delete()
|
|
@ -1450,6 +1450,20 @@ mccabe = ">=0.7.0,<0.8.0"
|
|||
pycodestyle = ">=2.12.0,<2.13.0"
|
||||
pyflakes = ">=3.2.0,<3.3.0"
|
||||
|
||||
[[package]]
|
||||
name = "fuzzywuzzy"
|
||||
version = "0.18.0"
|
||||
description = "Fuzzy string matching in python"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"},
|
||||
{file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
speedup = ["python-levenshtein (>=0.12)"]
|
||||
|
||||
[[package]]
|
||||
name = "gevent"
|
||||
version = "23.9.1"
|
||||
|
@ -3096,6 +3110,8 @@ files = [
|
|||
{file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"},
|
||||
{file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"},
|
||||
{file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"},
|
||||
{file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"},
|
||||
{file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"},
|
||||
{file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"},
|
||||
{file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"},
|
||||
{file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"},
|
||||
|
@ -5022,4 +5038,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "eba2d4699a2487fddd0734ba39fe2e2c52958212407ddf1ac4e0c9dea837f528"
|
||||
content-hash = "c156bc4650e5c519f35bbe1e39525a27cbcd72932a0e212f3eaeb066862de668"
|
||||
|
|
|
@ -90,6 +90,7 @@ google-analytics-data = "0.18.7"
|
|||
pyparsing = "3.1.2"
|
||||
django-silk = "^5.1.0"
|
||||
requests = "^2.32.3"
|
||||
fuzzywuzzy = "^0.18.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ipdb = "^0.13.11"
|
||||
|
|
Загрузка…
Ссылка в новой задаче