Management command to merge tags (#6339)

This commit is contained in:
Tasos Katsoulas 2024-11-07 18:43:35 +02:00 коммит произвёл GitHub
Родитель 0e07070d27
Коммит 474d8a4bd1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 73 добавлений и 1 удалений

Просмотреть файл

@ -0,0 +1,55 @@
from django.core.management.base import BaseCommand
from fuzzywuzzy import fuzz
from taggit.models import Tag, TaggedItem
# Minimum fuzz.ratio() score between two tag names for the tags to be treated
# as duplicates and merged (compared with >= in the merge command below).
# NOTE(review): fuzz.ratio is expected to return an int in 0-100 -- confirm
# against the installed fuzzywuzzy version before tuning this value.
SIMILARITY_THRESHOLD = 75
class Command(BaseCommand):
    """Merge tags whose names are near-duplicates of each other.

    Tags are compared pairwise with ``fuzz.ratio``; when the score reaches
    ``SIMILARITY_THRESHOLD`` the lower-id tag is folded into the higher-id
    one (tags are scanned in descending id order) and then deleted.
    """

    help = "Merge similar tags"

    def handle(self, *args, **kwargs):
        """Run merge passes until a full pass finds nothing left to merge.

        Each pass reloads the surviving tags in descending id order. This is
        a plain loop rather than one recursive call per pass, so a long chain
        of merges cannot exhaust the interpreter's recursion limit.
        """
        while True:
            # One query per pass instead of a Tag.objects.get() per compared pair.
            tags = list(Tag.objects.order_by("-id"))
            if not self._merge_pass(tags):
                break

    def _merge_pass(self, tags):
        """Sweep ``tags`` once, merging at most one duplicate into each tag.

        Args:
            tags: Tag instances, in the order they should be considered as
                merge targets (earlier entries absorb later ones).

        Returns:
            True if at least one tag was merged during this sweep.
        """
        merged_ids = set()
        for index, primary in enumerate(tags):
            if primary.id in merged_ids:
                continue
            for secondary in tags[index + 1 :]:
                # Model.delete() resets the instance's pk to None, so remember
                # the id before the merge removes the row.
                secondary_id = secondary.id
                if secondary_id in merged_ids:
                    continue
                if fuzz.ratio(primary.name, secondary.name) < SIMILARITY_THRESHOLD:
                    continue
                self._merge_tag_into(secondary, primary)
                merged_ids.add(secondary_id)
                self.stdout.write(f"Merged '{secondary.name}' into '{primary.name}'")
                # Only one merge per primary per sweep; the next pass picks up
                # any further candidates for this primary.
                break
        return bool(merged_ids)

    def _merge_tag_into(self, secondary, primary):
        """Move every tagged item from ``secondary`` to ``primary``, then delete ``secondary``.

        Items that already carry ``primary`` are dropped from ``secondary``
        first so the re-point below cannot create duplicate rows. The match
        is made on content type *and* object id: object ids are only unique
        per model, so matching on object id alone would discard taggings that
        belong to an unrelated object of a different model.
        """
        # Local imports keep this block self-contained; Django is already a
        # dependency of this module.
        from django.db import transaction
        from django.db.models import Exists, OuterRef

        already_on_primary = TaggedItem.objects.filter(
            tag=primary,
            content_type_id=OuterRef("content_type_id"),
            object_id=OuterRef("object_id"),
        )
        # All three steps succeed or fail together, so an error midway cannot
        # leave a half-merged tag behind.
        with transaction.atomic():
            TaggedItem.objects.filter(Exists(already_on_primary), tag=secondary).delete()
            TaggedItem.objects.filter(tag=secondary).update(tag=primary)
            secondary.delete()

18
poetry.lock сгенерированный
Просмотреть файл

@ -1450,6 +1450,20 @@ mccabe = ">=0.7.0,<0.8.0"
pycodestyle = ">=2.12.0,<2.13.0"
pyflakes = ">=3.2.0,<3.3.0"
[[package]]
name = "fuzzywuzzy"
version = "0.18.0"
description = "Fuzzy string matching in python"
optional = false
python-versions = "*"
files = [
{file = "fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993"},
{file = "fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8"},
]
[package.extras]
speedup = ["python-levenshtein (>=0.12)"]
[[package]]
name = "gevent"
version = "23.9.1"
@ -3096,6 +3110,8 @@ files = [
{file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"},
{file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"},
{file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"},
{file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"},
{file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"},
{file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"},
{file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"},
{file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"},
@ -5022,4 +5038,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "eba2d4699a2487fddd0734ba39fe2e2c52958212407ddf1ac4e0c9dea837f528"
content-hash = "c156bc4650e5c519f35bbe1e39525a27cbcd72932a0e212f3eaeb066862de668"

Просмотреть файл

@ -90,6 +90,7 @@ google-analytics-data = "0.18.7"
pyparsing = "3.1.2"
django-silk = "^5.1.0"
requests = "^2.32.3"
fuzzywuzzy = "^0.18.0"
[tool.poetry.group.dev.dependencies]
ipdb = "^0.13.11"