From 5f7737d8c2fe578bbf743e69607393204834d905 Mon Sep 17 00:00:00 2001 From: Smith Ellis Date: Thu, 14 Nov 2024 11:59:29 -0500 Subject: [PATCH] Improve spam filtering --- kitsune/questions/forms.py | 28 +++++++++++++++++++--------- kitsune/questions/views.py | 6 +++--- kitsune/settings.py | 19 ++++++++++++++++++- kitsune/sumo/utils.py | 7 ++++--- 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/kitsune/questions/forms.py b/kitsune/questions/forms.py index dd9ceb5bd..05e73eae6 100644 --- a/kitsune/questions/forms.py +++ b/kitsune/questions/forms.py @@ -10,6 +10,7 @@ from kitsune.questions.events import QuestionReplyEvent from kitsune.questions.models import AAQConfig, Answer, Question from kitsune.questions.utils import remove_pii from kitsune.sumo.forms import KitsuneBaseForumForm +from kitsune.sumo.utils import check_for_spam_content from kitsune.upload.models import ImageAttachment # labels and help text @@ -185,6 +186,24 @@ class NewQuestionForm(EditQuestionForm): topics = Topic.active.filter(products=product, in_aaq=True) self.fields["category"].queryset = topics + def clean(self, *args, **kwargs): + """ + Generic clean method used by all forms in the question app. + Parse content for suspicious content. + - Toll free numbers + - NANP numbers + - Links - not necessarily spam content + """ + + cdata = self.cleaned_data.get("content") + if not cdata: + return super().clean(*args, **kwargs) + + if check_for_spam_content(cdata): + self.cleaned_data.update({"is_spam": True}) + + return self.cleaned_data + def save(self, user, locale, product, *args, **kwargs): self.instance.creator = user self.instance.locale = locale @@ -228,15 +247,6 @@ class AnswerForm(KitsuneBaseForumForm): model = Answer fields = ("content",) - def clean(self, *args, **kwargs): - """Override clean method to exempt question owner from spam filtering.""" - cdata = super(AnswerForm, self).clean(*args, **kwargs) - # if there is a reply from the owner, remove the spam flag - if self.user and self.question and self.user == self.question.creator: - cdata.pop("is_spam", None) - - return cdata - class WatchQuestionForm(forms.Form): """Form to subscribe to question updates.""" diff --git a/kitsune/questions/views.py b/kitsune/questions/views.py index c4485f123..68affacf5 100644 --- a/kitsune/questions/views.py +++ b/kitsune/questions/views.py @@ -637,12 +637,12 @@ def aaq(request, product_slug=None, step=1, is_loginless=False): product=product, ) - if form.cleaned_data.get("is_spam"): - _add_to_moderation_queue(request, question) - # Submitting the question counts as a vote question_vote(request, question.id) + if form.cleaned_data.get("is_spam"): + _add_to_moderation_queue(request, question) + my_questions_url = reverse("users.questions", args=[request.user.username]) messages.add_message( request, diff --git a/kitsune/settings.py b/kitsune/settings.py index 23971e9ec..028452b42 100644 --- a/kitsune/settings.py +++ b/kitsune/settings.py @@ -1136,9 +1136,26 @@ ALLOW_LINKS_FROM = [ ] # Regexes -TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$") REGEX_TIMEOUT = config("REGEX_TIMEOUT", default=5, cast=int) +TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$") NANP_REGEX = re.compile(r"[0-9]{3}-?[a-zA-Z2-9][a-zA-Z0-9]{2}-?[a-zA-Z0-9]{4}") +ANY_PHONE_NUMBER = re.compile( + r""" + (?