Merge pull request #6357 from smithellis/1952-phone-number-forum-spam

Improve spam filtering
2024-11-20 11:22:21 +02:00 · 2024-11-20 11:22:21 +02:00 · 26d945bf60
--- a/kitsune/questions/forms.py
+++ b/kitsune/questions/forms.py
@ -10,6 +10,7 @@ from kitsune.questions.events import QuestionReplyEvent
 from kitsune.questions.models import AAQConfig, Answer, Question
 from kitsune.questions.utils import remove_pii
 from kitsune.sumo.forms import KitsuneBaseForumForm
 from kitsune.sumo.utils import check_for_spam_content
 from kitsune.upload.models import ImageAttachment
 # labels and help text
@ -185,6 +186,24 @@ class NewQuestionForm(EditQuestionForm):
            topics = Topic.active.filter(products=product, in_aaq=True)
            self.fields["category"].queryset = topics
    def clean(self, *args, **kwargs):
        """
        Validate the form and flag suspicious question content as spam.

        The parent ``clean()`` is always run first so that inherited
        validation is never bypassed, regardless of whether the question
        has content. Afterwards, if the cleaned ``content`` field is
        non-empty and ``check_for_spam_content`` reports a hit, ``is_spam``
        is set to ``True`` in the cleaned data so callers can route the
        question to moderation.

        Returns the cleaned data dict.
        """
        cleaned_data = super().clean(*args, **kwargs)
        # Django allows clean() to return None, in which case the form's
        # existing cleaned_data is the source of truth.
        if cleaned_data is None:
            cleaned_data = self.cleaned_data
        content = cleaned_data.get("content")
        if content and check_for_spam_content(content):
            cleaned_data["is_spam"] = True
        return cleaned_data
    def save(self, user, locale, product, *args, **kwargs):
        self.instance.creator = user
        self.instance.locale = locale
@ -228,15 +247,6 @@ class AnswerForm(KitsuneBaseForumForm):
        model = Answer
        fields = ("content",)
    def clean(self, *args, **kwargs):
        """Run the inherited clean, but never keep a spam flag on the question owner's reply."""
        cleaned = super().clean(*args, **kwargs)
        # Without both a user and a question there is no owner to exempt.
        if not self.user or not self.question:
            return cleaned
        # The person who asked the question is replying: drop any spam flag.
        if self.user == self.question.creator:
            cleaned.pop("is_spam", None)
        return cleaned
 class WatchQuestionForm(forms.Form):
    """Form to subscribe to question updates."""
--- a/kitsune/questions/views.py
+++ b/kitsune/questions/views.py
@ -639,12 +639,12 @@ def aaq(request, product_slug=None, step=1, is_loginless=False):
                product=product,
            )
            if form.cleaned_data.get("is_spam"):
                _add_to_moderation_queue(request, question)
            # Submitting the question counts as a vote
            question_vote(request, question.id)
            if form.cleaned_data.get("is_spam"):
                _add_to_moderation_queue(request, question)
            my_questions_url = reverse("users.questions", args=[request.user.username])
            messages.add_message(
                request,
--- a/kitsune/settings.py
+++ b/kitsune/settings.py
@ -1136,9 +1136,26 @@ ALLOW_LINKS_FROM = [
 ]
 # Regexes
 # Anchored pattern: any leading characters, then 8 followed by one of
 # 00/33/44/55/66/77/88, a digit 2-9, and at least six more digits to the
 # end of the string.
 TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$")
 # Integer read from configuration, defaulting to 5.
 # NOTE(review): units and consumers are not visible here - presumably seconds; confirm.
 REGEX_TIMEOUT = config("REGEX_TIMEOUT", default=5, cast=int)
 # NOTE(review): identical to the TOLL_FREE_REGEX assignment above; one of the
 # two is redundant.
 TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$")
 # Unanchored pattern: three digits, an optional hyphen, a letter or a digit
 # 2-9, two alphanumerics, an optional hyphen, then four alphanumerics.
 # Letters are accepted after the first three digits - presumably to catch
 # vanity numbers; confirm.
 NANP_REGEX = re.compile(r"[0-9]{3}-?[a-zA-Z2-9][a-zA-Z0-9]{2}-?[a-zA-Z0-9]{4}")
 # Broad phone-number-like matcher, compiled in verbose mode so the pattern can
 # carry its own inline comments. Each of the four digit groups needs only one
 # digit and every separator is optional, so any run of four or more digits
 # that is not adjacent to a word character (for example "2024") matches.
 # NOTE(review): confirm this breadth is intended.
 ANY_PHONE_NUMBER = re.compile(
    r"""
    (?<!\w)             # Assert position is not preceded by a word character (prevents partial matches)
    (?:\+|00|011)?      # Match optional country code prefix (+, 00, or 011)
    [\s.-]?\(?          # Optional separator (space, dot, dash) and optional opening parenthesis
    \d{1,4}             # Match 1-4 digits (area code or first part of the number)
    \)?                 # Optional closing parenthesis
    [\s.-]?             # Optional separator (space, dot, dash)
    \d{1,4}             # Match 1-4 digits (first part of the phone number)
    [\s.-]?             # Optional separator (space, dot, dash)
    \d{1,4}             # Match 1-4 digits (second part of the phone number)
    [\s.-]?             # Optional separator (space, dot, dash)
    \d{1,9}             # Match 1-9 digits (remaining part of the phone number)
    (?!\w)              # Assert position is not followed by a word character (prevents partial matches)
    """,
    re.VERBOSE,  # Allows for the use of comments and whitespace in the pattern for readability
 )
 if ES_ENABLE_CONSOLE_LOGGING and DEV:
    es_trace_logger = logging.getLogger("elasticsearch.trace")
--- a/kitsune/sumo/utils.py
+++ b/kitsune/sumo/utils.py
@ -321,17 +321,18 @@ def check_for_spam_content(data):
    - Toll free numbers
    - Vanity toll free numbers
    - Links in the text.
    - Any phone number-ish string of digits
    """
-    # keep only the digits in text
+    # keep only digits
    digits = "".join(filter(type(data).isdigit, data))
    is_toll_free = settings.TOLL_FREE_REGEX.match(digits)
    is_nanp_number = any(settings.NANP_REGEX.findall(data))
    is_phone_number = any(settings.ANY_PHONE_NUMBER.findall(data))
    has_links = has_blocked_link(data)
-    return is_toll_free or is_nanp_number or has_links
+    return is_toll_free or is_nanp_number or is_phone_number or has_links
@lru_cache(maxsize=settings.WEBPACK_LRU_CACHE)