зеркало из https://github.com/mozilla/kitsune.git
Merge pull request #6357 from smithellis/1952-phone-number-forum-spam
Improve spam filtering
This commit is contained in:
Коммит
26d945bf60
|
@ -10,6 +10,7 @@ from kitsune.questions.events import QuestionReplyEvent
|
|||
from kitsune.questions.models import AAQConfig, Answer, Question
|
||||
from kitsune.questions.utils import remove_pii
|
||||
from kitsune.sumo.forms import KitsuneBaseForumForm
|
||||
from kitsune.sumo.utils import check_for_spam_content
|
||||
from kitsune.upload.models import ImageAttachment
|
||||
|
||||
# labels and help text
|
||||
|
@ -185,6 +186,24 @@ class NewQuestionForm(EditQuestionForm):
|
|||
topics = Topic.active.filter(products=product, in_aaq=True)
|
||||
self.fields["category"].queryset = topics
|
||||
|
||||
def clean(self, *args, **kwargs):
    """Validate the form and flag suspicious question content as spam.

    The parent class validation always runs first, so checks performed
    by the base form are kept whether or not the question has content.
    The cleaned ``content`` value is then handed to
    ``check_for_spam_content``; when that helper reports a hit,
    ``cleaned_data["is_spam"]`` is set to ``True`` so the caller can act
    on it after saving.

    Returns:
        The parent's ``clean()`` result when there is no content to
        inspect, otherwise ``self.cleaned_data``.
    """
    # Always delegate to the parent: previously this only happened when
    # ``content`` was empty, which skipped base-class validation for
    # every real submission.
    parent_result = super().clean(*args, **kwargs)

    content = self.cleaned_data.get("content")
    if not content:
        # Nothing to scan; keep the original return value for this path.
        return parent_result

    if check_for_spam_content(content):
        self.cleaned_data["is_spam"] = True

    return self.cleaned_data
|
||||
|
||||
def save(self, user, locale, product, *args, **kwargs):
|
||||
self.instance.creator = user
|
||||
self.instance.locale = locale
|
||||
|
@ -228,15 +247,6 @@ class AnswerForm(KitsuneBaseForumForm):
|
|||
model = Answer
|
||||
fields = ("content",)
|
||||
|
||||
def clean(self, *args, **kwargs):
    """Run the inherited validation, then clear the spam flag for the asker.

    A reply written by the user who created the question is never kept
    flagged as spam: the ``is_spam`` key is dropped from the cleaned data
    when ``self.user`` equals ``self.question.creator``.

    Returns:
        The cleaned data produced by the parent ``clean()``, possibly
        without its ``is_spam`` entry.
    """
    cleaned = super(AnswerForm, self).clean(*args, **kwargs)

    # Without both a user and a question there is no owner to compare.
    if not (self.user and self.question):
        return cleaned

    if self.user == self.question.creator:
        cleaned.pop("is_spam", None)

    return cleaned
|
||||
|
||||
|
||||
class WatchQuestionForm(forms.Form):
|
||||
"""Form to subscribe to question updates."""
|
||||
|
|
|
@ -639,12 +639,12 @@ def aaq(request, product_slug=None, step=1, is_loginless=False):
|
|||
product=product,
|
||||
)
|
||||
|
||||
if form.cleaned_data.get("is_spam"):
|
||||
_add_to_moderation_queue(request, question)
|
||||
|
||||
# Submitting the question counts as a vote
|
||||
question_vote(request, question.id)
|
||||
|
||||
if form.cleaned_data.get("is_spam"):
|
||||
_add_to_moderation_queue(request, question)
|
||||
|
||||
my_questions_url = reverse("users.questions", args=[request.user.username])
|
||||
messages.add_message(
|
||||
request,
|
||||
|
|
|
@ -1136,9 +1136,26 @@ ALLOW_LINKS_FROM = [
|
|||
]
|
||||
|
||||
# Regexes
# Integer setting read through config(), defaulting to 5.
# NOTE(review): presumably a timeout (in seconds) for regex matching; the
# code that consumes it is not visible in this section -- confirm.
REGEX_TIMEOUT = config("REGEX_TIMEOUT", default=5, cast=int)
# Toll-free detector: any prefix, then 800/833/844/855/866/877/888, then a
# digit 2-9 followed by at least six more digits up to the end of the string.
# Defined once; the pattern was previously compiled twice with the same text.
TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$")
|
||||
# Loose NANP-shaped matcher: three digits, a three-character group whose first
# character is a letter or 2-9, and a four-character group, with optional
# dashes between groups. Letters are accepted in the last two groups, so
# vanity-style numbers match as well.
# NOTE(review): the pattern has no boundaries, so it also matches inside
# ordinary tokens such as "100percent" -- confirm this is intended.
NANP_REGEX = re.compile(r"[0-9]{3}-?[a-zA-Z2-9][a-zA-Z0-9]{2}-?[a-zA-Z0-9]{4}")

# Generic "phone-number-ish" matcher made of four digit groups with optional
# separators. The pattern text must stay free of stray lines: in a verbose
# pattern a bare "|" is an alternation and would change what is matched.
# NOTE(review): every group may be a single digit, so any standalone run of
# four or more digits (e.g. "2024") and dotted numbers such as "115.0" match.
ANY_PHONE_NUMBER = re.compile(
    r"""
    (?<!\w) # Assert position is not preceded by a word character (prevents partial matches)
    (?:\+|00|011)? # Match optional country code prefix (+, 00, or 011)
    [\s.-]?\(? # Optional separator (space, dot, dash) and optional opening parenthesis
    \d{1,4} # Match 1-4 digits (area code or first part of the number)
    \)? # Optional closing parenthesis
    [\s.-]? # Optional separator (space, dot, dash)
    \d{1,4} # Match 1-4 digits (first part of the phone number)
    [\s.-]? # Optional separator (space, dot, dash)
    \d{1,4} # Match 1-4 digits (second part of the phone number)
    [\s.-]? # Optional separator (space, dot, dash)
    \d{1,9} # Match 1-9 digits (remaining part of the phone number)
    (?!\w) # Assert position is not followed by a word character (prevents partial matches)
    """,
    re.VERBOSE,  # Allows for the use of comments and whitespace in the pattern for readability
)
|
||||
|
||||
if ES_ENABLE_CONSOLE_LOGGING and DEV:
|
||||
es_trace_logger = logging.getLogger("elasticsearch.trace")
|
||||
|
|
|
@ -321,17 +321,18 @@ def check_for_spam_content(data):
|
|||
- Toll free numbers
|
||||
- Vanity toll free numbers
|
||||
- Links in the text.
|
||||
- Any phone number-ish string of digits
|
||||
"""
|
||||
|
||||
# keep only the digits in text
|
||||
# keep only digits
|
||||
digits = "".join(filter(type(data).isdigit, data))
|
||||
is_toll_free = settings.TOLL_FREE_REGEX.match(digits)
|
||||
|
||||
is_nanp_number = any(settings.NANP_REGEX.findall(data))
|
||||
is_phone_number = any(settings.ANY_PHONE_NUMBER.findall(data))
|
||||
|
||||
has_links = has_blocked_link(data)
|
||||
|
||||
return is_toll_free or is_nanp_number or has_links
|
||||
return is_toll_free or is_nanp_number or is_phone_number or has_links
|
||||
|
||||
|
||||
@lru_cache(maxsize=settings.WEBPACK_LRU_CACHE)
|
||||
|
|
Загрузка…
Ссылка в новой задаче