зеркало из https://github.com/mozilla/kitsune.git
Merge pull request #6357 from smithellis/1952-phone-number-forum-spam
Improve spam filtering
This commit is contained in:
Коммит
26d945bf60
|
@ -10,6 +10,7 @@ from kitsune.questions.events import QuestionReplyEvent
|
||||||
from kitsune.questions.models import AAQConfig, Answer, Question
|
from kitsune.questions.models import AAQConfig, Answer, Question
|
||||||
from kitsune.questions.utils import remove_pii
|
from kitsune.questions.utils import remove_pii
|
||||||
from kitsune.sumo.forms import KitsuneBaseForumForm
|
from kitsune.sumo.forms import KitsuneBaseForumForm
|
||||||
|
from kitsune.sumo.utils import check_for_spam_content
|
||||||
from kitsune.upload.models import ImageAttachment
|
from kitsune.upload.models import ImageAttachment
|
||||||
|
|
||||||
# labels and help text
|
# labels and help text
|
||||||
|
@ -185,6 +186,24 @@ class NewQuestionForm(EditQuestionForm):
|
||||||
topics = Topic.active.filter(products=product, in_aaq=True)
|
topics = Topic.active.filter(products=product, in_aaq=True)
|
||||||
self.fields["category"].queryset = topics
|
self.fields["category"].queryset = topics
|
||||||
|
|
||||||
|
def clean(self, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
Generic clean method used by all forms in the question app.
|
||||||
|
Parse content for suspicious content.
|
||||||
|
- Toll free numbers
|
||||||
|
- NANP numbers
|
||||||
|
- Links - not necessarily spam content
|
||||||
|
"""
|
||||||
|
|
||||||
|
cdata = self.cleaned_data.get("content")
|
||||||
|
if not cdata:
|
||||||
|
return super().clean(*args, **kwargs)
|
||||||
|
|
||||||
|
if check_for_spam_content(cdata):
|
||||||
|
self.cleaned_data.update({"is_spam": True})
|
||||||
|
|
||||||
|
return self.cleaned_data
|
||||||
|
|
||||||
def save(self, user, locale, product, *args, **kwargs):
|
def save(self, user, locale, product, *args, **kwargs):
|
||||||
self.instance.creator = user
|
self.instance.creator = user
|
||||||
self.instance.locale = locale
|
self.instance.locale = locale
|
||||||
|
@ -228,15 +247,6 @@ class AnswerForm(KitsuneBaseForumForm):
|
||||||
model = Answer
|
model = Answer
|
||||||
fields = ("content",)
|
fields = ("content",)
|
||||||
|
|
||||||
def clean(self, *args, **kwargs):
|
|
||||||
"""Override clean method to exempt question owner from spam filtering."""
|
|
||||||
cdata = super(AnswerForm, self).clean(*args, **kwargs)
|
|
||||||
# if there is a reply from the owner, remove the spam flag
|
|
||||||
if self.user and self.question and self.user == self.question.creator:
|
|
||||||
cdata.pop("is_spam", None)
|
|
||||||
|
|
||||||
return cdata
|
|
||||||
|
|
||||||
|
|
||||||
class WatchQuestionForm(forms.Form):
|
class WatchQuestionForm(forms.Form):
|
||||||
"""Form to subscribe to question updates."""
|
"""Form to subscribe to question updates."""
|
||||||
|
|
|
@ -639,12 +639,12 @@ def aaq(request, product_slug=None, step=1, is_loginless=False):
|
||||||
product=product,
|
product=product,
|
||||||
)
|
)
|
||||||
|
|
||||||
if form.cleaned_data.get("is_spam"):
|
|
||||||
_add_to_moderation_queue(request, question)
|
|
||||||
|
|
||||||
# Submitting the question counts as a vote
|
# Submitting the question counts as a vote
|
||||||
question_vote(request, question.id)
|
question_vote(request, question.id)
|
||||||
|
|
||||||
|
if form.cleaned_data.get("is_spam"):
|
||||||
|
_add_to_moderation_queue(request, question)
|
||||||
|
|
||||||
my_questions_url = reverse("users.questions", args=[request.user.username])
|
my_questions_url = reverse("users.questions", args=[request.user.username])
|
||||||
messages.add_message(
|
messages.add_message(
|
||||||
request,
|
request,
|
||||||
|
|
|
@ -1136,9 +1136,26 @@ ALLOW_LINKS_FROM = [
|
||||||
]
|
]
|
||||||
|
|
||||||
# Regexes
|
# Regexes
|
||||||
TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$")
|
|
||||||
REGEX_TIMEOUT = config("REGEX_TIMEOUT", default=5, cast=int)
|
REGEX_TIMEOUT = config("REGEX_TIMEOUT", default=5, cast=int)
|
||||||
|
TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$")
|
||||||
NANP_REGEX = re.compile(r"[0-9]{3}-?[a-zA-Z2-9][a-zA-Z0-9]{2}-?[a-zA-Z0-9]{4}")
|
NANP_REGEX = re.compile(r"[0-9]{3}-?[a-zA-Z2-9][a-zA-Z0-9]{2}-?[a-zA-Z0-9]{4}")
|
||||||
|
ANY_PHONE_NUMBER = re.compile(
|
||||||
|
r"""
|
||||||
|
(?<!\w) # Assert position is not preceded by a word character (prevents partial matches)
|
||||||
|
(?:\+|00|011)? # Match optional country code prefix (+, 00, or 011)
|
||||||
|
[\s.-]?\(? # Optional separator (space, dot, dash) and optional opening parenthesis
|
||||||
|
\d{1,4} # Match 1-4 digits (area code or first part of the number)
|
||||||
|
\)? # Optional closing parenthesis
|
||||||
|
[\s.-]? # Optional separator (space, dot, dash)
|
||||||
|
\d{1,4} # Match 1-4 digits (first part of the phone number)
|
||||||
|
[\s.-]? # Optional separator (space, dot, dash)
|
||||||
|
\d{1,4} # Match 1-4 digits (second part of the phone number)
|
||||||
|
[\s.-]? # Optional separator (space, dot, dash)
|
||||||
|
\d{1,9} # Match 1-9 digits (remaining part of the phone number)
|
||||||
|
(?!\w) # Assert position is not followed by a word character (prevents partial matches)
|
||||||
|
""",
|
||||||
|
re.VERBOSE, # Allows for the use of comments and whitespace in the pattern for readability
|
||||||
|
)
|
||||||
|
|
||||||
if ES_ENABLE_CONSOLE_LOGGING and DEV:
|
if ES_ENABLE_CONSOLE_LOGGING and DEV:
|
||||||
es_trace_logger = logging.getLogger("elasticsearch.trace")
|
es_trace_logger = logging.getLogger("elasticsearch.trace")
|
||||||
|
|
|
@ -321,17 +321,18 @@ def check_for_spam_content(data):
|
||||||
- Toll free numbers
|
- Toll free numbers
|
||||||
- Vanity toll free numbers
|
- Vanity toll free numbers
|
||||||
- Links in the text.
|
- Links in the text.
|
||||||
|
- Any phone number-ish string of digits
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# keep only the digits in text
|
# keep only digits
|
||||||
digits = "".join(filter(type(data).isdigit, data))
|
digits = "".join(filter(type(data).isdigit, data))
|
||||||
is_toll_free = settings.TOLL_FREE_REGEX.match(digits)
|
is_toll_free = settings.TOLL_FREE_REGEX.match(digits)
|
||||||
|
|
||||||
is_nanp_number = any(settings.NANP_REGEX.findall(data))
|
is_nanp_number = any(settings.NANP_REGEX.findall(data))
|
||||||
|
is_phone_number = any(settings.ANY_PHONE_NUMBER.findall(data))
|
||||||
|
|
||||||
has_links = has_blocked_link(data)
|
has_links = has_blocked_link(data)
|
||||||
|
|
||||||
return is_toll_free or is_nanp_number or has_links
|
return is_toll_free or is_nanp_number or is_phone_number or has_links
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=settings.WEBPACK_LRU_CACHE)
|
@lru_cache(maxsize=settings.WEBPACK_LRU_CACHE)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче