2020-06-26 16:59:07 +03:00
|
|
|
import logging
|
2020-07-21 17:52:05 +03:00
|
|
|
import os
|
2024-06-28 00:23:06 +03:00
|
|
|
from typing import NamedTuple
|
2020-06-26 16:59:07 +03:00
|
|
|
|
2024-04-15 18:37:08 +03:00
|
|
|
from django.apps import AppConfig, apps
|
|
|
|
from django.conf import settings
|
|
|
|
from django.utils.functional import cached_property
|
|
|
|
|
2020-06-26 22:20:48 +03:00
|
|
|
import boto3
|
2022-02-24 01:16:30 +03:00
|
|
|
from botocore.config import Config
|
2023-05-10 02:22:04 +03:00
|
|
|
from mypy_boto3_ses.client import SESClient
|
2020-06-26 22:20:48 +03:00
|
|
|
|
2020-06-26 16:59:07 +03:00
|
|
|
# Logger registered under the "events" name; used in this module to record
# failures while creating the AWS clients.
logger = logging.getLogger("events")
|
|
|
|
|
|
|
|
|
2024-06-28 00:23:06 +03:00
|
|
|
# Bad words are partitioned by length into a "short" and a "long" group.
class BadWords(NamedTuple):
    """Pair of bad-word collections, split by word length."""

    # Words of 4 characters or fewer. A hit is an exact match to a short word.
    short: set[str]
    # Words of 5 characters or more. A hit contains a long word.
    long: list[str]
|
|
|
|
|
|
|
|
|
2019-06-05 17:38:33 +03:00
|
|
|
class EmailsConfig(AppConfig):
    """Django app configuration for the ``emails`` app.

    Exposes cached boto3 clients for SES and S3, and loads the bad-word and
    blocklist term files when the config object is constructed.
    """

    name = "emails"

    @cached_property
    def ses_client(self) -> SESClient | None:
        """Return a boto3 SES client, or ``None`` if creating it failed.

        Any exception raised while creating the client is logged and
        swallowed so that a missing or broken AWS setup does not crash
        the caller.
        """
        try:
            return boto3.client("ses", region_name=settings.AWS_REGION)
        except Exception:
            logger.exception("exception during SES connect")
            return None

    @cached_property
    def s3_client(self):
        """Return a boto3 S3 client, or ``None`` if creating it failed.

        Any exception raised while building the config or the client is
        logged and swallowed, mirroring ``ses_client``.
        """
        try:
            s3_config = Config(
                region_name=settings.AWS_REGION,
                retries={
                    # max_attempts includes the initial attempt to get the email
                    # so this does not retry with backoff, to avoid timeouts
                    "max_attempts": 1,
                    "mode": "standard",
                },
            )
            return boto3.client("s3", config=s3_config)
        except Exception:
            logger.exception("exception during S3 connect")
            # Explicit, to match ses_client's failure contract.
            return None

    def __init__(self, app_name, app_module):
        """Initialize the app config and load the term lists.

        Sets ``self.badwords`` (a ``BadWords`` tuple split by word length)
        and ``self.blocklist`` (a set of terms).
        """
        super().__init__(app_name, app_module)

        # badwords file from:
        # https://www.cs.cmu.edu/~biglou/resources/bad-words.txt
        # Using `.text` extension because of
        # https://github.com/dependabot/dependabot-core/issues/1657
        _badwords = self._load_terms("badwords.text")
        self.badwords = BadWords(
            short={word for word in _badwords if len(word) <= 4},
            # Deduplicated and sorted so the long-word list is deterministic.
            long=sorted({word for word in _badwords if len(word) > 4}),
        )
        self.blocklist = set(self._load_terms("blocklist.text"))

    def _load_terms(self, filename: str) -> list[str]:
        """Load a list of terms from a file in the ``emails`` directory.

        Each line is stripped of surrounding whitespace; blank lines and
        lines starting with ``#`` are skipped. Order and duplicates are
        preserved.

        Args:
            filename: Name of the file under ``<BASE_DIR>/emails/``.

        Returns:
            The terms in file order.

        Raises:
            OSError: If the file cannot be opened.
        """
        terms_file_path = os.path.join(settings.BASE_DIR, "emails", filename)
        # Explicit encoding so decoding does not depend on the host locale.
        with open(terms_file_path, encoding="utf-8") as terms_file:
            stripped = (raw_word.strip() for raw_word in terms_file)
            return [word for word in stripped if word and not word.startswith("#")]
|
2021-09-17 16:27:58 +03:00
|
|
|
|
2024-01-26 19:58:25 +03:00
|
|
|
|
|
|
|
def emails_config() -> EmailsConfig:
    """Return the registered ``emails`` app config.

    Raises:
        TypeError: If the registered config is not an ``EmailsConfig``.
    """
    config = apps.get_app_config("emails")
    if isinstance(config, EmailsConfig):
        return config
    raise TypeError("emails_config must be type EmailsConfig")
|
|
|
|
|
|
|
|
|
|
|
|
def ses_client() -> SESClient | None:
    """Return the SES client held by the ``emails`` app config (may be None)."""
    config = emails_config()
    return config.ses_client
|
|
|
|
|
|
|
|
|
|
|
|
def s3_client():
    """Return the S3 client held by the ``emails`` app config."""
    config = emails_config()
    return config.s3_client
|