Add blocklists with commonly-abused names

This commit is contained in:
Vincent 2021-09-17 15:27:58 +02:00
Родитель 6bda9a28c9
Коммит 5549f430bc
5 изменённых файлов: 786 добавлений и 4 удалений

Просмотреть файл

@ -30,8 +30,21 @@ class EmailsConfig(AppConfig):
)
with open(badwords_file_path, 'r') as badwords_file:
for word in badwords_file:
if len(word.strip()) > 0 and word.strip()[0] == "#":
continue
badwords.append(word.strip())
self.badwords = badwords
blocklist = []
blocklist_file_path = os.path.join(
settings.BASE_DIR, 'emails', 'blocklist.txt'
)
with open(blocklist_file_path, 'r') as blocklist_file:
for word in blocklist_file:
if len(word.strip()) > 0 and word.strip()[0] == "#":
continue
blocklist.append(word.strip())
self.blocklist = blocklist
def ready(self):
import emails.signals

Просмотреть файл

@ -1,3 +1,4 @@
# Source: https://www.cs.cmu.edu/~biglou/resources/bad-words.txt
abbo
abo
abortion

716
emails/blocklist.txt Normal file
Просмотреть файл

@ -0,0 +1,716 @@
mozilla
firefox
relay
firefox-relay
fxrelay
noreply
no-reply
spam
superuser
company
foundation
# Source: https://github.com/michaldudek/subdomain-blacklist/blob/6fa207bf1dd7a04181676835e29d7ae353e2aad0/blacklist.txt
#
# By Michał Dudek
#
# Available for use under the MIT License
#
# Copyright (C) 2014 Michał Dudek
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
about
aboutu
abuse
acme
ad
admanager
admin
admindashboard
administrator
ads
adsense
adult
adword
affiliate
affiliatepage
afp
alpha
anal
analytic
android
answer
anu
anus
ap
api
app
appengine
application
appnew
arse
asdf
a
as
ass
asset
asshole
atf
backup
ball
balls
ballsack
bank
base
bastard
beginner
beta
biatch
billing
binarie
binary
bitch
biz
blackberry
blog
blogsearch
bloody
blowjob
blowjobs
bollock
boner
boob
boobs
book
bugger
bum
butt
buttplug
buy
buzz
c
cache
calendar
cart
catalog
ceo
chart
chat
checkout
ci
cia
client
clitori
clitoris
cname
cnarne
cock
code
community
confirm
confirmation
contact
contact-u
contactu
content
controlpanel
coon
core
corp
countrie
country
cp
cpanel
crap
cs
cunt
cv
damn
dashboard
data
demo
deploy
deployment
desktop
dev
devel
developement
developer
development
dick
dike
dildo
dir
directory
discussion
dl
doc
document
donate
download
dyke
e
earth
email
enable
encrypted
engine
error
errorlog
fag
faggot
fbi
feature
feck
feed
feedburner
feedproxy
felching
fellate
fellatio
file
finance
flange
folder
forgotpassword
forum
friend
ftp
fuck
fudgepacker
fun
fusion
gadget
gear
geographic
gettingstarted
git
gitlab
gmail
go
goddamn
goto
gov
graph
group
hell
help
home
homo
html
htrnl
http
i
image
img
investor
invoice
io
ios
ipad
iphone
irnage
irng
item
j
jenkin
jerk
jira
jizz
job
join
js
knobend
lab
labia
legal
lesbo
list
lmao
lmfao
local
locale
location
log
login
logout
m
mail
manage
manager
map
marketing
me
media
message
misc
mm
mms
mobile
model
money
movie
muff
my
mystore
n
net
network
new
newsite
nigga
nigger
npm
ns
omg
online
order
org
other
p0rn
pack
packagist
page
partner
partnerpage
password
payment
peni
penis
people
person
pi
pis
piss
place
podcast
policy
poop
pop
pop3
popular
porn
pr0n
pricing
prick
print
privacy
private
prod
product
production
profile
promo
promotion
proxie
proxies
proxy
pube
public
purchase
pussy
queer
querie
queries
query
r
radio
random
reader
recover
redirect
register
registration
release
report
research
resolve
resolver
rnail
rnicrosoft
root
rs
rss
sale
sandbox
scholar
scrotum
search
secure
seminar
server
service
sex
sftp
sh1t
shit
shop
shopping
shortcut
signin
signup
site
sitemap
sitenew
sketchup
sky
slash
slashinvoice
slut
sm
smegma
sms
smtp
soap
software
sorry
spreadsheet
spunk
srntp
ssh
ssl
stage
staging
stat
static
statistic
statu
store
suggest
suggestquerie
suggestquery
support
survey
surveytool
svn
sync
sysadmin
talk
talkgadget
test
tester
testing
text
tit
tits
tool
toolbar
tosser
trac
translate
translation
translator
trend
turd
twat
txt
ul
upload
vagina
validation
vid
video
video-stat
voice
w
wank
wave
webdisk
webmail
webmaster
webrnail
whm
whoi
whore
wifi
wiki
wtf
ww
www
wwww
xhtml
xhtrnl
xml
xxx
# Source: https://github.com/wesleyraptor/streamingphish/blob/1884a2df44b75004f0cbdde0edf19ed1c24eda86/training_data/targeted_brands/initial_brands.txt
#
# Copyright 2018 Wes Connell
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
appleid
microsoftonline
microsoft-int
itunes
netflix
paypal
apple
offerup
yahoo
microsoft
snapchat
twitter
facebook
instagram
usbank
wellsfargo
amazon
americanexpress
bankofamerica
barclays
capitalone
citibank
citigroup
chase
dropbox
ebay
github
hotmail
hsbc
linkedin
mastercard
usaa
gmail
tdbank
# Source: https://github.com/wesleyraptor/streamingphish/blob/1884a2df44b75004f0cbdde0edf19ed1c24eda86/training_data/keywords/initial.txt
#
# Copyright 2018 Wes Connell
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
acc
acces
access
account
accountid
accountingservice
accountlocked
accounts
action
activity
alert
amazon
apple
appleid
applestore
applesupport
apps
appstore
auth
authentication
authorized
banking
bankofamerica
billing
bin
blockchain
blogspot
business
cancel
case
center
cgi
cgibin
chase
check
cloud
cloudfront
com
confirm
confirmation
contact
country
customer
customers
data
detail
details
disable
disabled
enable
facebook
find
fix
for
from
gift
github
help
helpdesk
home
hotmail
icloud
identity
idmsa
iforgot
in
inc
info
information
informations
intl
invoice
issue
itunes
limit
limited
limiteds
locked
login
loginpage
manage
management
manager
microsoftonline
my
myaccount
myaccounts
netflix
notice
notification
now
online
order
outlook
page
pal
password
pay
payment
paypal
privacy
problem
protect
purchase
receipt
recover
recovery
redirect
refund
report
request
reset
resolution
resolutioncenter
resolve
restore
review
secure
secured
security
securitys
server
service
services
settings
shop
sign
signin
stage
statement
store
submit
subscription
summary
support
suspicious
system
unlock
unlocked
update
updateaccount
updated
updates
upgrade
usbank
user
verif
verification
verifications
verified
verify
verifyaccount
view
web
webapps
your
youraccount
# Source: https://github.com/wesleyraptor/streamingphish/blob/1884a2df44b75004f0cbdde0edf19ed1c24eda86/training_data/fqdn_keywords/initial.txt
#
# Copyright 2018 Wes Connell
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
info
login
your
sign
in
store
apps
cgi
intl
help
my
user
idmsa
data
acc
case
web
system
auth
for
fix
acces
chase
pay
pal
now
issue
service
account
view
action
limit
gift
shop
reset
find
submit
from

Просмотреть файл

@ -55,8 +55,9 @@ class Profile(models.Model):
valid_subdomain_pattern = re.compile('^(?!-)[A-Za-z0-9-]{1,63}(?<!-)$')
valid = valid_subdomain_pattern.match(subdomain) is not None
bad_word = has_bad_words(subdomain)
blocked_word = is_blocklisted(subdomain)
taken = Profile.objects.filter(subdomain=subdomain).count() > 0
return valid and not bad_word and not taken
return valid and not bad_word and not blocked_word and not taken
@property
def num_active_address(self):
@ -188,6 +189,13 @@ def has_bad_words(value):
)
def is_blocklisted(value):
    """Return True when ``value`` equals an entry of the configured blocklist.

    The match is on the whole value (not a substring search) and ignores
    letter case: the subdomain pattern used by callers accepts both upper-
    and lower-case letters, so an exact case-sensitive comparison would let
    e.g. ``Mozilla`` slip past a ``mozilla`` entry.

    :param value: the candidate subdomain or address local part.
    :return: ``True`` if the value is blocklisted, ``False`` otherwise.
    """
    if not isinstance(value, str):
        # A non-string never compared equal to a (string) blocklist entry;
        # keep returning False instead of failing on ``.lower()``.
        return False
    candidate = value.lower()
    return any(
        blockedword.lower() == candidate
        for blockedword in emails_config.blocklist
    )
def get_domain_numerical(domain_address):
# get domain name from the address
domains_keys = list(DOMAINS.keys())
@ -270,10 +278,11 @@ class RelayAddress(models.Model):
domain_numerical = get_domain_numerical(domain)
relay_address = RelayAddress.objects.create(user=user_profile.user, domain=domain_numerical)
address_contains_badword = has_bad_words(relay_address.address)
address_is_blocklisted = is_blocklisted(relay_address.address)
address_already_deleted = DeletedAddress.objects.filter(
address_hash=address_hash(relay_address.address, domain=domain)
).count()
if address_already_deleted > 0 or address_contains_badword:
if address_already_deleted > 0 or address_contains_badword or address_is_blocklisted:
relay_address.delete()
num_tries += 1
return RelayAddress.make_relay_address(user_profile, num_tries, domain)
@ -332,6 +341,7 @@ class DomainAddress(models.Model):
)
address_contains_badword = False
address_is_blocklisted = False
if not address:
# FIXME: if the alias is randomly generated and has bad words
# we should retry like make_relay_address does
@ -340,10 +350,11 @@ class DomainAddress(models.Model):
address = address_default()
# Only check for bad words if randomly generated
address_contains_badword = has_bad_words(address)
address_is_blocklisted = is_blocklisted(address)
address_already_deleted = DeletedAddress.objects.filter(
address_hash=address_hash(address, user_subdomain)
).count()
if address_contains_badword or address_already_deleted > 0:
if address_contains_badword or address_is_blocklisted or address_already_deleted > 0:
raise CannotMakeAddressException(
TRY_DIFFERENT_VALUE_ERR_MSG.format('Email address with subdomain')
)

Просмотреть файл

@ -23,6 +23,7 @@ from ..models import (
DomainAddress,
get_domain_numerical,
has_bad_words,
is_blocklisted,
NOT_PREMIUM_USER_ERR_MSG,
Profile,
RelayAddress,
@ -39,6 +40,12 @@ class MiscEmailModelsTest(TestCase):
def test_has_bad_words_without_bad_words(self):
    # An innocuous word must not be flagged by the bad-word check.
    flagged = has_bad_words('happy')
    assert not flagged
def test_is_blocklisted_with_blocked_word(self):
    # 'mozilla' is an entry in emails/blocklist.txt, so it must be reported.
    blocked = is_blocklisted('mozilla')
    assert blocked
def test_is_blocklisted_without_blocked_words(self):
    # A value that is not an exact blocklist entry must pass the check.
    blocked = is_blocklisted('non-blocked-word')
    assert not blocked
@override_settings(TEST_MOZMAIL=False, RELAY_FIREFOX_DOMAIN='firefox.com')
def test_address_hash_without_subdomain_domain_firefox(self):
address = 'aaaaaaaaa'
@ -364,7 +371,7 @@ class ProfileTest(TestCase):
assert premium_profile.has_unlimited == True
def test_add_subdomain_to_new_unlimited_profile(self):
subdomain = 'test'
subdomain = 'test-subdomain'
premium_user = baker.make(User)
random_sub = random.choice(
settings.SUBSCRIPTIONS_WITH_UNLIMITED.split(',')
@ -432,9 +439,33 @@ class ProfileTest(TestCase):
return
self.fail("Should have raised CannotMakeSubdomainException")
def test_add_subdomain_to_unlimited_profile_with_blocked_word_subdomain_raises_exception(self):
    # Build a user whose FxA social account carries one of the
    # subscriptions listed in SUBSCRIPTIONS_WITH_UNLIMITED.
    premium_user = baker.make(User)
    unlimited_subscriptions = settings.SUBSCRIPTIONS_WITH_UNLIMITED.split(',')
    chosen_subscription = random.choice(unlimited_subscriptions)
    baker.make(
        SocialAccount,
        user=premium_user,
        provider='fxa',
        extra_data={'subscriptions': [chosen_subscription]},
    )
    profile = Profile.objects.get(user=premium_user)

    # 'mozilla' is a blocklist entry, so claiming it must be refused.
    try:
        profile.add_subdomain('mozilla')
    except CannotMakeSubdomainException as exc:
        assert exc.message == 'error-subdomain-not-available'
    else:
        self.fail("Should have raised CannotMakeSubdomainException")
def test_subdomain_available_bad_word_returns_False(self):
    # 'angry' is presumably an entry of the bad-words list (not visible
    # here); such a subdomain must be reported as unavailable.
    available = Profile.subdomain_available('angry')
    assert available == False
def test_subdomain_available_blocked_word_returns_False(self):
    # 'mozilla' is a blocklist entry, so it must be reported as unavailable.
    available = Profile.subdomain_available('mozilla')
    assert available == False
def test_subdomain_available_taken_returns_False(self):
premium_user = baker.make(User)
random_sub = random.choice(
@ -590,6 +621,16 @@ class DomainAddressTest(TestCase):
return
self.fail("Should have raise CannotMakeAddressException")
@patch('emails.models.address_default')
def test_make_domain_address_doesnt_randomly_generate_blocked_word(self, address_default_mocked):
    # Force the "random" address generator to yield a blocklisted word.
    address_default_mocked.return_value = 'mozilla'
    expected_message = TRY_DIFFERENT_VALUE_ERR_MSG.format('Email address with subdomain')

    # No explicit address is passed, so the generated one is checked
    # against the blocklist and must be rejected.
    try:
        DomainAddress.make_domain_address(self.user_profile)
    except CannotMakeAddressException as exc:
        assert exc.message == expected_message
    else:
        self.fail("Should have raise CannotMakeAddressException")
def test_delete_adds_deleted_address_object(self):
domain_address = baker.make(DomainAddress, user=self.user)
domain_address_hash = sha256(