diff --git a/emails/apps.py b/emails/apps.py index fe87a44b9..2cd9cbc59 100644 --- a/emails/apps.py +++ b/emails/apps.py @@ -30,8 +30,21 @@ class EmailsConfig(AppConfig): ) with open(badwords_file_path, 'r') as badwords_file: for word in badwords_file: + if len(word.strip()) > 0 and word.strip()[0] == "#": + continue badwords.append(word.strip()) self.badwords = badwords + blocklist = [] + blocklist_file_path = os.path.join( + settings.BASE_DIR, 'emails', 'blocklist.txt' + ) + with open(blocklist_file_path, 'r') as blocklist_file: + for word in blocklist_file: + if len(word.strip()) > 0 and word.strip()[0] == "#": + continue + blocklist.append(word.strip()) + self.blocklist = blocklist + def ready(self): import emails.signals diff --git a/emails/badwords.txt b/emails/badwords.txt index d4defecb3..7b2f9dd31 100644 --- a/emails/badwords.txt +++ b/emails/badwords.txt @@ -1,3 +1,4 @@ +# Source: https://www.cs.cmu.edu/~biglou/resources/bad-words.txt abbo abo abortion diff --git a/emails/blocklist.txt b/emails/blocklist.txt new file mode 100644 index 000000000..4d608d02b --- /dev/null +++ b/emails/blocklist.txt @@ -0,0 +1,716 @@ +mozilla +firefox +relay +firefox-relay +fxrelay +noreply +no-reply +spam +superuser +company +foundation + +# Source: https://github.com/michaldudek/subdomain-blacklist/blob/6fa207bf1dd7a04181676835e29d7ae353e2aad0/blacklist.txt +# +# By Michał Dudek +# +# Available for use under the MIT License +# +# Copyright (C) 2014 Michał Dudek +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +about +aboutu +abuse +acme +ad +admanager +admin +admindashboard +administrator +ads +adsense +adult +adword +affiliate +affiliatepage +afp +alpha +anal +analytic +android +answer +anu +anus +ap +api +app +appengine +application +appnew +arse +asdf +a +as +ass +asset +asshole +atf +backup +ball +balls +ballsack +bank +base +bastard +beginner +beta +biatch +billing +binarie +binary +bitch +biz +blackberry +blog +blogsearch +bloody +blowjob +blowjobs +bollock +boner +boob +boobs +book +bugger +bum +butt +buttplug +buy +buzz +c +cache +calendar +cart +catalog +ceo +chart +chat +checkout +ci +cia +client +clitori +clitoris +cname +cnarne +cock +code +community +confirm +confirmation +contact +contact-u +contactu +content +controlpanel +coon +core +corp +countrie +country +cp +cpanel +crap +cs +cunt +cv +damn +dashboard +data +demo +deploy +deployment +desktop +dev +devel +developement +developer +development +dick +dike +dildo +dir +directory +discussion +dl +doc +document +donate +download +dyke +e +earth +email +enable +encrypted +engine +error +errorlog +fag +faggot +fbi +feature +feck +feed +feedburner +feedproxy +felching +fellate +fellatio +file +finance +flange +folder +forgotpassword +forum +friend +ftp +fuck +fudgepacker +fun +fusion +gadget +gear +geographic +gettingstarted +git +gitlab +gmail +go +goddamn +goto +gov +graph +group +hell +help +home +homo +html +htrnl +http +i +image +img +investor +invoice +io +ios +ipad +iphone +irnage +irng +item +j +jenkin +jerk +jira +jizz +job +join +js +knobend +lab +labia +legal +lesbo +list +lmao +lmfao +local +locale +location +log +login +logout +m +mail +manage +manager +map +marketing +me +media +message +misc +mm +mms +mobile +model +money +movie +muff +my +mystore +n +net +network +new +newsite +nigga +nigger +npm +ns +omg +online +order +org +other +p0rn +pack +packagist +page +partner +partnerpage +password +payment +peni +penis +people +person +pi +pis +piss +place +podcast +policy +poop +pop +pop3 +popular +porn +pr0n +pricing +prick +print +privacy +private +prod +product +production +profile +promo +promotion +proxie +proxies +proxy +pube +public +purchase +pussy +queer +querie +queries +query +r +radio +random +reader +recover +redirect +register +registration +release +report +research +resolve +resolver +rnail +rnicrosoft +root +rs +rss +sale +sandbox +scholar +scrotum +search +secure +seminar +server +service +sex +sftp +sh1t +shit +shop +shopping +shortcut +signin +signup +site +sitemap +sitenew +sketchup +sky +slash +slashinvoice +slut +sm +smegma +sms +smtp +soap +software +sorry +spreadsheet +spunk +srntp +ssh +ssl +stage +staging +stat +static +statistic +statu +store +suggest +suggestquerie +suggestquery +support +survey +surveytool +svn +sync +sysadmin +talk +talkgadget +test +tester +testing +text +tit +tits +tool +toolbar +tosser +trac +translate +translation +translator +trend +turd +twat +txt +ul +upload +vagina +validation +vid +video +video-stat +voice +w +wank +wave +webdisk +webmail +webmaster +webrnail +whm +whoi +whore +wifi +wiki +wtf +ww +www +wwww +xhtml +xhtrnl +xml +xxx + +# Source: https://github.com/wesleyraptor/streamingphish/blob/1884a2df44b75004f0cbdde0edf19ed1c24eda86/training_data/targeted_brands/initial_brands.txt +# +# Copyright 2018 Wes Connell +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +appleid +microsoftonline +microsoft-int +itunes +netflix +paypal +apple +offerup +yahoo +microsoft +snapchat +twitter +facebook +instagram +usbank +wellsfargo +amazon +americanexpress +bankofamerica +barclays +capitalone +citibank +citigroup +chase +dropbox +ebay +github +hotmail +hsbc +linkedin +mastercard +usaa +gmail +tdbank + + +# Source: https://github.com/wesleyraptor/streamingphish/blob/1884a2df44b75004f0cbdde0edf19ed1c24eda86/training_data/keywords/initial.txt +# +# Copyright 2018 Wes Connell +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +acc +acces +access +account +accountid +accountingservice +accountlocked +accounts +action +activity +alert +amazon +apple +appleid +applestore +applesupport +apps +appstore +auth +authentication +authorized +banking +bankofamerica +billing +bin +blockchain +blogspot +business +cancel +case +center +cgi +cgibin +chase +check +cloud +cloudfront +com +confirm +confirmation +contact +country +customer +customers +data +detail +details +disable +disabled +enable +facebook +find +fix +for +from +gift +github +help +helpdesk +home +hotmail +icloud +identity +idmsa +iforgot +in +inc +info +information +informations +intl +invoice +issue +itunes +limit +limited +limiteds +locked +login +loginpage +manage +management +manager +microsoftonline +my +myaccount +myaccounts +netflix +notice +notification +now +online +order +outlook +page +pal +password +pay +payment +paypal +privacy +problem +protect +purchase +receipt +recover +recovery +redirect +refund +report +request +reset +resolution +resolutioncenter +resolve +restore +review +secure +secured +security +securitys +server +service +services +settings +shop +sign +signin +stage +statement +store +submit +subscription +summary +support +suspicious +system +unlock +unlocked +update +updateaccount +updated +updates +upgrade +usbank +user +verif +verification +verifications +verified +verify +verifyaccount +view +web +webapps +your +youraccount + +# Source: https://github.com/wesleyraptor/streamingphish/blob/1884a2df44b75004f0cbdde0edf19ed1c24eda86/training_data/fqdn_keywords/initial.txt +# +# Copyright 2018 Wes Connell +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +info +login +your +sign +in +store +apps +cgi +intl +help +my +user +idmsa +data +acc +case +web +system +auth +for +fix +acces +chase +pay +pal +now +issue +service +account +view +action +limit +gift +shop +reset +find +submit +from diff --git a/emails/models.py b/emails/models.py index 7025139b7..feecdd70c 100644 --- a/emails/models.py +++ b/emails/models.py @@ -55,8 +55,9 @@ class Profile(models.Model): valid_subdomain_pattern = re.compile('^(?!-)[A-Za-z0-9-]{1,63}(? 0 - return valid and not bad_word and not taken + return valid and not bad_word and not blocked_word and not taken @property def num_active_address(self): @@ -188,6 +189,13 @@ def has_bad_words(value): ) +def is_blocklisted(value): + return any( + blockedword == value + for blockedword in emails_config.blocklist + ) + + def get_domain_numerical(domain_address): # get domain name from the address domains_keys = list(DOMAINS.keys()) @@ -270,10 +278,11 @@ class RelayAddress(models.Model): domain_numerical = get_domain_numerical(domain) relay_address = RelayAddress.objects.create(user=user_profile.user, domain=domain_numerical) address_contains_badword = has_bad_words(relay_address.address) + address_is_blocklisted = is_blocklisted(relay_address.address) address_already_deleted = DeletedAddress.objects.filter( address_hash=address_hash(relay_address.address, domain=domain) ).count() - if address_already_deleted > 0 or address_contains_badword: + if address_already_deleted > 0 or address_contains_badword or address_is_blocklisted: relay_address.delete() num_tries += 1 return RelayAddress.make_relay_address(user_profile, num_tries, domain) @@ -332,6 +341,7 @@ class DomainAddress(models.Model): ) address_contains_badword = False + address_is_blocklisted = False if not address: # FIXME: if the alias is randomly generated and has bad words # we should retry like make_relay_address does @@ -340,10 +350,11 @@ class DomainAddress(models.Model): address = address_default() # Only check for bad words if randomly generated address_contains_badword = has_bad_words(address) + address_is_blocklisted = is_blocklisted(address) address_already_deleted = DeletedAddress.objects.filter( address_hash=address_hash(address, user_subdomain) ).count() - if address_contains_badword or address_already_deleted > 0: + if address_contains_badword or address_is_blocklisted or address_already_deleted > 0: raise CannotMakeAddressException( TRY_DIFFERENT_VALUE_ERR_MSG.format('Email address with subdomain') ) diff --git a/emails/tests/models_tests.py b/emails/tests/models_tests.py index 1efc674bb..1380174a5 100644 --- a/emails/tests/models_tests.py +++ b/emails/tests/models_tests.py @@ -23,6 +23,7 @@ from ..models import ( DomainAddress, get_domain_numerical, has_bad_words, + is_blocklisted, NOT_PREMIUM_USER_ERR_MSG, Profile, RelayAddress, @@ -39,6 +40,12 @@ class MiscEmailModelsTest(TestCase): def test_has_bad_words_without_bad_words(self): assert not has_bad_words('happy') + def test_is_blocklisted_with_blocked_word(self): + assert is_blocklisted('mozilla') + + def test_is_blocklisted_without_blocked_words(self): + assert not is_blocklisted('non-blocked-word') + @override_settings(TEST_MOZMAIL=False, RELAY_FIREFOX_DOMAIN='firefox.com') def test_address_hash_without_subdomain_domain_firefox(self): address = 'aaaaaaaaa' @@ -364,7 +371,7 @@ class ProfileTest(TestCase): assert premium_profile.has_unlimited == True def test_add_subdomain_to_new_unlimited_profile(self): - subdomain = 'test' + subdomain = 'test-subdomain' premium_user = baker.make(User) random_sub = random.choice( settings.SUBSCRIPTIONS_WITH_UNLIMITED.split(',') @@ -432,9 +439,33 @@ class ProfileTest(TestCase): return self.fail("Should have raised CannotMakeSubdomainException") + def test_add_subdomain_to_unlimited_profile_with_blocked_word_subdomain_raises_exception(self): + subdomain = 'mozilla' + premium_user = baker.make(User) + random_sub = random.choice( + settings.SUBSCRIPTIONS_WITH_UNLIMITED.split(',') + ) + baker.make( + SocialAccount, + user=premium_user, + provider='fxa', + extra_data={'subscriptions': [random_sub]} + ) + premium_profile = Profile.objects.get(user=premium_user) + + try: + premium_profile.add_subdomain(subdomain) + except CannotMakeSubdomainException as e: + assert e.message == 'error-subdomain-not-available' + return + self.fail("Should have raised CannotMakeSubdomainException") + def test_subdomain_available_bad_word_returns_False(self): assert Profile.subdomain_available('angry') == False + def test_subdomain_available_blocked_word_returns_False(self): + assert Profile.subdomain_available('mozilla') == False + def test_subdomain_available_taken_returns_False(self): premium_user = baker.make(User) random_sub = random.choice( @@ -590,6 +621,16 @@ class DomainAddressTest(TestCase): return self.fail("Should have raise CannotMakeAddressException") + @patch('emails.models.address_default') + def test_make_domain_address_doesnt_randomly_generate_blocked_word(self, address_default_mocked): + address_default_mocked.return_value = 'mozilla' + try: + DomainAddress.make_domain_address(self.user_profile) + except CannotMakeAddressException as e: + assert e.message == TRY_DIFFERENT_VALUE_ERR_MSG.format('Email address with subdomain') + return + self.fail("Should have raise CannotMakeAddressException") + def test_delete_adds_deleted_address_object(self): domain_address = baker.make(DomainAddress, user=self.user) domain_address_hash = sha256(