2014-07-21 08:30:53 +04:00
#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Copyright (c) 2014 Mozilla Corporation
2015-03-27 18:39:55 +03:00
import collections
import json
2014-07-21 08:30:53 +04:00
import logging
import random
import netaddr
import sys
2014-07-28 20:35:45 +04:00
from bson.son import SON
2014-07-21 08:30:53 +04:00
from datetime import datetime
from configlib import getConfig, OptionParser
from logging.handlers import SysLogHandler
from pymongo import MongoClient
2015-03-27 18:39:55 +03:00
from collections import Counter
2017-01-19 01:36:25 +03:00
from kombu import Connection, Exchange
2014-07-21 08:30:53 +04:00
2016-10-18 00:48:02 +03:00
import sys
import os
2018-10-16 22:45:04 +03:00
from mozdef_util.utilities.toUTC import toUTC
from mozdef_util.elasticsearch_client import ElasticsearchClient
from mozdef_util.query_models import SearchQuery, PhraseMatch
2016-10-24 23:56:29 +03:00
2016-10-18 00:48:02 +03:00
2014-07-21 08:30:53 +04:00
# Module-level logger shared by every function in this script; it is named
# after the invoking command (sys.argv[0]).
logger = logging.getLogger(sys.argv[0])
def loggerTimeStamp(self, record, datefmt=None):
    """Return the current time, passed through ``toUTC``, as an ISO string.

    ``initLogger`` assigns this function to ``formatter.formatTime``, so it
    keeps the signature of ``logging.Formatter.formatTime``. None of the
    arguments are used: the timestamp comes from ``datetime.now()`` at the
    moment of formatting, not from the log record.

    Returns:
        The result of ``toUTC(datetime.now()).isoformat()``.
    """
    return toUTC(datetime.now()).isoformat()
def initLogger():
    """Configure the module-level ``logger`` according to ``options.output``.

    * ``options.output == 'syslog'``: attach a ``SysLogHandler`` pointed at
      ``(options.sysloghostname, options.syslogport)``.
    * anything else: attach a ``StreamHandler`` on stderr that uses the
      custom formatter built here.

    Reads the module globals ``logger`` and ``options``; returns nothing.

    NOTE(review): the handler-attachment lines and the ``else:`` were missing
    from the copy of this function under review. They were restored from
    what remained (the ``address=(...)))`` fragment, the imported
    ``SysLogHandler`` and the otherwise unused ``formatter``) -- confirm
    against version control.
    """
    # setLevel() rather than assigning .level: on Python 3.7+ it also
    # invalidates the logger's cached isEnabledFor() results.
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # Render %(asctime)s with our own UTC/ISO timestamp function.
    formatter.formatTime = loggerTimeStamp
    if options.output == 'syslog':
        logger.addHandler(
            SysLogHandler(
                address=(options.sysloghostname, options.syslogport)))
    else:
        sh = logging.StreamHandler(sys.stderr)
        sh.setFormatter(formatter)
        logger.addHandler(sh)
2015-04-01 19:53:14 +03:00
def isIPv4(ip):
    """Return True if *ip* parses as a dotted-quad IPv4 address or CIDR.

    Args:
        ip: candidate address, normally a string such as ``'10.1.2.3'`` or
            ``'10.1.2.0/24'``.

    Returns:
        bool: True only when *ip* contains exactly four dot-separated parts
        and ``netaddr.IPNetwork`` accepts it; False for everything else,
        including non-string input.

    NOTE(review): the ``try``/``except`` skeleton and the ``IPNetwork`` call
    were missing from the copy of this function under review. They were
    restored from the surviving comments and the two trailing
    ``return False`` lines -- confirm against version control.
    """
    try:
        # netaddr on its own considers 1 and 0 to be valid_ipv4
        # so a little sanity check prior to netaddr.
        # Use IPNetwork instead of valid_ipv4 to allow CIDR
        if '.' in ip and len(ip.split('.')) == 4:
            # some ips are quoted; IPNetwork raises on those, which the
            # handler below turns into False
            netaddr.IPNetwork(ip)
            return True
        return False
    except Exception:
        # Deliberate best-effort validator: any parsing failure (or a
        # non-string argument) simply means "not an IPv4 address".
        return False
2014-07-21 08:30:53 +04:00
def genMeteorID():
    """Return a random 24-character, zero-padded, lowercase hex string.

    Used as the ``_id`` of new attacker documents (see ``genNewAttacker``).
    The value comes from the ``random`` module, so it is an identifier, not
    a secret.
    """
    return '%024x' % random.randrange(16 ** 24)
2015-03-27 18:39:55 +03:00
def keypaths(nested):
    """Yield ``(path, value)`` for every leaf of a nested mapping.

    ``path`` is the list of keys leading to the leaf, like
    ``[u'_source', u'details', u'hostname']``. Values that are themselves
    mappings are descended into instead of being yielded, so an empty
    sub-mapping contributes nothing.

    Args:
        nested: a mapping, possibly containing further mappings as values.

    Yields:
        tuple: ``(list_of_keys, leaf_value)``.

    NOTE(review): the ``else:`` before the final ``yield`` was missing from
    the copy of this function under review and has been restored -- confirm
    against version control.
    """
    # collections.Mapping only exists up to Python 3.9; collections.abc is
    # the portable home. Fall back for Python 2, which this script was
    # originally written for.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for key, value in nested.items():
        if isinstance(value, Mapping):
            for subkey, subvalue in keypaths(value):
                yield [key] + subkey, subvalue
        else:
            yield [key], value
def dictpath(path):
    """Split a dotted key path into its components, one at a time.

    Args:
        path: string of the form ``key.subkey.subkey``.

    Yields:
        str: each dot-separated component, in order. A path without dots
        yields itself once.
    """
    for part in path.split('.'):
        yield '{0}'.format(part)
def mostCommon(listofdicts, dictkeypath):
    """Count the values found at one key path across many dictionaries.

    Given a list containing dictionaries, return the most common entries
    along a key path separated by ``.`` (i.e. ``dictkey.subkey.subkey``).

    Args:
        listofdicts: iterable of (possibly nested) dictionaries.
        dictkeypath: dotted key path to inspect in each dictionary.

    Returns:
        list: ``(value, count)`` tuples, most frequent first, as produced by
        ``Counter.most_common()``. Values must be hashable.

    NOTE(review): the lines binding ``path`` and ``inspectlist`` were missing
    from the copy of this function under review. They were restored from how
    the two names are used -- confirm against version control.
    """
    path = list(dictpath(dictkeypath))
    inspectlist = []
    for entry in listofdicts:
        for keypath, value in keypaths(entry):
            # Compare the key path exactly. The previous set-based
            # symmetric_difference() test ignored order and duplicates, so
            # 'a.b' also matched a leaf stored under b -> a.
            if keypath == path:
                inspectlist.append(value)
    return Counter(inspectlist).most_common()
2014-07-28 20:35:45 +04:00
def searchESForBROAttackers(es, threshold):
    """Find recent bro 'excessive HTTP errors' notices above a hit threshold.

    Queries the last two hours of events whose ``category`` is ``bronotice``
    and whose ``details.note`` is
    ``MozillaHTTPErrors::Excessive_HTTP_Errors_Attacker``.

    Args:
        es: client object handed to ``SearchQuery.execute``.
        threshold: a hit is kept only when its hit count is strictly
            greater than this number.

    Returns:
        list: the raw hit documents whose hit count exceeds *threshold*.

    Raises:
        ValueError, KeyError, IndexError: if a hit's ``details.sub`` field is
            missing or does not start with an integer.

    NOTE(review): the ``add_must([...])`` wrapper around the two
    ``PhraseMatch`` terms and the ``attackers.append(r)`` line were missing
    from the copy of this function under review and have been restored --
    confirm against version control.
    """
    search_query = SearchQuery(hours=2)
    search_query.add_must([
        PhraseMatch('category', 'bronotice'),
        PhraseMatch('details.note', 'MozillaHTTPErrors::Excessive_HTTP_Errors_Attacker')
    ])
    full_results = search_query.execute(es)
    results = full_results['hits']

    # Hit count is buried in the 'sub' field
    # as: 'sub': u'6 in 1.0 hr, eps: 0'
    # cull the records for hitcounts over the threshold before returning
    attackers = list()
    for r in results:
        hitcount = int(r['_source']['details']['sub'].split()[0])
        if hitcount > threshold:
            attackers.append(r)
    return attackers
2017-04-25 06:51:39 +03:00
2014-07-28 20:35:45 +04:00
# searchMongoAlerts(mozdefdb): aggregates source IP addresses from the most
# recent alerts in mozdefdb['alerts'] and, for each qualifying address,
# either builds a new attacker (genNewAttacker) or updates the existing
# document found in mozdefdb['attackers'].
#
# NOTE(review): this copy of the function is damaged and cannot run as-is:
#   * indentation is gone and bare date lines (VCS blame residue) are
#     interleaved with the code;
#   * several statements are cut short or absent -- the `$project` stage and
#     the aggregate([...]) list are never closed, three `alerts.find(` calls
#     have no visible arguments or closing parenthesis, the
#     `if options.broadcastattackers:` line has no body, and the `else:`
#     that must precede the 'Found existing attacker' branch is not visible;
#   * no statement that writes an alert or attacker back to the database is
#     visible.
# Restore the function from version control before editing its logic.
def searchMongoAlerts(mozdefdb):
2017-04-25 06:51:39 +03:00
attackers = mozdefdb['attackers']
alerts = mozdefdb['alerts']
2014-07-28 20:35:45 +04:00
# search the last X alerts for IP addresses
# aggregated by CIDR mask/24
2016-06-28 21:26:33 +03:00
2014-07-28 20:35:45 +04:00
# aggregate IPv4 addresses in the most recent alerts
# to find common attackers.
ipv4TopHits = alerts.aggregate([
2018-10-31 01:42:21 +03:00
# reverse sort the current alerts
{"$sort": {"utcepoch": -1}},
# most recent 100
{"$limit": 100},
# must have an ip address
{"$match": {"events.documentsource.details.sourceipaddress": {"$exists": True}}},
# must not be already related to an attacker
{"$match": {"attackerid": {"$exists": False}}},
# make each event into its own doc
{"$unwind": "$events"},
{"$project": {
"_id": 0,
# emit the source ip only
"sourceip": "$events.documentsource.details.sourceipaddress"
# count by ip
{"$group": {"_id": "$sourceip", "hitcount": {"$sum": 1}}},
# limit to those with X observances
{"$match": {"hitcount": {"$gt": 5}}},
# sort
{"$sort": SON([("hitcount", -1), ("_id", -1)])},
# top 10
{"$limit": 10}
2018-05-19 00:44:01 +03:00
for ip in ipv4TopHits:
2017-04-25 06:51:39 +03:00
# sanity check ip['_id'] which should be the ipv4 address
2015-04-01 20:16:18 +03:00
# NOTE(review): the IPSet literal below is an empty string; it looks as if
# an address was scrubbed from this copy -- confirm the intended value.
if isIPv4(ip['_id']) and ip['_id'] not in netaddr.IPSet(['']):
2014-07-28 20:35:45 +04:00
ipcidr = netaddr.IPNetwork(ip['_id'])
2018-05-19 00:44:01 +03:00
# set CIDR
2014-07-28 20:35:45 +04:00
# todo: lookup ipwhois for asn_cidr value
# potentially with a max mask value (i.e. asn is /8, limit attackers to /24)
2018-05-19 00:44:01 +03:00
ipcidr.prefixlen = 32
2016-06-28 21:26:33 +03:00
2014-07-28 20:35:45 +04:00
# append to or create attacker.
# does this match an existing attacker's indicators
if not ipcidr.ip.is_loopback() and not ipcidr.ip.is_private() and not ipcidr.ip.is_reserved():
2017-04-25 06:51:39 +03:00
logger.debug('Searching for existing attacker with ip ' + str(ipcidr))
2014-07-28 20:35:45 +04:00
attacker = attackers.find_one({'indicators.ipv4address': str(ipcidr)})
if attacker is None:
2017-04-25 06:51:39 +03:00
logger.debug('Attacker not found, creating new one')
2014-07-28 20:35:45 +04:00
# new attacker
# generate a meteor-compatible ID
# save the ES document type, index, id
newAttacker = genNewAttacker()
# str to get the ip/cidr rather than netblock cidr.
# i.e. '' not ''
matchingalerts = alerts.find(
2017-04-25 06:51:39 +03:00
total_events = 0
2014-07-28 20:35:45 +04:00
if matchingalerts is not None:
# update list of alerts this attacker matched.
for alert in matchingalerts:
# update alert with attackerID
alert['attackerid'] = newAttacker['_id']
2016-06-28 21:26:33 +03:00
2017-04-24 22:10:24 +03:00
total_events += len(alert['events'])
if len(alert['events']) > 0:
newAttacker['lastseentimestamp'] = toUTC(alert['events'][-1]['documentsource']['utctimestamp'])
2014-07-28 20:35:45 +04:00
newAttacker['alertscount'] = len(newAttacker['alerts'])
2017-04-24 22:10:24 +03:00
newAttacker['eventscount'] = total_events
2014-07-28 20:35:45 +04:00
2017-04-24 22:10:24 +03:00
# update geoIP info
2014-07-28 20:35:45 +04:00
latestGeoIP = [a['events'] for a in alerts.find(
updateAttackerGeoIP(mozdefdb, newAttacker['_id'], latestGeoIP)
2016-06-28 21:26:33 +03:00
2015-03-28 17:40:20 +03:00
if options.broadcastattackers:
2014-07-28 20:35:45 +04:00
2017-04-25 06:51:39 +03:00
logger.debug('Found existing attacker')
2014-07-28 20:35:45 +04:00
# if alert not present in this attackers list
# append this to the list
# todo: trim the list at X (i.e. last 100)
# search alerts without attackerid
matchingalerts = alerts.find(
"attackerid":{"$exists": False}
if matchingalerts is not None:
2017-04-25 06:51:39 +03:00
logger.debug('Matched alert with attacker')
2016-06-28 21:26:33 +03:00
2014-07-28 20:35:45 +04:00
# update list of alerts this attacker matched.
for alert in matchingalerts:
# update alert with attackerID
alert['attackerid'] = attacker['_id']
2017-04-24 22:10:24 +03:00
attacker['eventscount'] += len(alert['events'])
2017-04-25 06:51:39 +03:00
attacker['lastseentimestamp'] = toUTC(alert['events'][-1]['documentsource']['utctimestamp'])
2016-06-28 21:26:33 +03:00
2017-04-25 06:51:39 +03:00
# geo ip could have changed, update it to the latest
2014-07-28 20:35:45 +04:00
updateAttackerGeoIP(mozdefdb, attacker['_id'], alert['events'][-1]['documentsource'])
2016-06-28 21:26:33 +03:00
2014-07-28 20:35:45 +04:00
# update counts
attacker['alertscount'] = len(attacker['alerts'])
2015-03-27 18:39:55 +03:00
# should we autocategorize the attacker
2016-06-28 21:26:33 +03:00
# based on their alerts?
2015-03-27 18:39:55 +03:00
if attacker['category'] == 'unknown' and options.autocategorize:
# take a look at recent alerts for this attacker
# and if they are all the same category
# auto-categorize the attacker
matchingalerts = alerts.find(
2018-11-01 01:17:49 +03:00
{"attackerid": attacker['_id']}
).sort('utcepoch', -1).limit(50)
2015-03-27 18:39:55 +03:00
# summarize the alert categories
# returns list of tuples: [(u'bruteforce', 8)]
categoryCounts= mostCommon(matchingalerts,'category')
#are the alerts all the same category?
2017-04-25 06:51:39 +03:00
2015-03-27 18:39:55 +03:00
if len(categoryCounts) == 1:
#is the alert category mapped to an attacker category?
# NOTE(review): category.keys()[0] only works on Python 2, where keys()
# returns a list.
for category in options.categorymapping:
if category.keys()[0] == categoryCounts[0][0]:
attacker['category'] = category[category.keys()[0]]
2015-03-28 17:40:20 +03:00
# broadcastAttacker(attacker): builds an 'alert'-shaped dict (severity
# NOTICE, category 'attacker') describing the attacker and publishes it
# through a kombu Connection/Producer configured from the mq* options.
#
# NOTE(review): this copy of the function is damaged and cannot run as-is:
#   * indentation is gone and bare date lines (VCS blame residue) are
#     interleaved with the code;
#   * the docstring has lost its quotes (second line below);
#   * both `try:` lines, the `else:` before `mqSSL = False`, the remaining
#     arguments of the connString .format(...) call, the Exchange(...)
#     arguments and the whole body of mqConn.ensure(...) / the publish call
#     are not visible.
# Restore the function from version control before editing its logic.
def broadcastAttacker(attacker):
send this attacker info to our message queue
connString = 'amqp://{0}:{1}@{2}:{3}/{4}'.format(options.mquser,
if options.mqprotocol == 'amqps':
mqSSL = True
mqSSL = False
mqConn = Connection(connString, ssl=mqSSL)
alertExchange = Exchange(
mqproducer = mqConn.Producer(serializer='json')
2016-06-28 21:26:33 +03:00
2015-03-28 17:40:20 +03:00
logger.debug('Kombu configured')
except Exception as e:
logger.error('Exception while configuring kombu for alerts: {0}'.format(e))
# generate an 'alert' structure for this attacker:
mqAlert = dict(severity='NOTICE', category='attacker')
2016-06-28 21:26:33 +03:00
2015-03-28 17:40:20 +03:00
# use the attacker's creation time as the alert timestamp when present
if 'datecreated' in attacker.keys():
mqAlert['utctimestamp'] = attacker['datecreated'].isoformat()
2016-06-28 21:26:33 +03:00
2015-03-28 17:40:20 +03:00
mqAlert['summary'] = 'New Attacker: {0} events: {1}, alerts: {2}'.format(attacker['indicators'], attacker['eventscount'], attacker['alertscount'])
ensurePublish = mqConn.ensure(
2018-11-01 02:11:08 +03:00
2015-03-28 17:40:20 +03:00
2018-11-01 02:11:08 +03:00
2015-03-28 17:40:20 +03:00
except Exception as e:
logger.error('Exception while publishing attacker: {0}'.format(e))
2016-06-28 21:26:33 +03:00
2014-07-21 08:30:53 +04:00
def genNewAttacker():
    """Return a fresh attacker document populated with default values.

    The document gets a new ``_id`` from ``genMeteorID()``; zeroed counters
    and score; empty ``alerts``, ``tags``, ``notes`` and ``indicators``
    lists; ``'unknown'`` category and attack phase; blank geo coordinates;
    and ``creator`` set to the invoking command (``sys.argv[0]``).

    ``lastseentimestamp``, ``firstseentimestamp`` and ``datecreated`` all
    share one ``toUTC(datetime.now())`` value so a new attacker starts with
    consistent timestamps.

    Returns:
        dict: the new attacker document. Nothing is written to any database
        here.
    """
    now = toUTC(datetime.now())

    newAttacker = dict()
    newAttacker['_id'] = genMeteorID()
    newAttacker['lastseentimestamp'] = now
    newAttacker['firstseentimestamp'] = now
    newAttacker['eventscount'] = 0
    newAttacker['alerts'] = list()
    newAttacker['alertscount'] = 0
    newAttacker['category'] = 'unknown'
    newAttacker['score'] = 0
    newAttacker['geocoordinates'] = dict(countrycode='', longitude=0, latitude=0)
    newAttacker['tags'] = list()
    newAttacker['notes'] = list()
    newAttacker['indicators'] = list()
    newAttacker['attackphase'] = 'unknown'
    newAttacker['datecreated'] = now
    newAttacker['creator'] = sys.argv[0]

    return newAttacker
2018-10-31 02:08:59 +03:00
2014-07-28 20:35:45 +04:00
# updateAttackerGeoIP(mozdefdb, attackerID, eventDictionary): looks up the
# attacker by _id and, when the event carries
# eventDictionary['details']['sourceipgeolocation'], resets the attacker's
# 'geocoordinates' and copies country_code / longitude / latitude from it.
#
# NOTE(review): this copy of the function is damaged and cannot run as-is:
#   * indentation is gone and bare date lines (VCS blame residue) are
#     interleaved with the code;
#   * the docstring is never closed;
#   * `attackers` is used but never bound in the visible lines;
#   * the `dict(countrycode='',` call is cut short;
#   * no statement writing the modified attacker back is visible;
#   * the `else:` that must precede the final logger.debug is missing.
# Restore the function from version control before editing its logic.
def updateAttackerGeoIP(mozdefdb, attackerID, eventDictionary):
'''given an attacker ID and a dictionary of an elastic search event
look for a valid geoIP in the dict and update the attacker's geo coordinates
# geo ip should be in eventDictionary['details']['sourceipgeolocation']
2018-10-31 01:30:02 +03:00
# "sourceipgeolocation": {
# "city": "Polska",
# "region_code": "73",
# "area_code": 0,
# "time_zone": "Europe/Warsaw",
# "dma_code": 0,
# "metro_code": null,
# "country_code3": "POL",
# "latitude": 52.59309999999999,
# "postal_code": null,
# "longitude": 19.089400000000012,
# "country_code": "PL",
# "country_name": "Poland",
# "continent": "EU"
# }
# logger.debug(eventDictionary)
2014-07-28 20:35:45 +04:00
if 'details' in eventDictionary.keys():
2018-10-05 23:51:12 +03:00
if 'sourceipgeolocation' in eventDictionary['details']:
2014-07-28 20:35:45 +04:00
attacker = attackers.find_one({'_id': attackerID})
if attacker is not None:
attacker['geocoordinates'] = dict(countrycode='',
2014-08-01 03:18:33 +04:00
# copy only the geo fields that are actually present in the event
if 'country_code' in eventDictionary['details']['sourceipgeolocation'].keys():
attacker['geocoordinates']['countrycode'] = eventDictionary['details']['sourceipgeolocation']['country_code']
if 'longitude' in eventDictionary['details']['sourceipgeolocation'].keys():
attacker['geocoordinates']['longitude'] = eventDictionary['details']['sourceipgeolocation']['longitude']
if 'latitude' in eventDictionary['details']['sourceipgeolocation'].keys():
attacker['geocoordinates']['latitude'] = eventDictionary['details']['sourceipgeolocation']['latitude']
2014-07-28 20:35:45 +04:00
logger.debug('no details in the dictionary')
2016-06-28 21:26:33 +03:00
2014-07-28 20:35:45 +04:00
# updateMongoWithESEvents(mozdefdb, results): for every search hit with a
# valid, public IPv4 details.sourceipaddress, widens the address to its /24
# network and either creates a new attacker for that network or bumps the
# event count / last-seen time / geo IP of the existing one.
#
# NOTE(review): this copy of the function is damaged and cannot run as-is:
#   * indentation is gone and bare date lines (VCS blame residue) are
#     interleaved with the code;
#   * the `esrecord = dict(` call is cut short, so the fields of `esrecord`
#     (including the 'documentsource' key read further down) are not visible;
#   * the lines adding the indicator/event to the new attacker, the `else:`
#     before the 'Attacker found' branch, and every statement that writes to
#     the `attackers` collection are missing.
# Restore the function from version control before editing its logic.
def updateMongoWithESEvents(mozdefdb, results):
2017-04-25 06:51:39 +03:00
logger.debug('Looping through events identified as malicious from bro')
attackers = mozdefdb['attackers']
2014-07-21 08:30:53 +04:00
for r in results:
if 'sourceipaddress' in r['_source']['details']:
if netaddr.valid_ipv4(r['_source']['details']['sourceipaddress']):
2014-07-28 20:35:45 +04:00
sourceIP = netaddr.IPNetwork(r['_source']['details']['sourceipaddress'])
# expand it to a /24 CIDR
# todo: lookup ipwhois for asn_cidr value
# potentially with a max mask value (i.e. asn is /8, limit attackers to /24)
2016-06-28 21:26:33 +03:00
sourceIP.prefixlen = 24
2014-07-28 20:35:45 +04:00
# skip loopback, private and reserved addresses
if not sourceIP.ip.is_loopback() and not sourceIP.ip.is_private() and not sourceIP.ip.is_reserved():
2018-11-01 02:11:08 +03:00
esrecord = dict(
2016-06-28 21:26:33 +03:00
2017-04-25 06:51:39 +03:00
logger.debug('Trying to find existing attacker at ' + str(sourceIP))
2014-07-28 20:35:45 +04:00
attacker = attackers.find_one({'indicators.ipv4address': str(sourceIP)})
2014-07-21 08:30:53 +04:00
if attacker is None:
# new attacker
# generate a meteor-compatible ID
# save the ES document type, index, id
# and add a sub list for future events
2017-04-25 06:51:39 +03:00
logger.debug('Creating new attacker from ' + str(sourceIP))
2014-07-21 08:30:53 +04:00
newAttacker = genNewAttacker()
2014-07-28 20:35:45 +04:00
2018-11-01 02:11:08 +03:00
# expand the source ip to a /24 for the indicator match.
2014-07-28 20:35:45 +04:00
sourceIP.prefixlen = 24
# str sourceIP to get the ip/cidr rather than netblock cidr.
2017-04-25 06:51:39 +03:00
newAttacker['eventscount'] = 1
newAttacker['lastseentimestamp'] = esrecord['documentsource']['utctimestamp']
2014-07-21 08:30:53 +04:00
2014-07-28 20:35:45 +04:00
updateAttackerGeoIP(mozdefdb, newAttacker['_id'], esrecord['documentsource'])
2014-07-21 08:30:53 +04:00
2017-04-25 06:51:39 +03:00
logger.debug('Attacker found, increasing eventscount and modding geoip')
2017-04-24 22:48:57 +03:00
attacker['eventscount'] += 1
attacker['lastseentimestamp'] = esrecord['documentsource']['utctimestamp']
2014-07-28 20:35:45 +04:00
# geo ip could have changed, update it
updateAttackerGeoIP(mozdefdb, attacker['_id'], esrecord['documentsource'])
2014-07-21 08:30:53 +04:00
# main(): connects to Elasticsearch (options.esservers) and MongoDB
# (options.mongohost / options.mongoport, database 'meteor'), searches for
# bro attackers with a hit threshold of 100 and feeds the results to
# updateMongoWithESEvents. A ValueError is logged rather than propagated.
#
# NOTE(review): this copy of the function is damaged and cannot run as-is --
# indentation is gone, bare date lines (VCS blame residue) are interleaved
# with the code, and the `try:` matching the `except ValueError` below is
# missing. searchMongoAlerts() is defined in this file but no call to it is
# visible here; confirm against version control whether one was lost.
def main():
2016-10-24 23:56:29 +03:00
es = ElasticsearchClient((list('{0}'.format(s) for s in options.esservers)))
2014-07-21 08:30:53 +04:00
client = MongoClient(options.mongohost, options.mongoport)
# use meteor db
mozdefdb = client.meteor
2014-07-28 20:35:45 +04:00
esResults = searchESForBROAttackers(es, 100)
updateMongoWithESEvents(mozdefdb, esResults)
2014-07-21 08:30:53 +04:00
2014-07-28 20:35:45 +04:00
except ValueError as e:
2014-07-21 08:30:53 +04:00
logger.error("Exception %r collecting attackers to mongo" % e)
# initConfig(): fills the global `options` object with settings read through
# getConfig(name, default, options.configfile): logging output, syslog
# target, Elasticsearch servers, MongoDB host/port, auto-categorisation,
# the alert-to-attacker category mapping (parsed as JSON) and the message
# queue parameters.
#
# NOTE(review): this copy of the function is damaged and cannot run as-is --
# indentation is gone, bare date lines (VCS blame residue) are interleaved
# with the code, and the getConfig(...) calls for `sysloghostname` and
# `esservers` are cut short (default value and closing parentheses are not
# visible). Restore from version control before editing.
def initConfig():
# output our log to stdout or syslog
options.output = getConfig('output', 'stdout', options.configfile)
# syslog hostname
options.sysloghostname = getConfig('sysloghostname',
# syslog port
options.syslogport = getConfig('syslogport', 514, options.configfile)
# elastic search server settings
options.esservers = list(getConfig('esservers',
options.mongohost = getConfig('mongohost', 'localhost', options.configfile)
options.mongoport = getConfig('mongoport', 3001, options.configfile)
2015-03-27 18:39:55 +03:00
# should we automatically categorize
2016-06-28 21:26:33 +03:00
# new attackers based on their alerts?
2015-03-27 18:39:55 +03:00
options.autocategorize = getConfig('autocategorize', False, options.configfile)
# get the mapping of alert category to attacker category
2016-06-28 21:26:33 +03:00
# supply as a list of dicts:
2015-03-27 18:39:55 +03:00
# [{"bruteforce":"bruteforcer"},{"alertcategory":"attackercategory"}]
options.categorymapping = json.loads(getConfig('categorymapping', "[]", options.configfile))
2016-06-28 21:26:33 +03:00
2015-03-28 17:40:20 +03:00
# should we broadcast new attackers
# to a message queue?
options.broadcastattackers = getConfig('broadcastattackers', False, options.configfile)
# message queue options
options.mqserver = getConfig('mqserver', 'localhost', options.configfile)
options.alertexchange = getConfig('alertexchange', 'alerts', options.configfile)
options.routingkey = getConfig('routingkey', 'mozdef.alert', options.configfile)
options.mquser = getConfig('mquser', 'guest', options.configfile)
options.mqpassword = getConfig('mqpassword', 'guest', options.configfile)
options.mqport = getConfig('mqport', 5672, options.configfile)
options.mqvhost = getConfig('mqvhost', '/', options.configfile)
# set to either amqp or amqps for ssl
options.mqprotocol = getConfig('mqprotocol', 'amqp', options.configfile)
2014-07-21 08:30:53 +04:00
2017-04-25 06:51:39 +03:00
2014-07-21 08:30:53 +04:00
# Script entry point: parse the command line into the global `options`.
# The config-file option defaults to this script's path with '.py' replaced
# by '.conf'.
#
# NOTE(review): this copy of the block is damaged -- indentation is gone and
# the call that owns the `default=` / `help=` arguments is missing. No calls
# to initConfig(), initLogger() or main() are visible after parse_args();
# confirm against version control whether they were lost.
if __name__ == '__main__':
parser = OptionParser()
default=sys.argv[0].replace('.py', '.conf'),
help="configuration file to use")
(options, args) = parser.parse_args()