Add deadman alert for sqs queues from tag

This commit is contained in:
Brandon Myers 2017-05-12 16:01:35 -05:00
Родитель ceec55ad18
Коммит ca5bd81c30
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 8AA79AD83045BBC7
4 изменённых файлов: 126 добавлений и 14 удалений

Просмотреть файл

@ -0,0 +1,2 @@
[options]
# Comma-separated queue names. The sqs_queues_deadman alert reads this value,
# splits it on ',' and runs one check per name, matching the name against event tags.
sqs_queues = queue1,queue2

Просмотреть файл

@ -0,0 +1,46 @@
#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Copyright (c) 2017 Mozilla Corporation
#
# Contributors:
# bmyers@mozilla.com
from lib.alerttask import AlertTask
from query_models import SearchQuery, TermMatch
from configlib import getConfig, OptionParser
class AlertSQSQueuesDeadman(AlertTask):
    """Deadman alert for SQS queues, keyed on event tags.

    For every queue name listed under ``sqs_queues`` in the config file, a
    search is run for events from the last hour that carry the queue name as
    a tag. ``onNoEvent`` builds the alert for a queue that produced nothing
    (presumably invoked by the AlertTask machinery when the search comes
    back empty -- confirm against lib.alerttask).
    """

    def main(self):
        """Load the configuration and run one check per configured queue."""
        self.config_file = './sqs_queues_deadman.conf'
        self.initConfiguration()
        for queue_name in self.config.sqs_queues:
            # process_alert() and onNoEvent() read the current queue name
            # from the instance rather than taking it as an argument.
            self.sqs_queue_name = queue_name
            self.process_alert()

    def initConfiguration(self):
        """Populate ``self.config.sqs_queues`` from the config file.

        The ``sqs_queues`` option is a comma-separated string. Entries are
        stripped of surrounding whitespace and empty entries are dropped, so
        a missing/empty option yields an empty list (no checks) instead of a
        single check against the empty tag ``''``.
        """
        myparser = OptionParser()
        (self.config, args) = myparser.parse_args([])
        sqs_queues_str = getConfig('sqs_queues', '', self.config_file)
        # ''.split(',') returns [''], and 'a, b,' yields ' b' and '' --
        # normalise so only real queue names are checked.
        stripped_names = (name.strip() for name in sqs_queues_str.split(','))
        self.config.sqs_queues = [name for name in stripped_names if name]

    def process_alert(self):
        """Search the last hour for events tagged with the current queue name."""
        search_query = SearchQuery(hours=1)
        search_query.add_must(TermMatch('tags', self.sqs_queue_name))
        self.filtersManual(search_query)
        self.searchEventsSimple()
        self.walkEvents()

    def onNoEvent(self):
        """Build the deadman alert for the queue currently being checked.

        Returns whatever ``createAlertDict`` produces for an ERROR-severity
        'deadman' alert tagged with the queue name and 'sqs'.
        """
        category = 'deadman'
        tags = [self.sqs_queue_name, 'sqs']
        severity = 'ERROR'
        summary = 'No events found from {} sqs queue the last hour'.format(self.sqs_queue_name)
        # Create the alert object based on these properties; the empty list
        # is presumably the events argument (there are none for a deadman).
        return self.createAlertDict(summary, category, tags, [], severity=severity)

Просмотреть файл

@ -100,45 +100,40 @@ class AlertTestSuite(UnitTestSuite):
def verify_expected_alert(self, found_alert, test_case):
# Checks a retrieved alert document against the test case: index name, type,
# top-level keys, id type, presence of utctimestamp, the attached events, and
# every key/value pair in test_case.expected_alert.
# NOTE(review): each bare assert below is immediately followed by the same
# assert carrying a failure message -- this looks like the removed and added
# lines of a diff rendered together; confirm which lines are current.
# Verify index is set correctly
assert found_alert['_index'] == self.alert_index_name
assert found_alert['_index'] == self.alert_index_name, 'Alert index not propertly set, got: {}'.format(found_alert['_index'])
# Verify alert type is correct
assert found_alert['_type'] == 'alert'
assert found_alert['_type'] == 'alert', 'Alert _type is not alert'
# Verify that the alert has the right "look to it"
# (compares keys() to a list in a fixed order, so this depends on keys()
# returning a list in exactly this order)
assert found_alert.keys() == ['_score', '_type', '_id', '_source', '_index']
assert found_alert.keys() == ['_score', '_type', '_id', '_source', '_index'], 'Alert format is malformed'
# Verify the alert has an id field that is unicode
# NOTE(review): the check is for unicode, but the failure message says "integer"
assert type(found_alert['_id']) == unicode
assert type(found_alert['_id']) == unicode, 'Alert _id is not an integer'
# Verify there is a utctimestamp field
assert 'utctimestamp' in found_alert['_source']
assert 'utctimestamp' in found_alert['_source'], 'Alert does not have utctimestamp specified'
# Verify the events are added onto the alert
assert type(found_alert['_source']['events']) == list
assert type(found_alert['_source']['events']) == list, 'Alert events field is not a list'
# Sort the alert's events and the test case's events by utctimestamp so the
# two lists can be compared position by position
alert_events = found_alert['_source']['events']
sorted_alert_events = sorted(alert_events, key=lambda k: k['documentsource']['utctimestamp'])
created_events = test_case.full_events
sorted_created_events = sorted(created_events, key=lambda k: k['_source']['utctimestamp'])
event_index = 0
for event in sorted_alert_events:
# Each alert event's documentsource must equal the created event's _source
assert event['documentsource'] == sorted_created_events[event_index]['_source']
event_index += 1
# Verify that the alert properties are set correctly
# (iteritems() is the Python 2 dict API)
for key, value in test_case.expected_alert.iteritems():
assert found_alert['_source'][key] == value
assert found_alert['_source'][key] == value, '{0} does not match, got: {1}'.format(key, found_alert['_source'][key])
def verify_alert_task(self, alert_task, test_case):
    """Assert that the alert task fired, or stayed silent, as the test case expects.

    When ``test_case.expected_test_result`` is True, at least one alert id
    must have been recorded on ``alert_task``; the 'alerts' index is then
    flushed and every alert is fetched by id and passed to
    ``verify_expected_alert``. Otherwise no alert id may be present.

    Raises:
        AssertionError: if the number of alerts does not match the expectation.
    """
    if test_case.expected_test_result is True:
        # Compare by value: `is not 0` tests object identity and only works
        # by accident of small-int caching.
        assert len(alert_task.alert_ids) != 0, 'Alert did not fire as expected'
        self.es_client.flush('alerts')
        for alert_id in alert_task.alert_ids:
            found_alert = self.es_client.get_alert_by_id(alert_id)
            self.verify_expected_alert(found_alert, test_case)
    else:
        assert len(alert_task.alert_ids) == 0, 'Alert fired when it was expected not to'
@staticmethod
def copy(obj):

Просмотреть файл

@ -0,0 +1,69 @@
from positive_alert_test_case import PositiveAlertTestCase
from negative_alert_test_case import NegativeAlertTestCase
from alert_test_suite import AlertTestSuite
class TestAlertSQSQueuesDeadman(AlertTestSuite):
    """Test cases for the ``sqs_queues_deadman`` alert.

    The cases use the tags ``queue1`` and ``queue2``: an event carrying both
    tags is a negative case, while an event carrying only one of them is a
    positive case whose expected alert names the other queue.
    """

    alert_filename = "sqs_queues_deadman"

    # utctimestamp comes from subtract_from_timestamp_lambda with a
    # one-minute delta.
    default_event = dict(
        _source=dict(
            utctimestamp=AlertTestSuite.subtract_from_timestamp_lambda(
                date_timedelta={'minutes': 1}
            ),
        ),
    )

    test_cases = []

    # Event tagged with both queue names: negative case.
    event_dict = dict(_source=dict(tags=['queue1', 'queue2']))
    event = AlertTestSuite.create_event(event_dict)
    test_cases.append(NegativeAlertTestCase(
        events=[event],
        description="Negative test case with events containing the specific tags",
    ))

    # Only queue1 is tagged: the expected alert is for queue2.
    event_dict = dict(_source=dict(tags=['queue1']))
    event = AlertTestSuite.create_event(event_dict)
    alert = dict(
        category="deadman",
        severity="ERROR",
        summary='No events found from queue2 sqs queue the last hour',
        tags=['queue2', 'sqs'],
    )
    test_cases.append(PositiveAlertTestCase(
        expected_alert=alert,
        events=[event],
        description="Postive test case with only an event for one of the tags",
    ))

    # Only queue2 is tagged: the expected alert is for queue1.
    event_dict = dict(_source=dict(tags=['queue2']))
    event = AlertTestSuite.create_event(event_dict)
    alert = dict(
        category="deadman",
        severity="ERROR",
        summary='No events found from queue1 sqs queue the last hour',
        tags=['queue1', 'sqs'],
    )
    test_cases.append(PositiveAlertTestCase(
        expected_alert=alert,
        events=[event],
        description="Postive test case with only an event for one of the tags",
    ))