Code review changes
Parent: 7175f62d05
Commit: 4ff1cffea6
@@ -17,19 +17,9 @@ PATH_FOR_CSS_TICKET = {
    "Gov": "https://portal.azure.us/#blade/Microsoft_Azure_Support/HelpAndSupportBlade/overview"}
AGENT_CONF_FILE = "/etc/opt/microsoft/azuremonitoragent/config-cache/mdsd.hr.json"
FAILED_TESTS_COUNT = 0
STREAM_SCENARIO = "cef"  # default value
WARNING_TESTS_COUNT = 0
NOT_RUN_TESTS_COUNT = 0
SCRIPT_HELP_MESSAGE = "Usage: python AMA_troubleshoot.py [STREAM_NAME] [OPTION]\n" \
                      "Runs CEF validation tests on the collector machine and generates a log file here- /tmp/cef_troubleshooter_output_file.log\n\n" \
                      " --cef/--CEF run the troubleshooting script for the CEF scenario." \
                      " --asa/--ASA run the troubleshooting script for the Cisco ASA scenario." \
                      " --ftd/--FTD run the troubleshooting script for the Cisco FTD scenario." \
                      " collect, runs the script in collect mode. Useful in case you want to open a ticket. Generates an output file here- /tmp/cef_troubleshooter_collection_output.log\n" \
                      " -h, --help display the help and exit\n\n" \
                      "Example:\n" \
                      " python cef_AMA_troubleshoot.py --cef\n" \
                      " python cef_AMA_troubleshoot.py --cisco collect\n\n" \
                      "This script verifies the installation of the CEF connector on the collector machine. It returns a status for each test and action items to fix detected issues."
DELIMITER = "\n" + "-" * 20 + "\n"
@@ -69,6 +59,7 @@ class CommandShellExecution(object):
    """
    This class is for executing all the shell related commands in the terminal for each test.
    """

    def __init__(self, command_name, command_to_run, result_keywords_array=None, fault_keyword=None,
                 command_result=None,
                 command_result_err=None):
@@ -114,11 +105,13 @@ class CommandVerification(CommandShellExecution):
    """
    This class is running all the necessary verifications for the running test.
    """

    def __init__(self, command_name, command_to_run, result_keywords_array=None, fault_keyword=None,
                 command_result=None,
                 command_result_err=None,
                 is_successful=False):
        super(CommandVerification, self).__init__(command_name, command_to_run, result_keywords_array, fault_keyword, command_result, command_result_err)
        super(CommandVerification, self).__init__(command_name, command_to_run, result_keywords_array, fault_keyword,
                                                  command_result, command_result_err)
        self.is_successful = is_successful

    def __repr__(self):
@@ -248,11 +241,13 @@ class AgentInstallationVerifications:
    AGENT_INSTALLATION_DOC = "https://docs.microsoft.com/azure/azure-monitor/agents/azure-monitor-agent-manage"
    AGENT_NOT_INSTALLED_ERROR_MESSAGE = "Could not detect an AMA service running and listening on the machine." \
                                        " Please follow this documentation in order to install it and verify your" \
                                        " machine's operating system is in the supported list- {}".format(AGENT_INSTALLATION_DOC)
                                        " machine's operating system is in the supported list- {}".format(
        AGENT_INSTALLATION_DOC)
    AGENT_NOT_RUNNING_ERROR_MESSAGE = "Detected AMA is installed on the machine but not running. Please start the agent by running " \
                                      "\'service azuremonitoragent start\' \nif the agent service fails to start," \
                                      " please run the following command to review the agent error log file here- " \
                                      "\'cat /var/opt/microsoft/azuremonitoragent/log/mdsd.err | tail -n 15\'".format(AGENT_INSTALLATION_DOC)
                                      "\'cat /var/opt/microsoft/azuremonitoragent/log/mdsd.err | tail -n 15\'".format(
        AGENT_INSTALLATION_DOC)

    OMS_RUNNING_ERROR_MESSAGE = "Detected the OMS Agent running on your machine. If not necessary, please remove it to avoid duplicated data in the workspace, which can result in an increase in costs"
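The remediation messages above pair with the checks in this class. A minimal sketch of how such a check could be expressed with the CommandVerification helper shown earlier in this diff; the test name, probe command, and expected keyword below are illustrative assumptions, not the script's actual implementation:

    # Hypothetical check in the spirit of AgentInstallationVerifications.
    # Test name, probe command, and keyword are assumptions for illustration only.
    command_object = CommandVerification(
        "verify_ama_agent_process_is_running",
        "if [ `ps -ef | grep azuremonitoragent | grep -v grep | wc -l` -gt 0 ]; then echo \"Running\"; else echo \"Stopped\"; fi",
        ["Running"])
    command_object.run_full_test()
    if not command_object.is_successful:
        print_error(AgentInstallationVerifications.AGENT_NOT_RUNNING_ERROR_MESSAGE)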
@@ -320,7 +315,6 @@ class DCRConfigurationVerifications:
    This class is for data collection rules verifications
    """
    # CONSTANTS
    global STREAM_SCENARIO
    DCR_DOC = "https://docs.microsoft.com/azure/azure-monitor/agents/data-collection-rule-overview"
    DCRA_DOC = "https://docs.microsoft.com/rest/api/monitor/data-collection-rule-associations"
    CEF_STREAM_NAME = "SECURITY_CEF_BLOB"
@@ -334,7 +328,6 @@ class DCRConfigurationVerifications:
                              "be collected from this machine to any workspace. Please create a DCR using the following documentation- " \
                              "{} and run again".format(DCR_DOC)
    MULTI_HOMING_MESSAGE = "Detected multiple collection rules sending the same stream. This scenario is called multi-homing and might have an effect on the agent's performance"

    def verify_dcr_exists(self):
        """
        Verifying there is at least one DCR on the machine
@@ -353,16 +346,19 @@ class DCRConfigurationVerifications:
        """
        Verifying there is a DCR on the machine for forwarding cef data
        """
        global STREAM_SCENARIO
        command_name = "verify_DCR_content_has_stream"
        revert_to_ftd = False
        if STREAM_SCENARIO == 'ftd':
            self.STREAM_SCENARIO = 'asa'
        else:
            self.STREAM_SCENARIO = STREAM_SCENARIO
            STREAM_SCENARIO = 'asa'
            revert_to_ftd = True
        command_to_run = "sudo grep -ri \"{}\" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/".format(
            self.STREAM_NAME[self.STREAM_SCENARIO])
        result_keywords_array = [self.STREAM_NAME[self.STREAM_SCENARIO]]
            self.STREAM_NAME[STREAM_SCENARIO])
        result_keywords_array = [self.STREAM_NAME[STREAM_SCENARIO]]
        command_object = CommandVerification(command_name, command_to_run, result_keywords_array)
        command_object.run_full_test()
        if revert_to_ftd:
            STREAM_SCENARIO = 'ftd'
        if not command_object.is_successful:
            print_error(self.DCR_MISSING_CEF_STREAM_ERR)
            return False
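For the default "cef" scenario the grep command built above resolves to a concrete search for the CEF stream name. A worked example, assuming the STREAM_NAME mapping points "cef" at the CEF_STREAM_NAME constant shown earlier in this diff:

    # Worked example of the command built above for STREAM_SCENARIO == "cef"
    # (the STREAM_NAME mapping itself is assumed; the stream name comes from CEF_STREAM_NAME).
    stream_name = "SECURITY_CEF_BLOB"
    command_to_run = "sudo grep -ri \"{}\" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/".format(
        stream_name)
    # -> sudo grep -ri "SECURITY_CEF_BLOB" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/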
@@ -372,11 +368,12 @@ class DCRConfigurationVerifications:
        """
        Verifying that the CEF DCR on the machine has valid content with all necessary DCR components
        """
        global STREAM_SCENARIO
        command_name = "verify_dcr_has_valid_content"
        if STREAM_SCENARIO == 'ftd':
            self.STREAM_SCENARIO = 'asa'
            STREAM_SCENARIO = 'asa'
        command_to_run = "sudo grep -ri \"{}\" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/".format(
            self.STREAM_NAME[self.STREAM_SCENARIO])
            self.STREAM_NAME[STREAM_SCENARIO])
        result_keywords_array = ["stream", "kind", "syslog", "dataSources", "configuration", "facilityNames",
                                 "logLevels", "SecurityInsights", "endpoint", "channels", "sendToChannels", "ods-",
                                 "opinsights.azure", "id"]
@@ -384,7 +381,8 @@ class DCRConfigurationVerifications:
        command_object.run_command(should_decode=False)
        command_object.command_result = command_object.command_result.decode('UTF-8').split('\n')[:-1]
        for dcr in command_object.command_result:
            dcr_path = re.search("(/etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/.*.json)", str(dcr)).group()
            dcr_path = re.search("(/etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/.*.json)",
                                 str(dcr)).group()
            for key_word in command_object.result_keywords_array:
                if str(key_word) not in str(dcr):
                    command_object.is_command_successful(should_fail=True)
@@ -398,11 +396,12 @@ class DCRConfigurationVerifications:
        """
        Counting the number of DCRs forwarding CEF data in order to alert on multi-homing scenarios.
        """
        global STREAM_SCENARIO
        command_name = "check_multi_homing"
        if STREAM_SCENARIO == 'ftd':
            self.STREAM_SCENARIO = 'asa'
            STREAM_SCENARIO = 'asa'
        command_to_run = "sudo grep -ri \"{}\" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/ | wc -l".format(
            self.STREAM_NAME[self.STREAM_SCENARIO])
            self.STREAM_NAME[STREAM_SCENARIO])
        command_object = CommandVerification(command_name, command_to_run)
        command_object.run_command()
        try:
@@ -432,6 +431,7 @@ class SyslogDaemonVerifications:
    """
    This class is for Syslog daemon related verifications
    """

    def __init__(self):
        self.command_name = "verify_Syslog_daemon_listening"
        self.SYSLOG_DAEMON = ""
@@ -445,8 +445,10 @@ class SyslogDaemonVerifications:
        """
        This function determines which Syslog daemon is running on the machine (Rsyslog or Syslog-ng)
        """
        is_rsyslog_running = CommandVerification("find_Rsyslog_daemon", "if [ `ps -ef | grep rsyslog | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
        is_syslog_ng_running = CommandVerification("find_Syslog-ng_daemon", "if [ `ps -ef | grep syslog-ng | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
        is_rsyslog_running = CommandVerification("find_Rsyslog_daemon",
                                                 "if [ `ps -ef | grep rsyslog | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
        is_syslog_ng_running = CommandVerification("find_Syslog-ng_daemon",
                                                   "if [ `ps -ef | grep syslog-ng | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
        is_rsyslog_running.run_command(), is_syslog_ng_running.run_command()
        if "True" in str(is_rsyslog_running.command_result):
            self.SYSLOG_DAEMON = "rsyslog"
@@ -521,7 +523,8 @@ class OperatingSystemVerifications:
    SELINUX_DOCUMENTATION = "https://access.redhat.com/documentation/red_hat_enterprise_linux/8/html/using_selinux/changing-selinux-states-and-modes_using-selinux#changing-selinux-modes_changing-selinux-states-and-modes"
    SELINUX_RUNNING_ERROR_MESSAGE = "Detected SELinux running on the machine. The CEF connector does not support any form of hardening at the moment, " \
                                    "and having SELinux in Enforcing mode can harm the forwarding of data. Please disable SELinux by running the command \'setenforce 0\'. " \
                                    "This will disable SELinux temporarily. In order to disable permanently please follow this documentation- {}".format(SELINUX_DOCUMENTATION)
                                    "This will disable SELinux temporarily. In order to disable permanently please follow this documentation- {}".format(
        SELINUX_DOCUMENTATION)
    IPTABLES_BLOCKING_TRAFFIC_ERROR_MESSAGE = "Iptables might be blocking incoming traffic to the agent." \
                                              " Please verify there are no firewall rules blocking incoming traffic to port 514 and run again."
    FULL_DISK_ERROR_MESSAGE = "There is less than 1 GB of free disk space left on this machine." \
@@ -590,7 +593,6 @@ class IncomingEventsVerifications:
    This class is for sending and capturing CEF events in the incoming stream of events to the syslog daemon port
    """
    # CONSTANTS
    global STREAM_SCENARIO
    FIXED_CEF_MESSAGE = "0|TestCommonEventFormat|MOCK|common=event-format-test|end|TRAFFIC|1|rt=$common=event-formatted-receive_time deviceExternalId=0002D01655 src=1.1.1.1 dst=2.2.2.2 sourceTranslatedAddress=1.1.1.1 destinationTranslatedAddress=3.3.3.3 cs1Label=Rule cs1=CEF_TEST_InternetDNS"
    FIXED_CISCO_MESSAGE = "Deny inbound TCP src inet:1.1.1.1 dst inet:2.2.2.2"
    FIXED_FTD_MESSAGE = "Teardown dynamic UDP translation from inside:10.51.100.1/54453 to outside:10.0.2.3/54453 duration 0:00:00"
@@ -604,8 +606,8 @@ class IncomingEventsVerifications:
        """
        Validate there are incoming events for the relevant stream.
        :param line: a text line from the tcpdump stream
        :param ident: The message tag to look for in the message line.
        :return: True if the stream exists in the line. Otherwise, false.
        :ident the ident of the stream being sent.
        """
        if ident in line:
            return True
@@ -621,10 +623,9 @@ class IncomingEventsVerifications:
        start_seconds = int(round(time.time()))
        end_seconds = int(round(time.time()))
        mock_message_counter = 0
        mock_message_max = 3
        command_name = "listen_to_incoming_events"
        command_to_run = "sudo tcpdump -A -ni any port 514 -vv"
        result_keywords_array = [self.IDENT_NAME[STREAM_SCENARIO]]
        command_to_run = "sudo tcpdump -A -l -ni any port 514 -vv"
        result_keywords_array = [STREAM_SCENARIO.upper()]
        command_object = CommandVerification(command_name, command_to_run, result_keywords_array)
        print("Attempting to capture events using tcpdump. This could take up to " + str(
            tcpdump_time_restriction) + " seconds.")
@@ -640,22 +641,19 @@ class IncomingEventsVerifications:
        poll_obj = select.poll()
        poll_obj.register(tcp_dump.stdout, select.POLLIN)
        while (end_seconds - start_seconds) < tcpdump_time_restriction:
            if mock_message is True and mock_message_counter < mock_message_max:
            if mock_message and mock_message_counter < 20:
                # Sending mock messages
                mock_message_counter += 1
                self.send_message_local(514, 2)
            poll_result = poll_obj.poll(2500)
                self.send_message_local(514, 1)
            poll_result = poll_obj.poll(0)
            if poll_result:
                while line:
                    line = tcp_dump.stdout.readline().decode('utf-8').strip("\n")
                    if not line:
                        break
                    if self.handle_tcpdump_line(line, self.IDENT_NAME[STREAM_SCENARIO]):
                        command_object.command_result = line
                        command_object.run_full_verification()
                        print_ok("Found {0} in stream. Please verify {0} events arrived at your workspace".format(
                line = tcp_dump.stdout.readline().decode('utf-8').strip("\n")
                if self.handle_tcpdump_line(line, STREAM_SCENARIO.upper()):
                    command_object.command_result = line
                    command_object.run_full_verification()
                    print_ok("Found {0} in stream. Please verify {0} events arrived at your workspace".format(
                        STREAM_SCENARIO.upper()))
                        return True
                    return True
            end_seconds = int(round(time.time()))
        print_error("Could not locate {0} message in tcpdump. Please verify {0} events can be sent to the machine and"
                    " there is no firewall blocking incoming traffic".format(STREAM_SCENARIO.upper()))
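The reworked loop above switches from a 2.5-second blocking poll to a non-blocking poll with a single readline per iteration, relying on the line-buffered tcpdump output enabled by the added -l flag. A minimal, self-contained sketch of that read pattern, assuming tcpdump is installed and port 514 is the port the script listens on:

    # Standalone sketch of the poll/readline pattern used above (assumes tcpdump is available).
    import select
    import subprocess

    tcp_dump = subprocess.Popen(["sudo", "tcpdump", "-A", "-l", "-ni", "any", "port", "514", "-vv"],
                                stdout=subprocess.PIPE)
    poll_obj = select.poll()
    poll_obj.register(tcp_dump.stdout, select.POLLIN)
    if poll_obj.poll(0):  # returns immediately instead of blocking for 2.5 seconds
        line = tcp_dump.stdout.readline().decode('utf-8').strip("\n")
        print(line)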
@@ -672,7 +670,7 @@ class IncomingEventsVerifications:
        try:
            for index in range(0, amount):
                command_tokens = ["logger", "-p", "local4.warn", "-t", self.IDENT_NAME[STREAM_SCENARIO],
                                  self.STREAM_MESSAGE[STREAM_SCENARIO], "--rfc3164", "-P", str(port), "-n", "127.0.0.1"]
                logger = subprocess.Popen(command_tokens, stdout=subprocess.PIPE)
                o, e = logger.communicate()
                if e is not None:
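For the default "cef" scenario the command_tokens above amount to a plain logger invocation against the local syslog daemon. A short sketch, assuming the CEF entry of IDENT_NAME resolves to "CEF" and using the FIXED_CEF_MESSAGE constant shown earlier in this diff:

    # Sends one mock CEF event to 127.0.0.1:514, mirroring command_tokens above.
    # The "CEF" tag is an assumption about the IDENT_NAME mapping.
    import subprocess

    FIXED_CEF_MESSAGE = "0|TestCommonEventFormat|MOCK|common=event-format-test|end|TRAFFIC|1|rt=$common=event-formatted-receive_time deviceExternalId=0002D01655 src=1.1.1.1 dst=2.2.2.2 sourceTranslatedAddress=1.1.1.1 destinationTranslatedAddress=3.3.3.3 cs1Label=Rule cs1=CEF_TEST_InternetDNS"
    command_tokens = ["logger", "-p", "local4.warn", "-t", "CEF", FIXED_CEF_MESSAGE,
                      "--rfc3164", "-P", "514", "-n", "127.0.0.1"]
    logger = subprocess.Popen(command_tokens, stdout=subprocess.PIPE)
    o, e = logger.communicate()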
@@ -736,7 +734,8 @@ class SystemInfo:
            command_object.command_result) + DELIMITER).replace(
            '%', '%%').replace('\\n', '\n')

    def trace_activation(self):
    @staticmethod
    def trace_activation():
        flag = '-T 0x1002'
        file_path = '/etc/default/azuremonitoragent'
        # Check if the flag already exists
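Only the start of the new static trace_activation() is visible in this hunk. A minimal sketch of the "check if the flag already exists" step it refers to, assuming a simple read of the agent's defaults file; how the flag is written when missing is not shown here:

    # Illustrative check only: reads /etc/default/azuremonitoragent and tests for the trace flag.
    flag = '-T 0x1002'
    file_path = '/etc/default/azuremonitoragent'
    with open(file_path, 'r') as conf_file:
        flag_already_set = flag in conf_file.read()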
@@ -806,38 +805,56 @@ def find_dcr_cloud_environment():
    return DEFAULT_MACHINE_ENV


def getargs(should_print=True):
def getargs():
    """
    Get execution args using argparse lib
    """
    parser = argparse.ArgumentParser(description=SCRIPT_HELP_MESSAGE)
    parser.add_argument('collect', nargs='?', help='Collect syslog message samples to file')
    parser.add_argument('--CEF', '--cef', action='store_true', default=False, help='Validate CEF DCR and events')
    parser.add_argument('--ASA', '--asa', action='store_true', default=False, help='Validate Cisco ASA DCR and events')
    parser.add_argument('--FTD', '--ftd', action='store_true', default=False, help='Validate Cisco FTD DCR and events')
    args = parser.parse_args()
    if should_print:
        for arg in vars(args):
            if getattr(args, arg):
                print_notice("The scenario chosen is: {}".format(arg))
    return args


def main():
    global STREAM_SCENARIO
    args = getargs()
    verify_root_privileges()
    if args.CEF:
        STREAM_SCENARIO = "cef"
    elif args.ASA:
        STREAM_SCENARIO = "asa"
    parser = argparse.ArgumentParser()
    parser.add_argument('collect', nargs='?',
                        help='runs the script in collect mode. Useful in case you want to open a ticket.')
    parser.add_argument('--CEF', '--cef', action='store_true', default=False,
                        help='run the troubleshooting script for the CEF scenario.')
    parser.add_argument('--ASA', '--asa', action='store_true', default=False,
                        help='run the troubleshooting script for the Cisco ASA scenario.')
    parser.add_argument('--FTD', '--ftd', action='store_true', default=False,
                        help='run the troubleshooting script for the Cisco FTD scenario.')
    args = parser.parse_args()
    if args.ASA:
        STREAM_SCENARIO = "asa"
    elif args.FTD:
        STREAM_SCENARIO = "ftd"
    else:
        print_error("Invalid stream name provided. The supported streams are either \'--CEF\' or \'--ASA\' or \'--FTD\'. Please try again.")
        sys.exit()
        STREAM_SCENARIO = "cef"
    return args


def print_scenario(args, should_print=True):
    """
    param: args: the arguments returned from the getargs function
    param: should_print: set to true by default. Prints the chosen scenario by the user.
    """
    if should_print:
        scenario_provided = False
        for arg in vars(args):
            if getattr(args, arg):
                if arg == 'collect':
                    continue
                if not scenario_provided:
                    scenario_provided = True
                else:
                    print_error("More than 1 stream provided. Please run the script again with only one scenario.\n"
                                "For more information run 'python cef_AMA_troubleshoot.py -h'. Exiting.")
                    sys.exit(1)
        print_notice("The scenario chosen is: {}".format(STREAM_SCENARIO.upper()))


def main():
    verify_root_privileges()
    subprocess.Popen(['rm', '-f', LOG_OUTPUT_FILE],
                     stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    args = getargs()
    print_scenario(args)
    if args.collect:
        print_notice("Starting to collect data. This may take a couple of seconds")
        machine_env = find_dcr_cloud_environment()
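A worked example of how the new argument handling above resolves a command line, following the argparse definitions and the if/elif chain in getargs() (assuming getargs() updates the module-level STREAM_SCENARIO, as the surrounding code suggests):

    # python cef_AMA_troubleshoot.py --ftd collect
    #   -> args.FTD == True, args.collect == "collect", STREAM_SCENARIO becomes "ftd"
    # python cef_AMA_troubleshoot.py --cef
    #   -> args.CEF == True, no ASA/FTD flag, so the else branch keeps STREAM_SCENARIO = "cef"
    # python cef_AMA_troubleshoot.py
    #   -> no stream flag at all, STREAM_SCENARIO defaults to "cef"
    # print_scenario(args) then prints "The scenario chosen is: FTD" / "CEF" accordingly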
@@ -845,7 +862,8 @@ def main():
        system_info = SystemInfo()
        system_info.handle_commands()
        print(
            "Finished collecting data \nIn order to open a support case please browse: {}".format(PATH_FOR_CSS_TICKET[machine_env]))
            "Finished collecting data \nIn order to open a support case please browse: {}".format(
                PATH_FOR_CSS_TICKET[machine_env]))
        with open(LOG_OUTPUT_FILE, 'a') as file:
            file.write('*' * 10 + 'FINISHED COLLECTION' + '*' * 10)
        time.sleep(1)
@@ -872,11 +890,12 @@ def main():
    else:
        print_ok("All tests passed successfully")
    print_notice("This script generated an output file located here - {}"
                 "\nPlease review it if you would like to get more information on failed tests.".format(LOG_OUTPUT_FILE))
                 "\nPlease review it if you would like to get more information on failed tests.".format(
                     LOG_OUTPUT_FILE))
    if not args.collect:
        print_notice(
            "\nIf you would like to open a support case please run this script with the \'collect\' feature flag in order to collect additional system data for troubleshooting. "
            "\'python cef_AMA_troubleshoot.py collect\'")
            "\'python cef_AMA_troubleshoot.py [STREAM_OPTION] collect\'")


if __name__ == '__main__':