Noam Landress 2023-06-13 17:54:01 +03:00
Parent 7175f62d05
Commit 4ff1cffea6
1 changed file with 93 additions and 74 deletions

View file

@@ -17,19 +17,9 @@ PATH_FOR_CSS_TICKET = {
"Gov": "https://portal.azure.us/#blade/Microsoft_Azure_Support/HelpAndSupportBlade/overview"}
AGENT_CONF_FILE = "/etc/opt/microsoft/azuremonitoragent/config-cache/mdsd.hr.json"
FAILED_TESTS_COUNT = 0
STREAM_SCENARIO = "cef" # default value
WARNING_TESTS_COUNT = 0
NOT_RUN_TESTS_COUNT = 0
SCRIPT_HELP_MESSAGE = "Usage: python cef_AMA_troubleshoot.py [STREAM_NAME] [OPTION]\n" \
"Runs CEF validation tests on the collector machine and generates a log file here- /tmp/cef_troubleshooter_output_file.log\n\n" \
" --cef/--CEF run the troubleshooting script for the CEF scenario." \
" --asa/--ASA run the troubleshooting script for the Cisco ASA scenario." \
" --ftd/--FTD run the troubleshooting script for the Cisco FTD scenario." \
" collect, runs the script in collect mode. Useful in case you want to open a ticket. Generates an output file here- /tmp/cef_troubleshooter_collection_output.log\n" \
" -h, --help display the help and exit\n\n" \
"Example:\n" \
" python cef_AMA_troubleshoot.py --cef\n" \
" python cef_AMA_troubleshoot.py --cisco collect\n\n" \
"This script verifies the installation of the CEF connector on the collector machine. It returns a status for each test and action items to fix detected issues."
DELIMITER = "\n" + "-" * 20 + "\n"
@@ -69,6 +59,7 @@ class CommandShellExecution(object):
"""
This class executes all the shell-related commands in the terminal for each test.
"""
def __init__(self, command_name, command_to_run, result_keywords_array=None, fault_keyword=None,
command_result=None,
command_result_err=None):
@@ -114,11 +105,13 @@ class CommandVerification(CommandShellExecution):
"""
This class runs all the necessary verifications for the current test.
"""
def __init__(self, command_name, command_to_run, result_keywords_array=None, fault_keyword=None,
command_result=None,
command_result_err=None,
is_successful=False):
super(CommandVerification, self).__init__(command_name, command_to_run, result_keywords_array, fault_keyword, command_result, command_result_err)
super(CommandVerification, self).__init__(command_name, command_to_run, result_keywords_array, fault_keyword,
command_result, command_result_err)
self.is_successful = is_successful
def __repr__(self):
@@ -248,11 +241,13 @@ class AgentInstallationVerifications:
AGENT_INSTALLATION_DOC = "https://docs.microsoft.com/azure/azure-monitor/agents/azure-monitor-agent-manage"
AGENT_NOT_INSTALLED_ERROR_MESSAGE = "Could not detect an AMA service running and listening on the machine." \
" Please follow this documentation in order to install it and verify your" \
" machine's operating system is in the supported list- {}".format(AGENT_INSTALLATION_DOC)
" machine's operating system is in the supported list- {}".format(
AGENT_INSTALLATION_DOC)
AGENT_NOT_RUNNING_ERROR_MESSAGE = "Detected AMA is installed on the machine but not running. Please start the agent by running " \
"\'service azuremonitoragent start\' \nif the agent service fails to start," \
" please run the following command to review the agent error log file here- " \
"\'cat /var/opt/microsoft/azuremonitoragent/log/mdsd.err | tail -n 15\'".format(AGENT_INSTALLATION_DOC)
"\'cat /var/opt/microsoft/azuremonitoragent/log/mdsd.err | tail -n 15\'".format(
AGENT_INSTALLATION_DOC)
OMS_RUNNING_ERROR_MESSAGE = "Detected the OMS Agent running on your machine. If it is not necessary, please remove it to avoid duplicate data in the workspace, which can result in increased costs"
@@ -320,7 +315,6 @@ class DCRConfigurationVerifications:
This class is for data collection rules verifications
"""
# CONSTANTS
global STREAM_SCENARIO
DCR_DOC = "https://docs.microsoft.com/azure/azure-monitor/agents/data-collection-rule-overview"
DCRA_DOC = "https://docs.microsoft.com/rest/api/monitor/data-collection-rule-associations"
CEF_STREAM_NAME = "SECURITY_CEF_BLOB"
@@ -334,7 +328,6 @@ class DCRConfigurationVerifications:
"be collected from this machine to any workspace. Please create a DCR using the following documentation- " \
"{} and run again".format(DCR_DOC)
MULTI_HOMING_MESSAGE = "Detected multiple collection rules sending the same stream. This scenario is called multi-homing and might affect the agent's performance"
def verify_dcr_exists(self):
"""
Verifying there is at least one DCR on the machine
@@ -353,16 +346,19 @@ class DCRConfigurationVerifications:
"""
Verifying there is a DCR on the machine for forwarding CEF data
"""
global STREAM_SCENARIO
command_name = "verify_DCR_content_has_stream"
revert_to_ftd = False
if STREAM_SCENARIO == 'ftd':
self.STREAM_SCENARIO = 'asa'
else:
self.STREAM_SCENARIO = STREAM_SCENARIO
STREAM_SCENARIO = 'asa'
revert_to_ftd = True
command_to_run = "sudo grep -ri \"{}\" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/".format(
self.STREAM_NAME[self.STREAM_SCENARIO])
result_keywords_array = [self.STREAM_NAME[self.STREAM_SCENARIO]]
self.STREAM_NAME[STREAM_SCENARIO])
result_keywords_array = [self.STREAM_NAME[STREAM_SCENARIO]]
command_object = CommandVerification(command_name, command_to_run, result_keywords_array)
command_object.run_full_test()
if revert_to_ftd:
STREAM_SCENARIO = 'ftd'
if not command_object.is_successful:
print_error(self.DCR_MISSING_CEF_STREAM_ERR)
return False
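A note on the revert_to_ftd change above: it temporarily rewrites the global STREAM_SCENARIO to 'asa' (the FTD scenario shares the ASA stream name) and restores it afterwards. A minimal sketch of an alternative that maps the scenario locally instead of mutating the global, using a hypothetical helper name:

def effective_stream_scenario(stream_scenario):
    # Sketch only: FTD events are matched against the ASA stream name,
    # so translate the scenario locally instead of mutating the global
    # STREAM_SCENARIO and reverting it after the test.
    return 'asa' if stream_scenario == 'ftd' else stream_scenario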
@@ -372,11 +368,12 @@ class DCRConfigurationVerifications:
"""
Verifying that the CEF DCR on the machine has valid content with all necessary DCR components
"""
global STREAM_SCENARIO
command_name = "verify_dcr_has_valid_content"
if STREAM_SCENARIO == 'ftd':
self.STREAM_SCENARIO = 'asa'
STREAM_SCENARIO = 'asa'
command_to_run = "sudo grep -ri \"{}\" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/".format(
self.STREAM_NAME[self.STREAM_SCENARIO])
self.STREAM_NAME[STREAM_SCENARIO])
result_keywords_array = ["stream", "kind", "syslog", "dataSources", "configuration", "facilityNames",
"logLevels", "SecurityInsights", "endpoint", "channels", "sendToChannels", "ods-",
"opinsights.azure", "id"]
@@ -384,7 +381,8 @@ class DCRConfigurationVerifications:
command_object.run_command(should_decode=False)
command_object.command_result = command_object.command_result.decode('UTF-8').split('\n')[:-1]
for dcr in command_object.command_result:
dcr_path = re.search("(/etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/.*.json)", str(dcr)).group()
dcr_path = re.search("(/etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/.*.json)",
str(dcr)).group()
for key_word in command_object.result_keywords_array:
if str(key_word) not in str(dcr):
command_object.is_command_successful(should_fail=True)
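The loop above checks each matching DCR chunk as raw text for the expected keywords. For illustration only, here is a self-contained sketch of the same keyword scan done per chunk file, with a hypothetical helper name and the keyword list taken from the diff:

import glob

REQUIRED_KEYWORDS = ["stream", "kind", "syslog", "dataSources", "configuration",
                     "facilityNames", "logLevels", "SecurityInsights", "endpoint",
                     "channels", "sendToChannels", "ods-", "opinsights.azure", "id"]

def dcr_chunks_missing_keywords(config_dir="/etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/"):
    # Sketch only: report which DCR chunk files lack any of the expected
    # keywords, scanning the raw JSON text the same way the grep-based test does.
    missing = {}
    for path in glob.glob(config_dir + "*.json"):
        with open(path) as config_file:
            text = config_file.read()
        absent = [keyword for keyword in REQUIRED_KEYWORDS if keyword not in text]
        if absent:
            missing[path] = absent
    return missing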
@@ -398,11 +396,12 @@ class DCRConfigurationVerifications:
"""
Counting the number of DCRs forwarding CEF data in order to alert on multi-homing scenarios.
"""
global STREAM_SCENARIO
command_name = "check_multi_homing"
if STREAM_SCENARIO == 'ftd':
self.STREAM_SCENARIO = 'asa'
STREAM_SCENARIO = 'asa'
command_to_run = "sudo grep -ri \"{}\" /etc/opt/microsoft/azuremonitoragent/config-cache/configchunks/ | wc -l".format(
self.STREAM_NAME[self.STREAM_SCENARIO])
self.STREAM_NAME[STREAM_SCENARIO])
command_object = CommandVerification(command_name, command_to_run)
command_object.run_command()
try:
@@ -432,6 +431,7 @@ class SyslogDaemonVerifications:
"""
This class is for Syslog daemon related verifications
"""
def __init__(self):
self.command_name = "verify_Syslog_daemon_listening"
self.SYSLOG_DAEMON = ""
@@ -445,8 +445,10 @@ class SyslogDaemonVerifications:
"""
This function determines which Syslog daemon is running on the machine (Rsyslog or Syslog-ng)
"""
is_rsyslog_running = CommandVerification("find_Rsyslog_daemon", "if [ `ps -ef | grep rsyslog | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
is_syslog_ng_running = CommandVerification("find_Syslog-ng_daemon", "if [ `ps -ef | grep syslog-ng | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
is_rsyslog_running = CommandVerification("find_Rsyslog_daemon",
"if [ `ps -ef | grep rsyslog | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
is_syslog_ng_running = CommandVerification("find_Syslog-ng_daemon",
"if [ `ps -ef | grep syslog-ng | grep -v grep | wc -l` -gt 0 ]; then echo \"True\"; else echo \"False\"; fi")
is_rsyslog_running.run_command(), is_syslog_ng_running.run_command()
if "True" in str(is_rsyslog_running.command_result):
self.SYSLOG_DAEMON = "rsyslog"
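Side note on the detection commands above: the ps -ef | grep ... | grep -v grep pipelines can be replaced by pgrep, which avoids matching the grep process itself. A minimal sketch of that alternative check, with assumed (typical) daemon process names, not the script's actual code:

import subprocess

def daemon_running(process_name):
    # Sketch only: pgrep exits with status 0 when at least one process whose
    # name matches exactly is running, so no "grep -v grep" workaround is needed.
    return subprocess.call(["pgrep", "-x", process_name],
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL) == 0

# Typical daemon process names: "rsyslogd" for Rsyslog, "syslog-ng" for Syslog-ng.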
@@ -521,7 +523,8 @@ class OperatingSystemVerifications:
SELINUX_DOCUMENTATION = "https://access.redhat.com/documentation/red_hat_enterprise_linux/8/html/using_selinux/changing-selinux-states-and-modes_using-selinux#changing-selinux-modes_changing-selinux-states-and-modes"
SELINUX_RUNNING_ERROR_MESSAGE = "Detected SELinux running on the machine. The CEF connector does not support any form of hardening at the moment, " \
"and having SELinux in Enforcing mode can harm the forwarding of data. Please disable SELinux by running the command \'setenforce 0\'. " \
"This will disable SELinux temporarily. In order to disable it permanently, please follow this documentation- {}".format(SELINUX_DOCUMENTATION)
"This will disable SELinux temporarily. In order to disable it permanently, please follow this documentation- {}".format(
SELINUX_DOCUMENTATION)
IPTABLES_BLOCKING_TRAFFIC_ERROR_MESSAGE = "Iptables might be blocking incoming traffic to the agent." \
" Please verify there are no firewall rules blocking incoming traffic to port 514 and run again."
FULL_DISK_ERROR_MESSAGE = "There is less than 1 GB of free disk space left on this machine." \
@@ -590,7 +593,6 @@ class IncomingEventsVerifications:
This class is for sending and capturing CEF events in the incoming stream of events to the syslog daemon port
"""
# CONSTANTS
global STREAM_SCENARIO
FIXED_CEF_MESSAGE = "0|TestCommonEventFormat|MOCK|common=event-format-test|end|TRAFFIC|1|rt=$common=event-formatted-receive_time deviceExternalId=0002D01655 src=1.1.1.1 dst=2.2.2.2 sourceTranslatedAddress=1.1.1.1 destinationTranslatedAddress=3.3.3.3 cs1Label=Rule cs1=CEF_TEST_InternetDNS"
FIXED_CISCO_MESSAGE = "Deny inbound TCP src inet:1.1.1.1 dst inet:2.2.2.2"
FIXED_FTD_MESSAGE = "Teardown dynamic UDP translation from inside:10.51.100.1/54453 to outside:10.0.2.3/54453 duration 0:00:00"
@@ -604,8 +606,8 @@ class IncomingEventsVerifications:
"""
Validate there are incoming events for the relevant stream.
:param line: a text line from the tcpdump stream
:param ident: The message tag to look for in the message line.
:return: True if the stream exists in the line. Otherwise, false.
:ident the ident of the stream being sent.
"""
if ident in line:
return True
@@ -621,10 +623,9 @@ class IncomingEventsVerifications:
start_seconds = int(round(time.time()))
end_seconds = int(round(time.time()))
mock_message_counter = 0
mock_message_max = 3
command_name = "listen_to_incoming_events"
command_to_run = "sudo tcpdump -A -ni any port 514 -vv"
result_keywords_array = [self.IDENT_NAME[STREAM_SCENARIO]]
command_to_run = "sudo tcpdump -A -l -ni any port 514 -vv"
result_keywords_array = [STREAM_SCENARIO.upper()]
command_object = CommandVerification(command_name, command_to_run, result_keywords_array)
print("Attempting to capture events using tcpdump. This could take up to " + str(
tcpdump_time_restriction) + " seconds.")
@@ -640,22 +641,19 @@ class IncomingEventsVerifications:
poll_obj = select.poll()
poll_obj.register(tcp_dump.stdout, select.POLLIN)
while (end_seconds - start_seconds) < tcpdump_time_restriction:
if mock_message is True and mock_message_counter < mock_message_max:
if mock_message and mock_message_counter < 20:
# Sending mock messages
mock_message_counter += 1
self.send_message_local(514, 2)
poll_result = poll_obj.poll(2500)
self.send_message_local(514, 1)
poll_result = poll_obj.poll(0)
if poll_result:
while line:
line = tcp_dump.stdout.readline().decode('utf-8').strip("\n")
if not line:
break
if self.handle_tcpdump_line(line, self.IDENT_NAME[STREAM_SCENARIO]):
command_object.command_result = line
command_object.run_full_verification()
print_ok("Found {0} in stream. Please verify {0} events arrived at your workspace".format(
line = tcp_dump.stdout.readline().decode('utf-8').strip("\n")
if self.handle_tcpdump_line(line, STREAM_SCENARIO.upper()):
command_object.command_result = line
command_object.run_full_verification()
print_ok("Found {0} in stream. Please verify {0} events arrived at your workspace".format(
STREAM_SCENARIO.upper()))
return True
return True
end_seconds = int(round(time.time()))
print_error("Could not locate {0} message in tcpdump. Please verify {0} events can be sent to the machine and"
" there is not firewall blocking incoming traffic".format(STREAM_SCENARIO.upper()))
@@ -672,7 +670,7 @@ class IncomingEventsVerifications:
try:
for index in range(0, amount):
command_tokens = ["logger", "-p", "local4.warn", "-t", self.IDENT_NAME[STREAM_SCENARIO],
self.STREAM_MESSAGE[STREAM_SCENARIO], "--rfc3164", "-P", str(port), "-n", "127.0.0.1"]
self.STREAM_MESSAGE[STREAM_SCENARIO], "--rfc3164", "-P", str(port), "-n", "127.0.0.1"]
logger = subprocess.Popen(command_tokens, stdout=subprocess.PIPE)
o, e = logger.communicate()
if e is not None:
@@ -736,7 +734,8 @@ class SystemInfo:
command_object.command_result) + DELIMITER).replace(
'%', '%%').replace('\\n', '\n')
def trace_activation(self):
@staticmethod
def trace_activation():
flag = '-T 0x1002'
file_path = '/etc/default/azuremonitoragent'
# Check if the flag already exists
@@ -806,38 +805,56 @@ def find_dcr_cloud_environment():
return DEFAULT_MACHINE_ENV
def getargs(should_print=True):
def getargs():
"""
Get execution args using argparse lib
"""
parser = argparse.ArgumentParser(description=SCRIPT_HELP_MESSAGE)
parser.add_argument('collect', nargs='?', help='Collect syslog message samples to file')
parser.add_argument('--CEF', '--cef', action='store_true', default=False, help='Validate CEF DCR and events')
parser.add_argument('--ASA', '--asa', action='store_true', default=False, help='Validate Cisco ASA DCR and events')
parser.add_argument('--FTD', '--ftd', action='store_true', default=False, help='Validate Cisco FTD DCR and events')
args = parser.parse_args()
if should_print:
for arg in vars(args):
if getattr(args, arg):
print_notice("The scenario chosen is: {}".format(arg))
return args
def main():
global STREAM_SCENARIO
args = getargs()
verify_root_privileges()
if args.CEF:
STREAM_SCENARIO = "cef"
elif args.ASA:
STREAM_SCENARIO = "asa"
parser = argparse.ArgumentParser()
parser.add_argument('collect', nargs='?',
help='runs the script in collect mode. Useful in case you want to open a ticket.')
parser.add_argument('--CEF', '--cef', action='store_true', default=False,
help='run the troubleshooting script for the CEF scenario.')
parser.add_argument('--ASA', '--asa', action='store_true', default=False,
help='run the troubleshooting script for the Cisco ASA scenario.')
parser.add_argument('--FTD', '--ftd', action='store_true', default=False,
help='run the troubleshooting script for the Cisco FTD scenario.')
args = parser.parse_args()
if args.ASA:
STREAM_SCENARIO = "asa"
elif args.FTD:
STREAM_SCENARIO = "ftd"
else:
print_error("Invalid stream name provided. The supported streams are either \'--CEF\' or \'--ASA\' or \'--FTD\'. Please try again.")
sys.exit()
STREAM_SCENARIO = "cef"
return args
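getargs() above accepts the three scenario flags independently, and print_scenario() later rejects combinations. As a design note, argparse can enforce at most one scenario flag at parse time with a mutually exclusive group; a hedged sketch of that alternative, not the script's actual code:

import argparse

def getargs_sketch():
    # Sketch only: the mutually exclusive group makes argparse itself fail
    # when more than one scenario flag is supplied.
    parser = argparse.ArgumentParser()
    parser.add_argument('collect', nargs='?',
                        help='runs the script in collect mode')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--cef', '--CEF', action='store_true',
                       help='run the troubleshooting script for the CEF scenario')
    group.add_argument('--asa', '--ASA', action='store_true',
                       help='run the troubleshooting script for the Cisco ASA scenario')
    group.add_argument('--ftd', '--FTD', action='store_true',
                       help='run the troubleshooting script for the Cisco FTD scenario')
    args = parser.parse_args()
    if args.asa:
        stream_scenario = "asa"
    elif args.ftd:
        stream_scenario = "ftd"
    else:
        stream_scenario = "cef"  # default, matching the script's behaviour
    return args, stream_scenario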
def print_scenario(args, should_print=True):
"""
:param args: the arguments returned from the getargs function
:param should_print: True by default; prints the scenario chosen by the user.
"""
if should_print:
scenario_provided = False
for arg in vars(args):
if getattr(args, arg):
if arg == 'collect':
continue
if not scenario_provided:
scenario_provided = True
else:
print_error("More than 1 stream provided. Please run the script again with only one scenario.\n"
"For more information run 'python cef_AMA_troubleshoot.py -h'. Exiting.")
sys.exit(1)
print_notice("The scenario chosen is: {}".format(STREAM_SCENARIO.upper()))
def main():
verify_root_privileges()
subprocess.Popen(['rm', '-f', LOG_OUTPUT_FILE],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
args = getargs()
print_scenario(args)
if args.collect:
print_notice("Starting to collect data. This may take a couple of seconds")
machine_env = find_dcr_cloud_environment()
@@ -845,7 +862,8 @@ def main():
system_info = SystemInfo()
system_info.handle_commands()
print(
"Finished collecting data \nIn order to open a support case please browse: {}".format(PATH_FOR_CSS_TICKET[machine_env]))
"Finished collecting data \nIn order to open a support case please browse: {}".format(
PATH_FOR_CSS_TICKET[machine_env]))
with open(LOG_OUTPUT_FILE, 'a') as file:
file.write('*' * 10 + 'FINISHED COLLECTION' + '*' * 10)
time.sleep(1)
@@ -872,11 +890,12 @@ def main():
else:
print_ok("All tests passed successfully")
print_notice("This script generated an output file located here - {}"
"\nPlease review it if you would like to get more information on failed tests.".format(LOG_OUTPUT_FILE))
"\nPlease review it if you would like to get more information on failed tests.".format(
LOG_OUTPUT_FILE))
if not args.collect:
print_notice(
"\nIf you would like to open a support case please run this script with the \'collect\' feature flag in order to collect additional system data for troubleshooting."
"\'python cef_AMA_troubleshoot.py collect\'")
"\'python cef_AMA_troubleshoot.py [STREAM_OPTION] collect\'")
if __name__ == '__main__':