diff --git a/.script/tests/asimParsersTest/ingestASimSampleData.py b/.script/tests/asimParsersTest/ingestASimSampleData.py index fcc9411118..b124fa9946 100644 --- a/.script/tests/asimParsersTest/ingestASimSampleData.py +++ b/.script/tests/asimParsersTest/ingestASimSampleData.py @@ -68,19 +68,36 @@ def convert_schema_csv_to_json(csv_file): return data def convert_data_csv_to_json(csv_file): + def convert_value(value): + # Try to convert the value to an integer, then to a float, and keep it as a string if those fail + try: + # Try integer conversion + return int(value) + except ValueError: + try: + # Try float conversion + return float(value) + except ValueError: + # Return the value as-is (string) if it's not numeric + return value + data = [] - with open(csv_file, 'r',encoding='utf-8-sig') as file: + with open(csv_file, 'r', encoding='utf-8-sig') as file: reader = csv.DictReader(file) for row in reader: - table_name=row['Type'] - data.append(row) + table_name = row['Type'] + # Convert each value in the row to its appropriate type + processed_row = {key: convert_value(value) for key, value in row.items()} + data.append(processed_row) + for item in data: for key in list(item.keys()): - # If the key matches 'TimeGenerated [UTC]', rename it - if key.endswith('[UTC]'): - substring = key.split(" [")[0] - item[substring] = item.pop(key) - return data , table_name + # If the key matches '[UTC]' or '[Local Time]', rename it + if key.endswith(('[UTC]', '[Local Time]')): + substring = key.split(" [")[0] + item[substring] = item.pop(key) + + return data, table_name def check_for_custom_table(table_name): if table_name in lia_supported_builtin_table: @@ -288,9 +305,11 @@ for file in parser_yaml_files: asim_parser_url = f'{SENTINEL_REPO_RAW_URL}/{commit_number}/{file}' asim_parser = read_github_yaml(asim_parser_url) parser_query = asim_parser.get('ParserQuery', '') + normalization = asim_parser.get('Normalization', {}) + schema = normalization.get('Schema') event_vendor, event_product, schema_name = extract_event_vendor_product(parser_query, file) - SampleDataFile = f'{event_vendor}_{event_product}_{schema_name}_IngestedLogs.csv' + SampleDataFile = f'{event_vendor}_{event_product}_{schema}_IngestedLogs.csv' sample_data_url = f'{SENTINEL_REPO_RAW_URL}/{commit_number}/{SAMPLE_DATA_PATH}' SampleDataUrl = sample_data_url+SampleDataFile response = requests.get(SampleDataUrl)