[CHERRYPICK] azurelinux-sysinfo: fasttrack/2.0 to main (#9117)

This commit is contained in:
amritakohli 2024-05-15 15:02:18 -07:00 коммит произвёл GitHub
Родитель 35d5f54713
Коммит 5f94d0b3c7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
9 изменённых файлов: 673 добавлений и 1 удалений

1
.github/workflows/validate-cg-manifest.sh поставляемый
Просмотреть файл

@ -23,6 +23,7 @@ ignore_multiple_sources=" \
# List of ignored specs due to no source tarball to scan.
ignore_no_source_tarball=" \
azurelinux-sysinfo \
ca-certificates \
check-restart \
core-packages \

2
SPECS/LICENSES-AND-NOTICES/LICENSES-MAP.md сгенерированный

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -2152,6 +2152,7 @@
"azcopy",
"azure-iot-sdk-c",
"azure-storage-cpp",
"azurelinux-sysinfo",
"bazel",
"blobfuse",
"blobfuse2",

Просмотреть файл

@ -0,0 +1,13 @@
# systemd unit that runs the collect-sysinfo script.
[Unit]
Description=Azure Linux Sysinfo Service
# Order this unit after cloud-init.target and multi-user.target.
After=cloud-init.target multi-user.target
[Service]
# Disable Python output buffering so the script's print() output is emitted as it is produced.
Environment=PYTHONUNBUFFERED=1
Type=simple
ExecStart=/usr/bin/python3 /usr/bin/collect-sysinfo
# Send the script's stdout and stderr to the journal.
StandardOutput=journal
StandardError=journal
[Install]
# Enabling the unit attaches it to multi-user.target.
WantedBy=multi-user.target

Просмотреть файл

@ -0,0 +1,9 @@
{
"Signatures": {
"collect-sysinfo": "b47df8a856c49e4bc02b36d1c3dd2825b75b9d8449b5dae8af401fc6818131c9",
"sysinfo-schema-v1.json": "67b541239416bd5f9a77a0799881f21c2e5eea686dc7a3ccaffe6bd7219a4798",
"azurelinux-sysinfo.service": "c719ab2238d0412b7ac6a793cd83e5be7879023161f86fb29d1c0ca18e70631c",
"sysinfo-selinuxpolicies.cil": "1f0df94a09f4db09093743339b6162735b6f1c81108cd3b857a6dbc729630400"
}
}

Просмотреть файл

@ -0,0 +1,69 @@
Summary:        Package to deploy azurelinux-sysinfo service
Name:           azurelinux-sysinfo
Version:        2.0
Release:        1%{?dist}
License:        MIT
Vendor:         Microsoft Corporation
Distribution:   Azure Linux
Group:          System Environment/Base
URL:            https://aka.ms/azurelinux
Source0:        collect-sysinfo
Source1:        sysinfo-schema-v1.json
Source2:        azurelinux-sysinfo.service
Source3:        sysinfo-selinuxpolicies.cil
Requires:       systemd
Requires:       python3-psutil
# collect-sysinfo imports jsonschema to validate the collected data against the schema.
Requires:       python3-jsonschema
# NOTE(review): collect-sysinfo also invokes the cloud-init CLI; confirm whether cloud-init
# should be a hard requirement or is guaranteed by the target images.
%description
Deploys a systemd service that gathers system information related to the device, operating system, cloud-init, boot
time, resource utilization, installed packages, and SELinux mode. Collected information is written in JSON format to
a log file on the user's system for easy access and analysis. The systemd service runs at boot time if installed during
image creation.
%install
# Copy collection python script to /usr/bin/ (the only executable payload)
mkdir -p %{buildroot}%{_bindir}/
install -m 755 %{SOURCE0} %{buildroot}%{_bindir}/
# Copy data schema to /usr/share/azurelinux-sysinfo/ (data file, not executable)
mkdir -p %{buildroot}%{_datadir}/azurelinux-sysinfo/
install -m 644 %{SOURCE1} %{buildroot}%{_datadir}/azurelinux-sysinfo/
# Copy service to /etc/systemd/system/ (systemd warns about unit files marked executable)
mkdir -p %{buildroot}%{_sysconfdir}/systemd/system/
install -m 644 %{SOURCE2} %{buildroot}%{_sysconfdir}/systemd/system/
# Copy the sysinfo-selinuxpolicies file to /usr/share/selinux/packages/ (policy source, not executable)
mkdir -p %{buildroot}%{_datadir}/selinux/packages/
install -m 644 %{SOURCE3} %{buildroot}%{_datadir}/selinux/packages/
%files
# Collection script
%{_bindir}/collect-sysinfo
# Package-owned data directory holding the JSON schema
%dir %{_datadir}/azurelinux-sysinfo/
%{_datadir}/azurelinux-sysinfo/sysinfo-schema-v1.json
# systemd unit that runs the collection script
%{_sysconfdir}/systemd/system/azurelinux-sysinfo.service
# SELinux policy module source loaded by the post-install scriptlet
%{_datadir}/selinux/packages/sysinfo-selinuxpolicies.cil
%post
# Enable the systemd service
systemctl enable azurelinux-sysinfo.service
# Apply required SElinux policies only if selinux-policy is present.
# Use the portable redirection form: under a strict POSIX sh, "cmd &> file" is parsed as
# "cmd &" followed by "> file", which would make this test always succeed.
if rpm -q selinux-policy >/dev/null 2>&1; then
    semodule -i %{_datadir}/selinux/packages/sysinfo-selinuxpolicies.cil
fi
%postun
# Only remove the sysinfo-selinuxpolicies module on final erase ($1 == 0).
# On upgrade ($1 >= 1) this scriptlet of the old package runs after the post-install
# scriptlet of the new package, so removing the module here would undo the policy
# that the new version has just installed.
if [ $1 -eq 0 ] && rpm -q selinux-policy >/dev/null 2>&1; then
    semodule -r sysinfo-selinuxpolicies
fi
%changelog
* Thu Apr 04 2024 Amrita Kohli <amritakohli@microsoft.com> - 2.0-1
- License verified.
- Implementation of package that deploys azurelinux-sysinfo service.
- Original version for CBL-Mariner.

Просмотреть файл

@ -0,0 +1,293 @@
#!/usr/bin/python3
import argparse
import json
import shutil
import jsonschema
import psutil
import os
import re
import subprocess
# Directory that holds the JSON schema used to validate the collected data.
DATA_SCHEMA_DIR = "/usr/share/azurelinux-sysinfo"
# Schema version; used in the schema file name and reported in the "$schema" output field.
DATA_SCHEMA_VERSION = "v1"
DATA_SCHEMA_FILENAME = f"sysinfo-schema-{DATA_SCHEMA_VERSION}.json"
# File the validated data is written to as a single JSON line.
LOG_FILE_PATH = "/var/log/azurelinux-sysinfo.log"
# Value reported in the "source" output field.
SERVICE_NAME = "azurelinux-sysinfo-service"
def convert_to_secs(line):
    """Sum every duration token found in *line* and return the total in seconds.

    A token is a number (optionally with a decimal part) immediately followed
    by one of the units ``min``, ``s`` or ``ms``, e.g. ``"1min 1.458s"``.
    Returns ``0`` when no token is present.
    """
    total_secs = 0
    for amount, unit in re.findall(r"(\d+(?:\.\d+)?)(min|s|ms)", line):
        value = float(amount)
        if unit == "ms":
            value = value / 1000
        elif unit == "min":
            value = value * 60
        # Plain "s" values are already expressed in seconds.
        total_secs += value
    return total_secs
def collect_os_info():
    """Collect the kernel version and the OS release identifiers.

    Reads ``/etc/os-release`` (via ``cat``) and ``uname -r``.

    Returns:
        dict with the keys ``kernel_version``, ``release_version`` and
        ``release_version_id``.

    Raises:
        KeyError: if ``VERSION`` or ``VERSION_ID`` is missing from os-release.
    """
    print("Collecting os info...")
    release_info = subprocess.run(
        ["cat", "/etc/os-release"], capture_output=True, text=True
    )
    kernel_info = subprocess.run(["uname", "-r"], capture_output=True, text=True)

    release_data = {}
    for line in release_info.stdout.splitlines():
        line = line.strip()
        # os-release may contain blank lines and '#' comments; neither is an assignment.
        if not line or line.startswith("#"):
            continue
        name, separator, value = line.partition("=")
        if not separator:
            continue
        # Values may be wrapped in either double or single quotes.
        release_data[name.strip()] = value.strip().strip("\"'")

    return {
        "kernel_version": kernel_info.stdout.strip(),
        "release_version": release_data["VERSION"],
        "release_version_id": release_data["VERSION_ID"],
    }
def collect_boot_info():
    """Collect boot timings and the slowest units reported by systemd-analyze.

    Returns:
        dict with two keys:
        ``boot_time`` maps ``<stage>_boot_time_secs`` (one entry per stage
        named in parentheses by ``systemd-analyze time``) plus
        ``total_boot_time_secs`` to seconds;
        ``longest_running_processes`` is a list of ``{unit_name: seconds}``
        dicts for the first lines of ``systemd-analyze blame``.
        When ``systemd-analyze time`` yields no usable output, all times are 0
        and the process list is empty.
    """
    print("Collecting boot info...")
    # Known issue: In SELinux enforcing mode, systemd-analyze commands are expected to fail until required policies are added.
    # In this case, the boot times will be 0 and longest running processes will be empty.

    # Collect boot time
    result = subprocess.run(["systemd-analyze", "time"], capture_output=True, text=True)
    # Sample output for livecd image:
    # Startup finished in 153ms (firmware) + 554ms (loader) + 1.413s (kernel) + 908ms (userspace) = 3.030s
    # multi-user.target reached after 897ms in userspace
    # Sample output for host images:
    # Startup finished in 12.688s (kernel) + 8.082s (initrd) + 1min 1.458s (userspace) = 1min 22.230s
    # multi-user.target reached after 1min 966ms in userspace
    lines = result.stdout.strip().splitlines()
    # In a test setup on qemu, systemd-analyze returns empty
    if not lines or not lines[0].startswith("Startup finished in"):
        return {
            "boot_time": {
                "kernel_boot_time_secs": 0,
                "userspace_boot_time_secs": 0,
                "total_boot_time_secs": 0,
            },
            "longest_running_processes": [],
        }

    # Regular expression to extract each (possibly multi-unit) duration
    time_regex = r"((?:\d+)(?:\d*min\s?)?(?:\d*\.?\d*s\s?)?(?:\d*\.?\d*ms)?)"
    # Regular expression to extract the stage names between parentheses
    between_parentheses_regex = r"\((.*?)\)"
    boot_time_keys = re.findall(between_parentheses_regex, lines[0])
    boot_times_secs = [
        convert_to_secs(duration.strip())
        for duration in re.findall(time_regex, lines[0])
    ]
    # Durations appear in the same order as the stage names; the extra
    # trailing duration (after "=") is the total.
    boot_time = {
        stage + "_boot_time_secs": secs
        for stage, secs in zip(boot_time_keys, boot_times_secs)
    }
    boot_time["total_boot_time_secs"] = boot_times_secs[-1] if boot_times_secs else 0

    # Collect boot time longest running processes
    top_n = 3
    result = subprocess.run(
        ["systemd-analyze", "blame"], capture_output=True, text=True
    )
    # Sample output:
    # 43.642s systemd-networkd-wait-online.service
    process_list = []
    for line in result.stdout.splitlines()[:top_n]:
        fields = line.split()
        if not fields:
            continue
        process = fields[-1]
        # Parse only the duration columns: the unit name itself may contain
        # digits followed by "s"/"ms"/"min" that must not be counted as time.
        duration = " ".join(fields[:-1])
        process_list.append({process: convert_to_secs(duration)})

    return {"boot_time": boot_time, "longest_running_processes": process_list}
def collect_resource_utilization():
    """Collect disk, memory and CPU figures for the running system.

    Returns:
        dict with the keys ``disk_usage`` (size and usage of ``/`` in GiB,
        formatted as strings with two decimals), ``memory_usage`` (total and
        available memory in whole GiB, rounded down) and ``cpu_usage``
        (physical/logical CPU counts and the CPU utilisation percentage).
    """
    print("Collecting disk and memory usage...")
    gib = 1024**3

    # disk
    os_disk_usage = shutil.disk_usage("/")
    disk_usage = {
        "disk_size_gib": f"{os_disk_usage.total/gib:.2f}",
        "disk_usage_gib": f"{os_disk_usage.used/gib:.2f}",
    }

    # memory
    memory_info = psutil.virtual_memory()
    memory_usage = {
        "total_memory_gib": memory_info.total // gib,
        "available_memory_gib": memory_info.available // gib,
    }

    # cpu
    # psutil.cpu_percent() without an interval compares against the previous
    # call; the first call in a process has nothing to compare with and returns
    # a meaningless 0.0. This script samples only once, so measure over a short
    # blocking interval instead.
    cpu_sample_interval_secs = 0.5
    # NOTE(review): the schema names this field "cpu_usage_percent" while the
    # collected data uses "cpu_percent"; the key is left unchanged here because
    # downstream consumers may depend on it - confirm which name is intended.
    cpu_usage = {
        "physical_cpu_count": psutil.cpu_count(logical=False),
        "logical_cpu_count": psutil.cpu_count(logical=True),
        "cpu_percent": psutil.cpu_percent(interval=cpu_sample_interval_secs),
    }

    return {
        "disk_usage": disk_usage,
        "memory_usage": memory_usage,
        "cpu_usage": cpu_usage,
    }
def collect_package_info():
    """Return ``{"package_count": N}`` where N is the number of lines printed by ``rpm -qa``.

    Raises:
        subprocess.CalledProcessError: if ``rpm -qa`` exits with a non-zero status.
    """
    print("Collecting package info...")
    rpm_query = subprocess.run(
        ["rpm", "-qa"], check=True, text=True, capture_output=True
    )
    installed_packages = rpm_query.stdout.strip().splitlines()
    # TASK 4917: Adding package list resulted in hitting the size limit for the log,
    # so only logging package count until an alternative is implemented.
    return {"package_count": len(installed_packages)}
def collect_cloud_init_info():
    """Collect the hostname and the slowest cloud-init stages.

    Parses up to the first five entries of the first boot record printed by
    ``cloud-init analyze blame``.

    Returns:
        dict with the keys ``hostname`` and ``longest_running_processes``
        (a list of ``{"time": ..., "process": ...}`` dicts, both strings).

    Raises:
        subprocess.CalledProcessError: if ``cloud-init`` or ``hostname`` exits
            with a non-zero status.
    """
    print("Collecting cloud-init info...")
    # Collect cloud-init longest running processes
    result = subprocess.run(
        ["cloud-init", "analyze", "blame"], capture_output=True, text=True, check=True
    )
    lines = result.stdout.strip().splitlines()
    top_n = 5
    # Skipping the first line as it is "-- Boot Record 01 --"
    # Skipping the last line as it is "x boot records analyzed"
    end = min(top_n + 1, len(lines) - 1)
    process_list = []
    for line in lines[1:end]:
        if line.lstrip().startswith("--"):
            # Header of the next boot record: the first record had fewer than
            # top_n entries, so there is nothing more to read for it.
            break
        record_details = line.split()
        # An entry is exactly "<time> <process>"; skip blank or malformed lines
        # instead of failing on the two-value unpacking below.
        if len(record_details) != 2:
            continue
        duration, process = record_details
        process_list.append({"time": duration, "process": process})

    get_hostname = subprocess.run(
        ["hostname"], capture_output=True, text=True, check=True
    )
    return {
        "hostname": get_hostname.stdout.strip(),
        "longest_running_processes": process_list,
    }
def get_selinux_mode():
    """Return the stripped output of ``getenforce``.

    Raises:
        subprocess.CalledProcessError: if ``getenforce`` exits with a non-zero status.
    """
    completed = subprocess.run(
        ["getenforce"], check=True, text=True, capture_output=True
    )
    mode = completed.stdout.strip()
    return mode
def collect_system_info():
    """Return system-wide settings; currently only the SELinux mode."""
    print("Collecting system info...")
    selinux_mode = get_selinux_mode()
    return {"selinux_mode": selinux_mode}
def get_asset_id():
    """Return the lower-cased, stripped contents of the DMI ``product_uuid`` file.

    The file is read with ``cat``; a failing command is not treated as an
    error, so whatever was captured on stdout is returned.
    """
    print("Collecting asset id...")
    uuid_path = "/sys/devices/virtual/dmi/id/product_uuid"
    completed = subprocess.run(["cat", uuid_path], text=True, capture_output=True)
    return completed.stdout.lower().strip()
def has_valid_schema(data):
    """Validate *data* against the installed sysinfo JSON schema.

    Returns:
        True when validation succeeds; False (after printing the validation
        error) when ``jsonschema`` reports a ``ValidationError``.
    """
    schema_path = os.path.join(DATA_SCHEMA_DIR, DATA_SCHEMA_FILENAME)
    with open(schema_path, "r") as schema_fp:
        schema = json.load(schema_fp)

    try:
        jsonschema.validate(data, schema)
    except jsonschema.exceptions.ValidationError as err:
        print(f"Schema validation failed: {err}")
        return False
    else:
        return True
def main():
    """Collect all system information, validate it and write it to the log file.

    On successful schema validation the data is written to ``LOG_FILE_PATH``
    as one compact JSON line. Otherwise a failure message is printed and the
    process exits with status 1.

    Raises:
        SystemExit: with code 1 when schema validation fails.
    """
    print("Running azurelinux sysinfo collection...")
    asset_id = get_asset_id()
    os_info = collect_os_info()
    cloud_init_info = collect_cloud_init_info()
    boot_info = collect_boot_info()
    resource_utilization = collect_resource_utilization()
    package_info = collect_package_info()
    system_info = collect_system_info()
    # Use json as a data structure to store the data
    # since it is supported by Kusto
    data = {
        "$schema": DATA_SCHEMA_VERSION,
        "source": SERVICE_NAME,
        "asset_id": asset_id,
        "os_info": os_info,
        "cloud_init_info": cloud_init_info,
        "boot_info": boot_info,
        "resource_utilization": resource_utilization,
        "package_info": package_info,
        "system_info": system_info,
    }
    print(data)
    if not has_valid_schema(data):
        print("Azure Linux sysinfo collection failed.")
        # exit() is a helper injected by the site module for interactive use;
        # raise SystemExit directly so the exit code does not depend on it.
        raise SystemExit(1)

    # Dump the data to a log file, this path is added to fluentd config
    # and will be picked up by fluentd and sent through Geneva Agents
    with open(LOG_FILE_PATH, "w") as file:
        json.dump(data, file, separators=(',', ':'))
        # Add newline so that the fluentd tail plug-in consumes the log
        # line.
        file.write("\n")
    print("Azure Linux sysinfo collection completed successfully.")


if __name__ == "__main__":
    main()

Просмотреть файл

@ -0,0 +1,272 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "SPECS/azurelinux-sysinfo/sysinfo-schema-v1.json",
"title": "Azure Linux Sysinfo Schema",
"description": "This defines the schema for the collected Azure Linux system information",
"type": "object",
"properties": {
"source": {
"$id": "#/properties/source",
"type": "string",
"title": "Name of the service",
"description": "Service the logs are obtained from",
"required": [
"source"
],
"additionalProperties": false
},
"asset_id": {
"$id": "#/properties/asset_id",
"type": "string",
"title": "Asset ID",
"description": "Unique identifier (uuid) of the device",
"required": [
"asset_id"
],
"additionalProperties": false
},
"os_info": {
"$id": "#/properties/os_info",
"type": "object",
"title": "Operating system information",
"description": "Information about the Azure Linux operating system",
"properties": {
"kernel_version": {
"$id": "#/properties/os_info/properties/kernel_version",
"type": "string",
"title": "Linux kernel version",
"description": "The kernel version of Linux"
},
"release_version": {
"$id": "#/properties/os_info/properties/release_version",
"type": "string",
"title": "Azure Linux operating system release version",
"description": "The release version of upstream Azure Linux"
},
"release_version_id": {
"$id": "#/properties/os_info/properties/release_version_id",
"type": "string",
"title": "Azure Linux operating system release version id",
"description": "The release version id of upstream Azure Linux"
}
},
"required": [
"kernel_version",
"release_version",
"release_version_id"
],
"additionalProperties": false
},
"cloud_init_info": {
"$id": "#/properties/cloud_init_info",
"type": "object",
"title": "Cloud-init information",
"description": "Information about cloud-init applied to the Azure Linux operating system",
"properties": {
"hostname": {
"$id": "#/properties/cloud_init_info/properties/hostname",
"type": "string",
"title": "Hostname",
"description": "The hostname of the system"
},
"longest_running_processes": {
"$id": "#/properties/cloud_init_info/properties/longest_running_processes",
"type": "array",
"title": "Longest Running Processes",
"description": "List of top running processes that took the most time during cloud-init"
}
},
"required": [
"hostname",
"longest_running_processes"
],
"additionalProperties": false
},
"boot_info": {
"$id": "#/properties/boot_info",
"type": "object",
"title": "Operating system boot information",
"description": "Information about Azure Linux operating system booting",
"properties": {
"boot_time": {
"$id": "#/properties/boot_info/properties/boot_time",
"type": "object",
"title": "Boot time",
"description": "Boot time information",
"properties": {
"kernel_boot_time_secs": {
"$id": "#/properties/boot_info/properties/boot_time/properties/kernel_boot_time_secs",
"type": "number",
"title": "Kernel boot time in seconds",
"description": "Time spent in the kernel before userspace has been reached"
},
"initrd_boot_time_secs": {
"$id": "#/properties/boot_info/properties/boot_time/properties/initrd_boot_time_secs",
"type": "number",
"title": "Initrd boot time in seconds",
"description": "Time spent in the initrd before userspace has been reached"
},
"userspace_boot_time_secs": {
"$id": "#/properties/boot_info/properties/boot_time/properties/userspace_boot_time_secs",
"type": "number",
"title": "Userspace boot time in seconds",
"description": "Time spent in userspace before the system is ready to use"
},
"firmware_boot_time_secs": {
"$id": "#/properties/boot_info/properties/boot_time/properties/firmware_boot_time_secs",
"type": "number",
"title": "Firmware boot time in seconds",
"description": "Time spent in firmware before the system is ready to use"
},
"loader_boot_time_secs": {
"$id": "#/properties/boot_info/properties/boot_time/properties/loader_boot_time_secs",
"type": "number",
"title": "Loader boot time in seconds",
"description": "Time spent in loader before the system is ready to use"
},
"total_boot_time_secs": {
"$id": "#/properties/boot_info/properties/boot_time/properties/total_boot_time_secs",
"type": "number",
"title": "Total boot time in seconds",
"description": "Total time spent in the boot process"
}
},
"required": [
"kernel_boot_time_secs",
"userspace_boot_time_secs",
"total_boot_time_secs"
]
},
"longest_running_processes": {
"$id": "#/properties/boot_info/properties/longest_running_processes",
"type": "array",
"title": "Longest running processes",
"description": "List of top running processes that took the most time during boot"
}
},
"required": [
"boot_time",
"longest_running_processes"
]
},
"resource_utilization": {
"$id": "#/properties/resource_utilization",
"type": "object",
"title": "System resources utilization",
"description": "Information about resources usage",
"properties": {
"disk_usage": {
"$id": "#/properties/resource_utilization/properties/disk_usage",
"type": "object",
"title": "Disk usage",
"description": "Disk usage information",
"properties": {
"disk_size_gib": {
"$id": "#/properties/resource_utilization/properties/disk_usage/properties/disk_size_gib",
"type": "string",
"title": "Os disk size",
"description": "Os disk size in GiB when the system was booted"
},
"disk_usage_gib": {
"$id": "#/properties/resource_utilization/properties/disk_usage/properties/disk_usage_gib",
"type": "string",
"title": "Os disk usage",
"description": "Os disk usage in GiB when the system was booted"
}
},
"required": [
"disk_size_gib",
"disk_usage_gib"
]
},
"memory_usage": {
"$id": "#/properties/resource_utilization/properties/memory_usage",
"type": "object",
"title": "Memory usage",
"description": "Memory usage information",
"properties": {
"total_memory_gib": {
"$id": "#/properties/resource_utilization/properties/memory_usage/properties/total_memory_gib",
"type": "integer",
"title": "Total memory",
"description": "Total memory in GiB when the system was booted"
},
"available_memory_gib": {
"$id": "#/properties/resource_utilization/properties/memory_usage/properties/available_memory_gib",
"type": "integer",
"title": "Available memory",
"description": "Available memory in GiB when the system was booted"
}
}
},
"cpu_usage": {
"$id": "#/properties/resource_utilization/properties/cpu_usage",
"type": "object",
"title": "cpu usage & info",
"description": "Cpu usage information",
"properties": {
"physical_cpu_count": {
"$id": "#/properties/resource_utilization/properties/cpu_usage/properties/physical_cpu_count",
"type": "integer",
"title": "Physical cpu count",
"description": "Physical cpu count"
},
"logical_cpu_count": {
"$id": "#/properties/resource_utilization/properties/cpu_usage/properties/logical_cpu_count",
"type": "integer",
"title": "Logical cpu count",
"description": "Logical cpu count"
},
"cpu_usage_percent": {
"$id": "#/properties/resource_utilization/properties/cpu_usage/properties/cpu_usage_percent",
"type": "number",
"title": "Cpu usage percent",
"description": "Cpu usage percent"
}
}
}
},
"required": [
"disk_usage",
"memory_usage",
"cpu_usage"
]
},
"package_info": {
"$id": "#/properties/package_info",
"type": "object",
"title": "Package Information",
"description": "Information about the packages installed on Azure Linux",
"properties": {
"package_count": {
"$id": "#/properties/package_info/properties/package_count",
"type": "integer",
"title": "Package Count",
"description": "The number of packages installed on Azure Linux"
}
},
"required": [
"package_count"
],
"additionalProperties": false
},
"system_info": {
"$id": "#/properties/system_info",
"type": "object",
"title": "System Information",
"description": "Information about the system-wide settings",
"properties": {
"selinux_mode": {
"$id": "#/properties/system_info/properties/selinux_mode",
"type": "string",
"title": "SELinux Mode",
"description": "SELinux mode as reported by getenforce: Enforcing, Permissive, or Disabled"
}
},
"required": [
"selinux_mode"
],
"additionalProperties": false
}
}
}

Просмотреть файл

@ -0,0 +1,14 @@
; Additional allow rules for the systemd_analyze_t domain.
; Presumably these are the permissions systemd-analyze needs when collect-sysinfo
; invokes it in SELinux enforcing mode - confirm against audit logs.

; Directory search and file read access
(allow systemd_analyze_t sysctl_kernel_t (dir (search)))
(allow systemd_analyze_t locale_t (dir (search)))
(allow systemd_analyze_t init_runtime_t (dir (search)))
(allow systemd_analyze_t sysctl_kernel_t (file (read)))
(allow systemd_analyze_t locale_t (file (read)))
; Capability on its own domain and connection to init's unix stream socket
(allow systemd_analyze_t systemd_analyze_t (capability (net_admin)))
(allow systemd_analyze_t init_t (unix_stream_socket (connectto)))
(allow systemd_analyze_t system_dbusd_runtime_t (dir (search)))
(allow systemd_analyze_t security_t (filesystem (getattr)))
(allow systemd_analyze_t selinux_config_t (dir (search)))
; Status queries against init, systemd units and etc_t-labelled services
(allow systemd_analyze_t init_t (system (status)))
(allow systemd_analyze_t init_t (service (status)))
(allow systemd_analyze_t systemdunit (service (status)))
(allow systemd_analyze_t etc_t (service (status)))