gecko-dev/servo/tests/heartbeats/process_logs.py

409 строки
16 KiB
Python

#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import matplotlib.pyplot as plt
import numpy as np
import os
from os import path
import sys
import warnings
# Column indices (passed as `usecols` to np.loadtxt) of the fields read from
# each row of a heartbeat log file.
HB_LOG_IDX_START_TIME = 7
HB_LOG_IDX_END_TIME = HB_LOG_IDX_START_TIME + 1
HB_LOG_IDX_START_ENERGY = 14
HB_LOG_IDX_END_ENERGY = HB_LOG_IDX_START_ENERGY + 1
# Name of the profiler whose log is used to compute the power values.
ENERGY_PROFILER_NAME = 'ApplicationHeartbeat'
# Name of the per-trial summary file, and the 0-based line indices within it
# of the time, energy and power entries.
SUMMARY_OUTPUT = "summary.txt"
SUMMARY_TIME_IDX = 8
SUMMARY_ENERGY_IDX = SUMMARY_TIME_IDX + 1
SUMMARY_POWER_IDX = SUMMARY_ENERGY_IDX + 1
def autolabel(rects, ax):
    """Attach a text label above each bar.

    The label text is currently the empty string, so this only reserves an
    annotation centred horizontally on each bar and placed 5% above its top.

    Keyword arguments:
    rects -- the bars to label (objects providing get_x/get_width/get_height)
    ax -- the axes the text is added to
    """
    for bar in rects:
        x_center = bar.get_x() + bar.get_width() / 2.
        y_above = 1.05 * bar.get_height()
        ax.text(x_center, y_above, '', ha='center', va='bottom')
def plot_raw_totals(config, plot_data, max_time, max_time_std, max_energy, max_energy_std, output_dir, normalize):
    """Plot the raw totals for a configuration and save them as "<config>.png".

    Keyword arguments:
    config -- configuration name
    plot_data -- [(profiler name, total_time, total_time_std, total_energy, total_energy_std)]
    max_time, max_time_std, max_energy, max_energy_std -- single values, used to fix the
        Y axis limits when not normalizing (for consistency across plots)
    output_dir -- the directory the image is written to
    normalize -- True/False
    """
    # Divisor applied to raw values so they match the 'ms' / 'Joules' axis labels
    # (i.e. the raw values are presumably in ns and uJ).
    scale = 1000000.0

    plot_data = sorted(plot_data)
    keys = [p for (p, tt, tts, te, tes) in plot_data]
    # Explicit float arrays, so the scaling below is ordinary array arithmetic
    total_times = np.array([tt for (p, tt, tts, te, tes) in plot_data], dtype=float)
    total_times_std = np.array([tts for (p, tt, tts, te, tes) in plot_data], dtype=float)
    total_energies = np.array([te for (p, tt, tts, te, tes) in plot_data], dtype=float)
    total_energies_std = np.array([tes for (p, tt, tts, te, tes) in plot_data], dtype=float)

    fig, ax1 = plt.subplots()
    ind = np.arange(len(keys))  # the x locations for the groups
    width = 0.35  # the width of the bars
    # add some text for labels, title and axes ticks
    ax1.set_title('Time/Energy Data for Configuration ' + config)
    ax1.set_xticks(ind + width)
    ax1.set_xticklabels(keys, rotation=45)
    fig.set_tight_layout(True)
    fig.set_size_inches(len(plot_data) / 1.5, 8)

    ax2 = ax1.twinx()

    if normalize:
        # Express every value as a fraction of the sum over all profilers
        time_sum = np.sum(total_times)
        energy_sum = np.sum(total_energies)
        total_times_std = total_times_std / time_sum
        total_times = total_times / time_sum
        total_energies_std = total_energies_std / energy_sum
        total_energies = total_energies / energy_sum
        ax1.set_ylabel('Time (Normalized)')
        ax2.set_ylabel('Energy (Normalized)')
    else:
        total_times_std = total_times_std / scale
        total_times = total_times / scale
        total_energies_std = total_energies_std / scale
        total_energies = total_energies / scale
        ax1.set_ylabel('Time (ms)')
        ax2.set_ylabel('Energy (Joules)')

    rects1 = ax1.bar(ind, total_times, width, color='r', yerr=total_times_std)
    rects2 = ax2.bar(ind + width, total_energies, width, color='y', yerr=total_energies_std)
    ax1.legend([rects1[0], rects2[0]], ['Time', 'Energy'])

    # set axis
    if normalize:
        ax1.set_ylim(ymin=0, ymax=1)
        ax2.set_ylim(ymin=0, ymax=1)
    else:
        # leave 25% headroom above the largest mean + stddev seen in any plot
        ax1.set_ylim(ymin=0, ymax=((max_time + max_time_std) * 1.25 / scale))
        ax2.set_ylim(ymin=0, ymax=((max_energy + max_energy_std) * 1.25 / scale))

    autolabel(rects1, ax1)
    autolabel(rects2, ax2)

    # plt.show()
    plt.savefig(path.join(output_dir, config + ".png"))
    plt.close(fig)
def create_raw_total_data(config_data):
    """Compute per-profiler time/energy totals, averaged over all trials.

    Return: [(profiler, time_mean, time_stddev, energy_mean, energy_stddev)]

    Keyword arguments:
    config_data -- [(trial, trial_data)], where trial_data is
        [(profiler, [start times], [end times], [start energies], [end energies])]
    """
    # The number of heartbeats may differ between trials, so each trial is
    # reduced to a single total per profiler before averaging.
    # Both dicts map profiler name -> list with one total per trial.
    times_by_profiler = {}
    energies_by_profiler = {}

    for (_trial, trial_data) in config_data:
        for (profiler, ts, te, es, ee) in trial_data:
            times_by_profiler.setdefault(profiler, []).append(np.sum(te - ts))
            energies_by_profiler.setdefault(profiler, []).append(np.sum(ee - es))

    # Mean and stddev of the per-trial totals
    summary = []
    for profiler in times_by_profiler:
        trial_times = times_by_profiler[profiler]
        trial_energies = energies_by_profiler[profiler]
        summary.append((profiler,
                        np.mean(trial_times),
                        np.std(trial_times),
                        np.mean(trial_energies),
                        np.std(trial_energies)))
    return summary
def plot_all_raw_totals(config_list, output_dir):
    """Plot column charts of the raw total time/energy spent in each profiler category.

    Two charts are written per configuration: a normalized one under
    "raw_totals_normalized" and an absolute one under "raw_totals". Both
    sub-directories are created inside output_dir.

    Keyword arguments:
    config_list -- [(config, result of process_config_dir(...))]
    output_dir -- where to write plots to
    """
    normalized_dir = path.join(output_dir, 'raw_totals_normalized')
    os.makedirs(normalized_dir)
    absolute_dir = path.join(output_dir, 'raw_totals')
    os.makedirs(absolute_dir)

    # [(config, [(profiler, time_mean, time_stddev, energy_mean, energy_stddev)])]
    totals_by_config = [(config, create_raw_total_data(config_data)) for (config, config_data) in config_list]

    # Flatten the statistics of every profiler in every configuration
    mean_times = []
    mean_times_std = []
    mean_energies = []
    mean_energies_std = []
    for (_config, profiler_totals) in totals_by_config:
        for (_profiler, tt, tts, te, tes) in profiler_totals:
            mean_times.append(tt)
            mean_times_std.append(tts)
            mean_energies.append(te)
            mean_energies_std.append(tes)

    # get consistent max time/energy values across plots
    max_t = np.max(mean_times)
    max_t_std = np.max(mean_times_std)
    max_e = np.max(mean_energies)
    max_e_std = np.max(mean_energies_std)

    # All normalized plots first, then all absolute ones
    for (target_dir, normalize) in ((normalized_dir, True), (absolute_dir, False)):
        for (config, profiler_totals) in totals_by_config:
            plot_raw_totals(config, profiler_totals, max_t, max_t_std, max_e, max_e_std, target_dir, normalize)
def plot_trial_time_series(config, trial, trial_data, max_end_time, max_power, output_dir):
    """Plot time series for a single trial and save it as "ts_<config>_<trial>.png".

    Each profiler gets one horizontal row of bars showing when it was active;
    the power derived from the ENERGY_PROFILER_NAME log is drawn on a second
    Y axis.

    Keyword arguments:
    config -- the config name
    trial -- the trial name
    trial_data -- [(profiler, [start times], [end times], [start energies], [end energies])]
    max_end_time -- single value to use as max X axis value (for consistency across trials)
    max_power -- single value to use as max power axis value (for consistency across trials)
    output_dir -- the output directory
    """
    # TODO: Some profilers may have parallel tasks - need to identify this on plots
    # Divisor that turns raw time values into the 'ms' used on the X axis
    time_scale = 1000000.0
    max_end_time = max_end_time / time_scale
    trial_data = sorted(trial_data)
    fig, ax1 = plt.subplots()
    keys = [p for (p, ts, te, es, ee) in trial_data]
    # add some text for labels, title and axes ticks
    ax1.set_title('Profiler Activity for ' + config + ', ' + trial)
    ax1.set_xlabel('Time (ms)')
    ax1.grid(True)
    width = 8  # the width of the bars
    # Exactly one tick per profiler, at the centre of its row (10, 20, ...), so
    # the number of ticks matches the number of labels.
    ax1.set_yticks(10 * np.arange(1, len(keys) + 1))
    ax1.set_yticklabels(keys)
    ax1.set_ylim(ymin=0, ymax=((len(trial_data) + 1) * 10))
    ax1.set_xlim(xmin=0, xmax=max_end_time)
    fig.set_tight_layout(True)
    # Float division keeps the height identical on Python 2 and 3 and avoids a
    # zero height when there are fewer than three profilers.
    fig.set_size_inches(16, len(trial_data) / 3.0)

    row_center = 10
    for (p, ts, te, es, ee) in trial_data:
        # (start, duration) pairs for every heartbeat of this profiler
        xranges = [(start / time_scale, (end - start) / time_scale) for (start, end) in zip(ts, te)]
        ax1.broken_barh(xranges, (row_center - 0.5 * width, width))
        row_center += 10

    # place a vbar at the final time for this trial
    last_profiler_times = [np.nanmax(te) for (p, ts, te, es, ee) in trial_data if len(te) > 0]
    plt.axvline(np.max(last_profiler_times) / time_scale, color='black')

    power_times = []
    power_values = []
    for (p, ts, te, es, ee) in trial_data:
        if p == ENERGY_PROFILER_NAME:
            power_times = te / time_scale
            power_values = hb_energy_times_to_power(es, ee, ts, te)

    ax2 = ax1.twinx()
    ax2.set_xlim(xmin=0, xmax=max_end_time)
    ax2.set_ylim(ymin=0, ymax=max_power)
    ax2.set_ylabel('Power (Watts)')
    ax2.plot(power_times, power_values, color='r')

    # plt.show()
    plt.savefig(path.join(output_dir, "ts_" + config + "_" + trial + ".png"))
    plt.close(fig)
def hb_energy_times_to_power(es, ee, ts, te):
    """Compute power from start and end energy and times.

    The energy consumed in each interval is divided by the interval duration,
    after scaling the duration down by 1000.
    NOTE(review): callers plot the result as Watts, which presumably means
    energies are in uJ and times in ns -- confirm against the log format.

    Return: power values

    Keyword arguments:
    es, ee -- start and end energies
    ts, te -- start and end times
    """
    energy_used = ee - es
    scaled_duration = (te - ts) / 1000.0
    return energy_used / scaled_duration
def plot_all_time_series(config_list, output_dir):
    """Plot a time series of profiler activity and power for every trial.

    The plots are written to a "time_series" sub-directory created inside
    output_dir. All plots share the same maximum time and power axis values.
    np.nanmax is applied to the collected end times and power values, so every
    trial is expected to have at least one non-empty log and at least one trial
    must have a non-empty ENERGY_PROFILER_NAME log.

    Keyword arguments:
    config_list -- [(config, result of process_config_dir(...))]
    output_dir -- where to write plots to
    """
    time_series_out_dir = path.join(output_dir, 'time_series')
    os.makedirs(time_series_out_dir)

    max_end_times = []
    max_power_values = []
    for (c, cd) in config_list:
        for (t, td) in cd:
            # Latest end time of each profiler that logged anything in this trial
            trial_max_end_times = [np.nanmax(te) for (p, ts, te, es, ee) in td if len(te) > 0]
            max_end_times.append(np.nanmax(trial_max_end_times))
            for (p, ts, te, es, ee) in td:
                # We only care about the energy profiler (others aren't reliable for instant power anyway)
                if p == ENERGY_PROFILER_NAME and len(te) > 0:
                    max_power_values.append(np.nanmax(hb_energy_times_to_power(es, ee, ts, te)))
    max_time = np.nanmax(max_end_times)
    max_power = np.nanmax(np.array(max_power_values)) * 1.2  # leave a little space at the top

    for (config, config_data) in config_list:
        for (trial, trial_data) in config_data:
            plot_trial_time_series(config, trial, trial_data, max_time, max_power, time_series_out_dir)
def read_heartbeat_log(profiler_hb_log):
    """Read a heartbeat log file.

    The profiler name is taken from the file name: the text between the first
    '-' and the following '.'. The first row of the file is skipped.

    Return: (profiler name, [start times], [end times], [start energies], [end energies])

    Keyword arguments:
    profiler_hb_log -- the file to read
    """
    columns = (HB_LOG_IDX_START_TIME,
               HB_LOG_IDX_END_TIME,
               HB_LOG_IDX_START_ENERGY,
               HB_LOG_IDX_END_ENERGY)
    with warnings.catch_warnings():
        # Silence numpy's warnings while loading
        warnings.simplefilter("ignore")
        try:
            time_start, time_end, energy_start, energy_end = np.loadtxt(
                profiler_hb_log,
                dtype=np.dtype('uint64'),
                skiprows=1,
                usecols=columns,
                unpack=True,
                ndmin=1)
        except ValueError:
            # Treated as "no data" (presumably a log without any heartbeat rows)
            time_start, time_end, energy_start, energy_end = [], [], [], []
    file_name = path.basename(profiler_hb_log)
    name = file_name.split('-')[1].split('.')[0]
    # A single-row log yields scalars, so force every field to be 1-d
    return (name,
            np.atleast_1d(time_start),
            np.atleast_1d(time_end),
            np.atleast_1d(energy_start),
            np.atleast_1d(energy_end))
def process_trial_dir(trial_dir):
    """Process trial directory.

    Every "*.log" file in the directory is read with read_heartbeat_log(...).
    np.nanmin is applied to the collected minima, so at least one log with
    data is expected.

    Return: [(profiler name, [start times], [end times], [start energies], [end energies])]
    Time and energy are normalized to 0 start values.

    Keyword arguments:
    trial_dir -- the directory for this trial
    """
    # A real list (not a lazy map object): it is traversed several times below
    log_data = [read_heartbeat_log(path.join(trial_dir, log_file))
                for log_file in os.listdir(trial_dir) if log_file.endswith(".log")]

    # Find the earliest timestamps and energy readings, ignoring empty logs
    min_t = np.nanmin([np.nanmin(ts) for (profiler, ts, te, es, ee) in log_data if len(ts) > 0])
    min_e = np.nanmin([np.nanmin(es) for (profiler, ts, te, es, ee) in log_data if len(es) > 0])

    # Normalize timing/energy data to start values of 0
    return [(profiler, ts - min_t, te - min_t, es - min_e, ee - min_e) for (profiler, ts, te, es, ee) in log_data]
def process_config_dir(config_dir):
    """Process a configuration directory.

    Every entry of config_dir is treated as a trial directory.

    Return: [(trial, [(profiler name, [start times], [end times], [start energies], [end energies])])]

    Keyword arguments:
    config_dir -- the directory for this configuration - contains subdirectories for each trial
    """
    trials = []
    for trial_name in os.listdir(config_dir):
        trial_path = path.join(config_dir, trial_name)
        trials.append((trial_name, process_trial_dir(trial_path)))
    return trials
def process_logs(log_dir):
    """Process log directory.

    Every entry of log_dir is treated as a configuration directory; the
    configuration name is the second '_'-separated field of the entry name.

    Return: [(config, [(trial, [(profiler name, [start times], [end times], [start energies], [end energies])])])]

    Keyword arguments:
    log_dir -- the log directory to process - contains subdirectories for each configuration
    """
    configs = []
    for dir_name in os.listdir(log_dir):
        config_name = dir_name.split('_')[1]
        config_data = process_config_dir(path.join(log_dir, dir_name))
        configs.append((config_name, config_data))
    return configs
def find_best_executions(log_dir):
    """Get the best time, energy, and power from the characterization summaries.

    Reads the SUMMARY_OUTPUT file of every trial of every configuration. The
    value on each relevant line is the text after the first ':'.

    Return: ((config, trial, min_time), (config, trial, min_energy), (config, trial, min_power))
    where config and trial are directory names. An entry stays
    ('', '', float('inf')) when no trial was found.

    Keyword arguments:
    log_dir -- the log directory to process - contains subdirectories for each configuration
    """
    # Infinity as the starting point, so that any real value replaces it
    DEFAULT = ('', '', float('inf'))
    min_time = DEFAULT
    min_energy = DEFAULT
    min_power = DEFAULT
    for config_dir in os.listdir(log_dir):
        for trial_dir in os.listdir(path.join(log_dir, config_dir)):
            with open(path.join(log_dir, config_dir, trial_dir, SUMMARY_OUTPUT), "r") as s:
                lines = s.readlines()
            trial_time = float(lines[SUMMARY_TIME_IDX].split(':')[1])
            trial_energy = int(lines[SUMMARY_ENERGY_IDX].split(':')[1])
            trial_power = float(lines[SUMMARY_POWER_IDX].split(':')[1])
            if trial_time < min_time[2]:
                min_time = (config_dir, trial_dir, trial_time)
            if trial_energy < min_energy[2]:
                min_energy = (config_dir, trial_dir, trial_energy)
            # Compare against the stored value, not against the whole tuple
            if trial_power < min_power[2]:
                min_power = (config_dir, trial_dir, trial_power)
    return (min_time, min_energy, min_power)
def main():
    """This script processes the log files from the "characterize.py" script and produces visualizations.

    Exits with status 1 if the input directory does not exist or the output
    directory already exists.
    """
    # Default log directory
    default_directory = 'heartbeat_logs'
    # Default output directory
    default_output_dir = 'plots'

    # Parsing the input of the script
    parser = argparse.ArgumentParser(description="Process Heartbeat log files from characterization")
    parser.add_argument("-d", "--directory",
                        default=default_directory,
                        help="Heartbeat log directory \"-d heartbeat_logs\"")
    parser.add_argument("-o", "--output",
                        default=default_output_dir,
                        help="Specify the log output directory, for example \"-o plots\"")
    parser.add_argument("--android",
                        action="store_true",
                        dest="android",
                        default=False,
                        help="Specify if processing results from Android")

    args = parser.parse_args()
    # An explicitly empty argument falls back to the default
    directory = args.directory or default_directory
    output_dir = args.output or default_output_dir
    android = bool(args.android)

    if not os.path.exists(directory):
        print("Input directory does not exist: " + directory)
        sys.exit(1)

    if os.path.exists(output_dir):
        print("Output directory already exists: " + output_dir)
        sys.exit(1)

    res = process_logs(directory)

    if not android:
        best = find_best_executions(directory)
        # Single-argument print calls give the same output on Python 2 and 3
        print('Best time: %s' % (best[0],))
        print('Best energy: %s' % (best[1],))
        print('Best power: %s' % (best[2],))

    os.makedirs(output_dir)
    plot_all_raw_totals(res, output_dir)
    plot_all_time_series(res, output_dir)


# Run the command-line entry point only when executed as a script
if __name__ == "__main__":
    main()