зеркало из https://github.com/microsoft/torchy.git
322 строки
8.1 KiB
C++
322 строки
8.1 KiB
C++
// Copyright (c) 2021-present The Torchy Authors.
|
|
// Distributed under the MIT license that can be found in the LICENSE file.
|
|
|
|
#include "config.h"
|
|
|
|
#ifdef TORCHY_ENABLE_STATS
|
|
#include "stopwatch.h"
|
|
#include "trace.h"
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
using namespace std;
|
|
|
|
// Number of elements in the built-in array `a`. Only meaningful for real
// arrays: a pointer argument would silently yield sizeof(pointer)/sizeof(elem).
#define NUM_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
|
|
|
|
namespace {
|
|
|
|
const char* flush_reasons[] = {
|
|
"autograd",
|
|
"debug",
|
|
"dim",
|
|
"has_storage",
|
|
"inplace shared",
|
|
"is_contiguous",
|
|
"numel",
|
|
"overflow shared list",
|
|
"set_size",
|
|
"set_storage_offset",
|
|
"set_stride",
|
|
"shallow_copy_from",
|
|
"size",
|
|
"sizes",
|
|
"storage",
|
|
"storage_offset",
|
|
"stride",
|
|
"strides",
|
|
"trace max length",
|
|
};
|
|
|
|
static_assert(NUM_ELEMS(flush_reasons) == (unsigned)FlushReason::NUM_REASONS);
|
|
|
|
// Returns the median of `v`.
//
// The vector is sorted in place as a side effect (callers keep the sorted
// data, which makes later calls on the same vector cheap). For an even number
// of samples the mean of the two middle values is returned. An empty vector
// yields 0 instead of reading out of bounds.
float median(std::vector<float> &v) {
  if (v.empty())
    return 0.0f;

  std::sort(v.begin(), v.end());

  const auto sz = v.size();
  const auto mid = sz / 2;
  if (sz % 2 == 0)
    return (v[mid - 1] + v[mid]) / 2.0f;
  return v[mid];
}
|
|
|
|
// Adds `amount` to the histogram bucket `idx` of `v`, growing the vector
// (new buckets start at zero) when the bucket does not exist yet.
void inc(std::vector<unsigned> &v, size_t idx, unsigned amount = 1) {
  const bool bucket_missing = v.size() <= idx;
  if (bucket_missing)
    v.resize(idx + 1);

  unsigned &bucket = v[idx];
  bucket += amount;
}
|
|
|
|
// Condenses the textual form of a trace into a string suitable for use inside
// a double-quoted Graphviz DOT label.
//
//  - lines containing "[dead]" are dropped;
//  - at most 19 lines are kept; a 20th surviving line is replaced by "...";
//  - everything from " #refs=" or " #output" onwards is cut from each line;
//  - each line is truncated to 50 characters;
//  - '"' and '\' are backslash-escaped so that trace text cannot terminate the
//    quoted label or be misread as a DOT escape sequence;
//  - every emitted line ends with the two characters "\l" (DOT's left-justified
//    line break).
std::string shrink_trace(const std::string &t) {
  constexpr unsigned max_lines = 20;
  constexpr size_t max_columns = 50;

  std::istringstream ss(t);
  std::string out;
  unsigned lines = 0;

  for (std::string line; std::getline(ss, line); ) {
    if (line.find("[dead]") != std::string::npos)
      continue;

    if (++lines == max_lines) {
      out += "...\\l";
      break;
    }

    // Strip the per-op bookkeeping suffixes; they only add noise to the graph.
    for (const char *suffix : {" #refs=", " #output"}) {
      const auto pos = line.find(suffix);
      if (pos != std::string::npos)
        line.resize(pos);
    }

    if (line.size() > max_columns)
      line.resize(max_columns);

    // Escape after truncating so that an escape pair is never cut in half.
    for (const char ch : line) {
      if (ch == '"' || ch == '\\')
        out += '\\';
      out += ch;
    }
    out += "\\l";
  }
  return out;
}
|
|
|
|
array<unsigned, (unsigned)FlushReason::NUM_REASONS> flush_reasons_count;
|
|
array<unsigned, MAX_TRACE_LENGTH+1> trace_size;
|
|
array<unsigned, MAX_TRACE_LENGTH+1> num_trace_outputs;
|
|
array<unsigned, MAX_TRACE_LENGTH+1> num_trace_deads;
|
|
vector<pair<float, string>> trace_compile_time;
|
|
unordered_map<string, vector<float>> trace_run_time;
|
|
unordered_map<string, unordered_map<string, unsigned>> trace_successors;
|
|
string first_trace, current_trace, last_trace;
|
|
unsigned unsupported_wrappers = 0;
|
|
unsigned torchscript_failures = 0;
|
|
|
|
struct PrintStats {
|
|
~PrintStats() {
|
|
cerr << "\n\n------------ STATISTICS ------------\n\n";
|
|
print_table("Trace Flush Reason", flush_reasons_count.data(), flush_reasons,
|
|
flush_reasons_count.size());
|
|
print_table("Trace Size", trace_size.data(), trace_size.size());
|
|
print_table("Number of Outputs per Trace", num_trace_outputs.data(),
|
|
num_trace_outputs.size());
|
|
print_table("Number of Dead Ops per Trace", num_trace_deads.data(),
|
|
num_trace_deads.size());
|
|
|
|
vector<unsigned> successor_frequency;
|
|
for (const auto &p : trace_successors) {
|
|
inc(successor_frequency, p.second.size());
|
|
}
|
|
print_table("Number of Successors per Trace", successor_frequency.data(),
|
|
successor_frequency.size());
|
|
|
|
vector<unsigned> trace_freq_stats;
|
|
unsigned total = 0;
|
|
for (const auto &p : trace_run_time) {
|
|
inc(trace_freq_stats, p.second.size());
|
|
total += p.second.size();
|
|
}
|
|
print_table("Frequency per Trace", trace_freq_stats.data(),
|
|
trace_freq_stats.size());
|
|
|
|
vector<unsigned> trace_times;
|
|
for (auto &p : trace_run_time) {
|
|
inc(trace_times, unsigned(median(p.second) * 1000000.0), p.second.size());
|
|
}
|
|
print_table("Run-times per Trace (microseconds)", trace_times.data(),
|
|
trace_times.size());
|
|
|
|
print_header("Most Frequent Traces");
|
|
{
|
|
vector<pair<unsigned,string>> traces;
|
|
for (auto &p : trace_run_time) {
|
|
traces.emplace_back(p.second.size(), p.first);
|
|
}
|
|
sort(traces.begin(), traces.end());
|
|
|
|
auto I = traces.rbegin(), E = traces.rend();
|
|
for (unsigned i = 0; i < 20 && I != E; ++i, ++I) {
|
|
auto med = median(trace_run_time.at(I->second));
|
|
cerr << "Trace executed " << I->first << " times ("
|
|
<< unsigned(med * 1000000.0) << " us)\n"
|
|
<< I->second << "\n\n";
|
|
}
|
|
}
|
|
|
|
print_header("Slowest Trace Compilation");
|
|
sort(trace_compile_time.begin(), trace_compile_time.end());
|
|
{
|
|
auto I = trace_compile_time.rbegin(), E = trace_compile_time.rend();
|
|
for (unsigned i = 0; i < 20 && I != E; ++i, ++I) {
|
|
cerr << "Trace compiled in "
|
|
<< unsigned(I->first * 1000000.0) << " us\n"
|
|
<< I->second << "\n\n";
|
|
}
|
|
}
|
|
|
|
cerr << "Number of Torchscript failures:\t" << torchscript_failures
|
|
<< "\nNumber of unsupported ops:\t" << unsupported_wrappers
|
|
<< "\nNumber of traces:\t" << total
|
|
<< "\nDistinct traces:\t" << trace_compile_time.size() << '\n';
|
|
|
|
cerr << endl;
|
|
|
|
if (auto p = getenv("TORCHY_PRINT_DOT"))
|
|
if (*p)
|
|
print_dot();
|
|
}
|
|
|
|
private:
|
|
void print_dot() {
|
|
{
|
|
ofstream f("trace.dot");
|
|
print_dot(f, false);
|
|
}
|
|
{
|
|
ofstream f("trace_detailed.dot");
|
|
print_dot(f, true);
|
|
}
|
|
}
|
|
|
|
void print_dot(ofstream &os, bool label_trace) {
|
|
unordered_map<string, unsigned> trace_map;
|
|
trace_map[first_trace] = 0;
|
|
|
|
auto get_id = [&](const string &t) -> unsigned {
|
|
auto sz = trace_map.size();
|
|
return trace_map.emplace(t, sz).first->second;
|
|
};
|
|
|
|
os << "digraph {\n"
|
|
"n0 [label=Start]\n"
|
|
"n0 -> T0\n";
|
|
|
|
for (const auto &p : trace_successors) {
|
|
unsigned src = get_id(p.first);
|
|
|
|
if (label_trace)
|
|
os << 'T' << src << " [label=\"" << shrink_trace(p.first)
|
|
<< "\", shape=box]\n";
|
|
|
|
for (const auto &p : p.second) {
|
|
os << 'T' << src << " -> T" << get_id(p.first) << '\n';
|
|
}
|
|
}
|
|
os << "}\n";
|
|
}
|
|
|
|
void print_table(const char *header, unsigned *data, const char **labels,
|
|
size_t size) {
|
|
print_header(header);
|
|
|
|
size_t max_label = 0;
|
|
for (unsigned i = 0; i < size; ++i) {
|
|
if (data[i] != 0)
|
|
max_label = max(max_label, strlen(labels[i]));
|
|
}
|
|
|
|
for (unsigned i = 0; i < size; ++i) {
|
|
if (data[i] == 0)
|
|
continue;
|
|
cerr << labels[i] << ": ";
|
|
pad(labels[i], max_label);
|
|
cerr << data[i] << '\n';
|
|
}
|
|
cerr << '\n';
|
|
}
|
|
|
|
void print_table(const char *header, unsigned *data, size_t size) {
|
|
print_header(header);
|
|
|
|
size_t max_label = to_string(size-1).size();
|
|
|
|
for (unsigned i = 0; i < size; ++i) {
|
|
if (data[i] == 0)
|
|
continue;
|
|
string label = to_string(i);
|
|
cerr << label << ": ";
|
|
pad(label.c_str(), max_label);
|
|
cerr << data[i] << '\n';
|
|
}
|
|
cerr << '\n';
|
|
}
|
|
|
|
void print_header(const char *header) {
|
|
cerr << header << '\n';
|
|
repeat('=', strlen(header));
|
|
cerr << '\n';
|
|
}
|
|
|
|
void pad(const char *str, size_t length) {
|
|
repeat(' ', length - strlen(str));
|
|
}
|
|
|
|
void repeat(char ch, size_t length) {
|
|
for (size_t i = 0; i < length; ++i) {
|
|
cerr << ch;
|
|
}
|
|
}
|
|
};
|
|
|
|
// File-local instance whose only job is to run ~PrintStats(), i.e. print the
// report, when namespace-scope objects are destroyed at program exit. It is
// declared after the statistics containers above, so it is destroyed before
// them and can still read their contents.
PrintStats printer;
|
|
|
|
}
|
|
|
|
void stats_register_trace(const Trace &t, FlushReason reason) {
|
|
++flush_reasons_count[(unsigned)reason];
|
|
|
|
unsigned num_ops = t.numOps();
|
|
++trace_size[num_ops];
|
|
|
|
unsigned num_outputs = 0;
|
|
unsigned num_deads = 0;
|
|
|
|
auto *ops = t.getOps();
|
|
for (unsigned i = 0; i < num_ops; ++i) {
|
|
auto &op = ops[i];
|
|
num_outputs += op.observable;
|
|
num_deads += op.dead;
|
|
}
|
|
++num_trace_outputs[num_outputs];
|
|
++num_trace_deads[num_deads];
|
|
|
|
// We need to get the trace's string representation before it is executed.
|
|
// Tensor's trace_idx gets overwritten during materialization.
|
|
stringstream trace_ss;
|
|
trace_ss << t;
|
|
current_trace = move(trace_ss).str();
|
|
|
|
if (first_trace.empty())
|
|
first_trace = current_trace;
|
|
}
|
|
|
|
// Records how long the compilation of the current trace took: appends a
// (seconds, trace text) entry to trace_compile_time.
void stats_register_compile_time(const StopWatch &run_time) {
  const auto elapsed = run_time.seconds();
  trace_compile_time.emplace_back(elapsed, current_trace);
}
|
|
|
|
void stats_register_trace_time(const StopWatch &run_time) {
|
|
trace_run_time[current_trace].emplace_back(run_time.seconds());
|
|
|
|
if (!last_trace.empty())
|
|
++trace_successors[last_trace][current_trace];
|
|
last_trace = move(current_trace);
|
|
}
|
|
|
|
// Counts one more occurrence for the "Number of unsupported ops" line of the
// statistics report.
void stats_inc_unsupported_wrapper() {
  unsupported_wrappers += 1;
}
|
|
|
|
// Counts one more occurrence for the "Number of Torchscript failures" line of
// the statistics report.
void stats_inc_torchscript_fail() {
  torchscript_failures += 1;
}
|
|
|
|
#endif
|