diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md index 8aab1aed22..4508bce25f 100644 --- a/doc/yjit/yjit.md +++ b/doc/yjit/yjit.md @@ -480,13 +480,8 @@ perf script --fields +pid > /tmp/test.perf You can also profile the number of cycles consumed by code generated by each YJIT function. ```bash -# Build perf from source for Python support -# [Optional] libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev -sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev -git clone https://github.com/torvalds/linux -cd linux/tools/perf -make -make install +# Install perf +apt-get install linux-tools-common linux-tools-generic linux-tools-`uname -r` # [Optional] Allow running perf without sudo echo 0 | sudo tee /proc/sys/kernel/kptr_restrict @@ -496,6 +491,25 @@ echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid cd ../yjit-bench PERF=record ruby --yjit-perf=codegen -Iharness-perf benchmarks/lobsters/benchmark.rb +# Aggregate results +perf script > /tmp/perf.txt +../ruby/misc/yjit_perf.py /tmp/perf.txt +``` + +#### Building perf with Python support + +The above instructions work fine for most people, but you could also use +a handy `perf script -s` interface if you build perf from source. 
+ +```bash +# Build perf from source for Python support +sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev \ + libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev +git clone --depth=1 https://github.com/torvalds/linux +cd linux/tools/perf +make +make install + # Aggregate results perf script -s ../ruby/misc/yjit_perf.py ``` diff --git a/misc/yjit_perf.py b/misc/yjit_perf.py old mode 100644 new mode 100755 index 44c232254e..61434e5eb4 --- a/misc/yjit_perf.py +++ b/misc/yjit_perf.py @@ -1,12 +1,9 @@ +#!/usr/bin/env python3 import os import sys from collections import Counter, defaultdict import os.path -sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') -from perf_trace_context import * -from EventClass import * - # Aggregating cycles per symbol and dso total_cycles = 0 category_cycles = Counter() @@ -57,11 +54,10 @@ def categorize_symbol(dso, symbol): def process_event(event): global total_cycles, category_cycles, detailed_category_cycles, categories - sample = event["sample"] full_dso = event.get("dso", "Unknown_dso") dso = os.path.basename(full_dso) symbol = event.get("symbol", "[unknown]") - cycles = sample["period"] + cycles = event["sample"]["period"] total_cycles += cycles category = categorize_symbol(dso, symbol) @@ -94,3 +90,27 @@ def trace_end(): for (dso, symbol), cycles in symbols.most_common(): symbol_ratio = (cycles / category_total) * 100 print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles)) + +# There are two ways to use this script: +# 1) perf script -s misc/yjit_perf.py -- native interface +# 2) perf script > perf.txt && misc/yjit_perf.py perf.txt -- hack, which doesn't require perf with Python support +# +# In both cases, __name__ is "__main__". The following code implements (2) when len(sys.argv) is 2. 
def parse_perf_script_line(line):
    """Convert one line of `perf script` text output into a process_event() dict.

    Expected whitespace-separated layout:
        comm pid time: period event: ip symbol+0xoffset (dso)

    [Example]
    ruby 78207 3482.848465: 1212775 cpu_core/cycles:P/: 5c0333f682e1 [JIT] getlocal_WC_0+0x0 (/tmp/perf-78207.map)

    Returns:
        {"dso": ..., "symbol": ..., "sample": {"period": int}} for a sample line,
        or None when the line does not match the layout (blank line, fewer than
        seven fields, non-numeric period, or no trailing "(dso)" part).
    """
    row = line.split(maxsplit=6)
    if len(row) < 7:
        return None

    try:
        period = int(row[3])  # "1212775"
    except ValueError:
        return None

    # The last field keeps its trailing newline, hence rstrip(). Split on the
    # LAST " (" so that symbols which themselves contain " (" stay intact.
    symbol, sep, dso = row[6].rstrip().rpartition(" (")
    if not sep or not dso.endswith(")"):
        return None
    dso = dso[:-1]  # "/tmp/perf-78207.map"

    # Drop only a trailing "+0x<offset>"; a "+" that belongs to the symbol
    # name itself is preserved.
    name, sep, _offset = symbol.rpartition("+0x")
    if sep:
        symbol = name  # "[JIT] getlocal_WC_0"

    return {"dso": dso, "symbol": symbol, "sample": {"period": period}}


# Text-file mode: aggregate a saved `perf script` dump given as the only argument.
# The argument count is what selects this mode. The original nested
# `len(sys.argv) != 2` usage check could never run inside this guard, so it is dropped.
if __name__ == "__main__" and len(sys.argv) == 2:
    skipped = 0
    # errors="replace": an undecodable byte in a symbol name should not abort the run.
    with open(sys.argv[1], "r", errors="replace") as file:
        for line in file:
            event = parse_perf_script_line(line)
            if event is None:
                # Blank lines are expected noise; anything else is worth reporting.
                if line.strip():
                    skipped += 1
                continue
            process_event(event)

    if skipped:
        print("yjit_perf.py: skipped {} unparsable line(s)".format(skipped), file=sys.stderr)
    trace_end()