YJIT: Let yjit_perf.py support perf with Python disabled (#10246)

* YJIT: Let yjit_perf.py support perf with Python disabled

* Update yjit.md about perf

* Recommend the extra interface by default
This commit is contained in:
Takashi Kokubun 2024-03-14 10:08:23 -07:00 коммит произвёл GitHub
Родитель 09d8c99cdc
Коммит b0be2961f7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
2 изменённых файлов: 47 добавлений и 13 удалений

Просмотреть файл

@ -480,13 +480,8 @@ perf script --fields +pid > /tmp/test.perf
You can also profile the number of cycles consumed by code generated by each YJIT function. You can also profile the number of cycles consumed by code generated by each YJIT function.
```bash ```bash
# Build perf from source for Python support # Install perf
# [Optional] libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev apt-get install linux-tools-common linux-tools-generic linux-tools-`uname -r`
sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev
git clone https://github.com/torvalds/linux
cd linux/tools/perf
make
make install
# [Optional] Allow running perf without sudo # [Optional] Allow running perf without sudo
echo 0 | sudo tee /proc/sys/kernel/kptr_restrict echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
@ -496,6 +491,25 @@ echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
cd ../yjit-bench cd ../yjit-bench
PERF=record ruby --yjit-perf=codegen -Iharness-perf benchmarks/lobsters/benchmark.rb PERF=record ruby --yjit-perf=codegen -Iharness-perf benchmarks/lobsters/benchmark.rb
# Aggregate results
perf script > /tmp/perf.txt
../ruby/misc/yjit_perf.py /tmp/perf.txt
```
#### Building perf with Python support
The instructions above work fine for most people, but if you build perf from
source with Python support, you can also use the handier `perf script -s` interface.
```bash
# Build perf from source for Python support
sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev \
libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev
git clone --depth=1 https://github.com/torvalds/linux
cd linux/tools/perf
make
make install
# Aggregate results # Aggregate results
perf script -s ../ruby/misc/yjit_perf.py perf script -s ../ruby/misc/yjit_perf.py
``` ```

32
misc/yjit_perf.py Normal file → Executable file
Просмотреть файл

@ -1,12 +1,9 @@
#!/usr/bin/env python3
import os import os
import sys import sys
from collections import Counter, defaultdict from collections import Counter, defaultdict
import os.path import os.path
sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from EventClass import *
# Aggregating cycles per symbol and dso # Aggregating cycles per symbol and dso
total_cycles = 0 total_cycles = 0
category_cycles = Counter() category_cycles = Counter()
@ -57,11 +54,10 @@ def categorize_symbol(dso, symbol):
def process_event(event): def process_event(event):
global total_cycles, category_cycles, detailed_category_cycles, categories global total_cycles, category_cycles, detailed_category_cycles, categories
sample = event["sample"]
full_dso = event.get("dso", "Unknown_dso") full_dso = event.get("dso", "Unknown_dso")
dso = os.path.basename(full_dso) dso = os.path.basename(full_dso)
symbol = event.get("symbol", "[unknown]") symbol = event.get("symbol", "[unknown]")
cycles = sample["period"] cycles = event["sample"]["period"]
total_cycles += cycles total_cycles += cycles
category = categorize_symbol(dso, symbol) category = categorize_symbol(dso, symbol)
@ -94,3 +90,27 @@ def trace_end():
for (dso, symbol), cycles in symbols.most_common(): for (dso, symbol), cycles in symbols.most_common():
symbol_ratio = (cycles / category_total) * 100 symbol_ratio = (cycles / category_total) * 100
print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles)) print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles))
# There are two ways to use this script:
# 1) perf script -s misc/yjit_perf.py -- native interface
# 2) perf script > perf.txt && misc/yjit_perf.py perf.txt -- hack, which doesn't require perf with Python support
#
# In both cases, __name__ is "__main__". The following code implements (2) when
# exactly one argument (the perf.txt path) is given, i.e. len(sys.argv) == 2.


def parse_perf_line(line):
    """Parse one sample line of `perf script` text output.

    [Example]
    ruby 78207 3482.848465: 1212775 cpu_core/cycles:P/: 5c0333f682e1 [JIT] getlocal_WC_0+0x0 (/tmp/perf-78207.map)

    Returns a dict in the shape process_event() reads,
    {"dso": ..., "symbol": ..., "sample": {"period": int}}, or None when the
    line does not look like a sample (blank line, too few fields, non-integer
    period, or no trailing "(dso)" field).
    """
    row = line.split(maxsplit=6)
    if len(row) < 7:
        return None

    try:
        period = int(row[3])  # "1212775"
    except ValueError:
        return None

    # The dso is the last parenthesized field, so split from the right; the
    # symbol itself may contain " (" (e.g. demangled argument lists).
    symbol, sep, dso = row[6].rstrip().rpartition(" (")
    if not sep or not dso.endswith(")"):
        return None
    dso = dso[:-1]  # "/tmp/perf-78207.map"

    # Drop only a trailing "+0x..." offset, so that a "+" that is part of the
    # symbol name (e.g. an operator method) is preserved.
    name, plus, offset = symbol.rpartition("+")
    if plus and offset.startswith("0x"):
        symbol = name  # "[JIT] getlocal_WC_0"

    return {"dso": dso, "symbol": symbol, "sample": {"period": period}}


if __name__ == "__main__" and len(sys.argv) == 2:
    skipped = 0
    with open(sys.argv[1], "r") as file:
        for line in file:
            event = parse_perf_line(line)
            if event is None:
                # Blank lines are expected noise; count anything else so the
                # user knows the totals may be incomplete.
                if line.strip():
                    skipped += 1
                continue
            process_event(event)
    if skipped:
        print("yjit_perf.py: skipped {} unparsable line(s)".format(skipped), file=sys.stderr)
    trace_end()