DENG-965 - symbolicate and signaturize crash pings (#4642)

This commit is contained in:
Daniel Thorn 2023-12-12 08:57:52 -08:00 committed by GitHub
Parent 18c717325e
Commit b0bfc65052
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 870 additions and 9 deletions

View file

@@ -40,7 +40,7 @@ jobs:
name: Build
command: |
python3 -m venv venv/
venv/bin/pip install pip-tools --constraint requirements.in
venv/bin/pip install --no-deps -r requirements.txt
venv/bin/pip-sync --pip-args=--no-deps
- run:
name: Yamllint Test
@@ -84,15 +84,13 @@ jobs:
docker: *docker
steps:
- checkout
- *restore_venv_cache
- *build
- run:
name: Verify that requirements.txt contains the right dependencies for
this python version
# use `--constraint` with `requirements.in` not `requirements.txt`
# because for pip>=20.3 "Constraints are only allowed to take the form
# of a package name and a version specifier"
command: |
pip install pip-tools --constraint requirements.in
pip-compile --allow-unsafe --generate-hashes --quiet
venv/bin/pip-compile --allow-unsafe --generate-hashes --quiet
git diff --exit-code -G '^ *[^# ]' -- requirements.txt
test-sql:
docker: *docker

View file

@@ -7,7 +7,7 @@ import re
from enum import Enum
from fnmatch import fnmatchcase
from pathlib import Path
from typing import List, Optional, Tuple
from typing import Dict, List, Optional, Tuple
import attr
import cattrs
@@ -242,6 +242,9 @@ class Task:
gke_cluster_name: Optional[str] = attr.ib(None)
query_project: Optional[str] = attr.ib(None)
task_group: Optional[str] = attr.ib(None)
container_resources: Optional[Dict[str, str]] = attr.ib(None)
node_selector: Optional[Dict[str, str]] = attr.ib(None)
startup_timeout_seconds: Optional[int] = attr.ib(None)
@property
def task_key(self):

View file

@@ -108,6 +108,15 @@ with DAG('{{ name }}', default_args=default_args{%+ if schedule_interval != None
{%+ elif task.date_partition_parameter -%}
parameters=["{{ task.date_partition_parameter }}:DATE:{% raw %}{{ds}}{% endraw %}"],
{%+ endif -%}
{%+ if task.container_resources -%}
container_resources={{ task.container_resources }},
{%+ endif -%}
{%+ if task.node_selector -%}
node_selector={{ task.node_selector }},
{%+ endif -%}
{%+ if task.startup_timeout_seconds -%}
startup_timeout_seconds={{ task.startup_timeout_seconds }},
{%+ endif -%}
{%+ else -%}
{{ task.task_name }} = bigquery_etl_query(
{% if name == "bqetl_default" -%}

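The three new fields are passed straight through to the operator when set. As a hedged sketch of the rendered output (the `gke_command` operator name and the `command` value are assumptions; the resource values mirror the crash_symbols metadata added later in this commit):

```python
crash_symbols__v1 = gke_command(  # assumed operator; provided by telemetry-airflow
    task_id="crash_symbols__v1",
    command=[
        "python",
        "sql/moz-fx-data-shared-prod/telemetry_derived/crash_symbols_v1/query.py",
        "--submission-date",
        "{{ ds }}",
    ],
    container_resources={"request_memory": "102400Mi", "limit_memory": "102400Mi"},
    node_selector={"nodepool": "highmem-16"},
    startup_timeout_seconds=360,
)
```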
View file

@@ -1343,3 +1343,19 @@ bqetl_glam_export:
tags:
- impact/tier_2
description: DAG to prepare GLAM data for public export.
bqetl_crash:
schedule_interval: 0 2 * * *
default_args:
depends_on_past: false
email:
- dthorn@mozilla.com
- telemetry-alerts@mozilla.com
email_on_failure: true
email_on_retry: false
owner: dthorn@mozilla.com
retries: 2
retry_delay: 30m
start_date: "2023-12-10"
tags:
- impact/tier_2

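For reference, the new DAG's cron expression fires once a day at 02:00 UTC (Airflow's default timezone); croniter here is only to illustrate the expression, since Airflow interprets it itself and runs each day's data interval after it closes:

```python
from datetime import datetime

from croniter import croniter

# "0 2 * * *" fires once a day at 02:00
it = croniter("0 2 * * *", datetime(2023, 12, 10, 0, 0))
print(it.get_next(datetime))  # 2023-12-10 02:00:00
print(it.get_next(datetime))  # 2023-12-11 02:00:00
```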
View file

@@ -9,6 +9,7 @@ gcsfs==2023.10.0
gcloud==0.18.3
gitpython==3.1.40
google-cloud-bigquery==3.11.4
google-cloud-bigquery-storage[fastavro]==2.23.0
google-cloud-storage==2.10.0
Jinja2==3.1.2
jsonschema==4.19.2
@@ -36,6 +37,8 @@ smart_open==6.4.0
sqlglot==19.6.0
sqlparse==0.4.4
stripe==6.4.0
symbolic==12.4.1
siggen==2.0.20231009
tomli==2.0.1 # for backwards compatibility with python < 3.11
types-python-dateutil==2.8.19.14
types-pytz==2023.3.0.1

View file

@@ -108,6 +108,7 @@ attrs==23.1.0 \
# -r requirements.in
# aiohttp
# cattrs
# glom
# jsonschema
# mozilla-metric-config-parser
# pytest-mypy
@@ -142,6 +143,12 @@ black==23.10.1 \
# via
# -r requirements.in
# pytest-black
boltons==23.1.1 \
--hash=sha256:80a8cd930ff21fbf03545b9863e5799d0c3e7e0e3b2546bdaf2efccd7b3708cc \
--hash=sha256:d2cb2fa83cf2ebe791be1e284183e8a43a1031355156a968f8e0a333ad2448fc
# via
# face
# glom
bracex==2.3.post1 \
--hash=sha256:351b7f20d56fb9ea91f9b9e9e7664db466eb234188c175fd943f8f755c807e73 \
--hash=sha256:e7b23fc8b2cd06d3dec0692baabecb249dda94e06a617901ff03a6c56fd71693
@@ -229,7 +236,9 @@ cffi==1.15.1 \
--hash=sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b \
--hash=sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01 \
--hash=sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0
# via cryptography
# via
# cryptography
# milksnake
cfgv==3.3.1 \
--hash=sha256:c6a0883f3917a037485059700b9e75da2464e6c27051014ad85ba6aaa5884426 \
--hash=sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736
@@ -304,10 +313,48 @@ execnet==1.9.0 \
--hash=sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5 \
--hash=sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142
# via pytest-xdist
face==20.1.1 \
--hash=sha256:3790311a7329e4b0d90baee346eecad54b337629576edf3a246683a5f0d24446 \
--hash=sha256:7d59ca5ba341316e58cf72c6aff85cca2541cf5056c4af45cb63af9a814bed3e \
--hash=sha256:ca3a1d8b8b6aa8e61d62a300e9ee24e09c062aceda549e9a640128e4fa0f4559
# via glom
faker==19.6.1 \
--hash=sha256:5d6b7880b3bea708075ddf91938424453f07053a59f8fa0453c1870df6ff3292 \
--hash=sha256:64c8513c53c3a809075ee527b323a0ba61517814123f3137e4912f5d43350139
# via polyfactory
fastavro==1.9.0 \
--hash=sha256:00361ea6d5a46813f3758511153fed9698308cae175500ff62562893d3570156 \
--hash=sha256:00826f295f290ba95f1f68d5c36970b4db7f9245a1b1a33dd9d464a382733894 \
--hash=sha256:07dee19dcc2797a8cb1b410d9e65febb55af2a18d9a7b85465b039d4276b9a29 \
--hash=sha256:0c046ed9759d1100df59dc18452901253cff5a37d9e8e8701d0102116c3202cb \
--hash=sha256:0f044b71d8b0ba6bbd6166be6836c3caeadd26eeaabee70b6ac7c6a9b884f6bf \
--hash=sha256:172d6d5c186ba51ec6eaa98eaaadc8e859b5a56862ae724413424a858619da7f \
--hash=sha256:1cea6c2508dfb06d65cddb5b90bd6a79d3e481f1d80adc5f6ce6e3dacb4a8773 \
--hash=sha256:215f40921d3f1f229cea89af25533e7be3fde16dd85c55436c15fb1ad067b486 \
--hash=sha256:228e7c525ff15a9f21f1adb2097ec87888933ef5c8a682c2f1d5d83796e4dd42 \
--hash=sha256:35a32f5d33f91fcb7e8daf7afc82a75c8d7c774cf4d93937b2ad487d28f3f707 \
--hash=sha256:3d4a71d39760de455dbe0b2121ea1bbd85fc851e8bab2970d9e9d6d8825277d2 \
--hash=sha256:3ff7ac97cfe07ad90fdcca3ea90b14461ba8831bc45f02e13440b6c634f291c8 \
--hash=sha256:44fc998387271d57d0e3b29c30049ba903d2aead9471b12c20725284d60dd57e \
--hash=sha256:48d9214982c0c0f29e583df11781dc6884e8f3f3336b97991c6e7587f509a02b \
--hash=sha256:52e7df50431c21543682afd0ca95c40569c49e4c4599dcb78343f7c24fda6145 \
--hash=sha256:602492ea0c458020cd19138ff2b9e97aa187ae01c290183dd9bbb7ff2d2e83c4 \
--hash=sha256:6cebcc09c932931e3084c96fe2c666c9cfc8c4043520651fbfeb58575edeb7da \
--hash=sha256:718e5df505029269e7a80afdd7e5f196d24f1473ad47eea41061ce630609f80e \
--hash=sha256:71aad82b17442dc41223f8351b9f28a60dd877a8e5a7525eaf6342f45f6d23e1 \
--hash=sha256:83402b450f718b690ebd88f1df2ea70609f1192bed1498308d29ac737e992391 \
--hash=sha256:8629d4367373db7d195672834c59c86e2642172bbebd5ec6d83797b39ac4ef01 \
--hash=sha256:8c251e7122b436458b8e1151c0613d6dac2b5edb6acbbc35de3b4c5f6ebb80b7 \
--hash=sha256:a0d2570052b4e2d7b46bec4cd74c8b12d8e21cd151f5bfc837da990cb62385c5 \
--hash=sha256:b3704847d79377a5b4252ccf6d3a391497cdb8f57017cde2613f92f5274d6261 \
--hash=sha256:c5af71895a01618c98ae7c563ee75b18f721d8a66324d66613bd2fcd8b2f8ac9 \
--hash=sha256:cc3b2de071e4d6de19974ffd328e63f7c85de2348d614222238fda2b35578b63 \
--hash=sha256:d694bb1c2b20f1703bcb698a74f58f0f503eda8f49cb6d46209c8f3715098348 \
--hash=sha256:db30121ce34f5a0a4c368504a5e2df05449382e8d4918c0b43058ffb1d31d723 \
--hash=sha256:f45dfc29de276b509c8dbbfa6076ba6562be055c877928d4ffa1cf35b8ec59dc \
--hash=sha256:f803c33f4fd4e3bfc17bbdbf3c036fbcb92a1f8e6bd19a035800518479ce6b36 \
--hash=sha256:fb7e3a058a169d2c8bd19dfcbc7ae14c879750ce49fbaf3c436af683991f7eae
# via google-cloud-bigquery-storage
filelock==3.9.0 \
--hash=sha256:7b319f24340b51f55a2bf7a12ac0755a9b03e718311dac567a0f4f7fabd2f5de \
--hash=sha256:f58d535af89bb9ad5cd4df046f741f8553a418c01a7856bf0d173bbc9f6bd16d
@@ -424,11 +471,16 @@ gitpython==3.1.40 \
# -r requirements.in
# mozilla-metric-config-parser
# mozilla-schema-generator
glom==23.5.0 \
--hash=sha256:06af5e3486aacc59382ba34e53ebeabd7a9345d78f7dbcbee26f03baa4b83bac \
--hash=sha256:fe4e9be4dc93c11a99f8277042e4bee95419c02cda4b969f504508b0a1aa6a66
# via siggen
google-api-core[grpc]==2.11.0 \
--hash=sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22 \
--hash=sha256:ce222e27b0de0d7bc63eb043b956996d6dccab14cc3b690aaea91c9cc99dc16e
# via
# google-cloud-bigquery
# google-cloud-bigquery-storage
# google-cloud-core
# google-cloud-storage
google-auth==2.16.0 \
@@ -448,6 +500,10 @@ google-cloud-bigquery==3.11.4 \
--hash=sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1 \
--hash=sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974
# via -r requirements.in
google-cloud-bigquery-storage[fastavro]==2.23.0 \
--hash=sha256:371ff0a86d6166d7a935d6839474e5b5ab04ebd6598ea4b6c12b920719f2e0cb \
--hash=sha256:8496c6d30575efb224c18940566f9ac006d3b120ae759002918697c3407997e6
# via -r requirements.in
google-cloud-core==2.3.2 \
--hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \
--hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a
@@ -613,6 +669,10 @@ idna==3.4 \
# via
# requests
# yarl
importlib-resources==6.1.1 \
--hash=sha256:3893a00122eafde6894c59914446a512f728a0c1a45f9bb9b63721b6bacf0b4a \
--hash=sha256:e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6
# via siggen
iniconfig==2.0.0 \
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
--hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
@@ -718,6 +778,10 @@ mergedeep==1.3.4 \
--hash=sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8 \
--hash=sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307
# via mkdocs
milksnake==0.1.6 \
--hash=sha256:0198f8932b4e136c29c0d0d490ff1bac03f82c3a7b2ee6f666e3683b64314fd9 \
--hash=sha256:31e3eafaf2a48e177bb4b2dacef2c7ae8c5b2147a19c6d626209b819490e6f1d
# via symbolic
mkdocs==1.5.3 \
--hash=sha256:3b3a78e736b31158d64dbb2f8ba29bd46a379d0c6e324c2246c3bc3d2189cfc1 \
--hash=sha256:eb7c99214dcb945313ba30426c2451b735992c73c2e10838f76d09e39ff4d0e2
@@ -1009,7 +1073,9 @@ pre-commit==3.5.0 \
proto-plus==1.22.2 \
--hash=sha256:0e8cda3d5a634d9895b75c573c9352c16486cb75deb0e078b5fda34db4243165 \
--hash=sha256:de34e52d6c9c6fcd704192f09767cb561bb4ee64e70eede20b0834d841f0be4d
# via google-cloud-bigquery
# via
# google-cloud-bigquery
# google-cloud-bigquery-storage
protobuf==4.21.12 \
--hash=sha256:1f22ac0ca65bb70a876060d96d914dae09ac98d114294f77584b0d2644fa9c30 \
--hash=sha256:237216c3326d46808a9f7c26fd1bd4b20015fb6867dc5d263a493ef9a539293b \
@@ -1029,6 +1095,7 @@ protobuf==4.21.12 \
# gcloud
# google-api-core
# google-cloud-bigquery
# google-cloud-bigquery-storage
# googleapis-common-protos
# grpcio-status
# proto-plus
@@ -1473,6 +1540,10 @@ rsa==4.9 \
# via
# google-auth
# oauth2client
siggen==2.0.20231009 \
--hash=sha256:a634310cc5911bcd68062660d932b53d09bbe073e482b6c7fc0d16f4f2f1a976 \
--hash=sha256:d39a44351e0e364aa4276715f1259dd636d2d65586f57743369c5cc2005f1ad6
# via -r requirements.in
six==1.16.0 \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
@@ -1505,6 +1576,13 @@ stripe==6.4.0 \
--hash=sha256:d1ab2739aa6511f34ba77f66fca9f86cffe2dbb17df84bd224a38e7254efe61b \
--hash=sha256:d99e97e6d3f6dbaf9361ebd07b3024ec6f9d7b1f77724c8127338e0754314e9d
# via -r requirements.in
symbolic==12.4.1 \
--hash=sha256:027297fa2d94cd4420d3e2175e37913191d5055b8ceed382f5b7c03334bc2588 \
--hash=sha256:2592a8bd2e56b92a94fab46a917523ff8fe6dcdc4b263d7479012f88e4ef5b13 \
--hash=sha256:4a4dff6b322831e78cc01a71a5303b5fe4f0352a4d932e87bcfad15fda0a1efd \
--hash=sha256:60d5e0557688cc87c8857c011459ec9a203cf6fa131c981fa56c41295c9c46c2 \
--hash=sha256:63c58db1644e940cf5c93b2827b8779225b66ea529236f5dbae87bf63b0a4f5c
# via -r requirements.in
toml==0.10.2 \
--hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \
--hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f

View file

@@ -0,0 +1,19 @@
friendly_name: Crash Frames
description: |-
Unnested frames from crash pings.
owners:
- dthorn@mozilla.com
labels:
application: firefox
schedule: daily
dag: bqetl_crash
owner1: dthorn
scheduling:
dag_name: bqetl_crash
start_date: '2023-12-10'
bigquery:
time_partitioning:
type: day
field: submission_timestamp
require_partition_filter: true
expiration_days: null

View file

@@ -0,0 +1,36 @@
SELECT
submission_timestamp,
document_id,
thread_offset,
frame_offset,
_module.debug_file,
_module.debug_id,
IF(
_frame.ip LIKE '0x%' AND _module.base_addr LIKE '0x%',
SAFE_CAST(_frame.ip AS INT64) - SAFE_CAST(_module.base_addr AS INT64),
NULL
) AS module_offset,
FROM
`moz-fx-data-shared-prod`.telemetry_stable.crash_v4
JOIN
UNNEST(payload.stack_traces.threads) AS _thread
WITH OFFSET AS thread_offset
ON
thread_offset = 0
OR thread_offset = payload.stack_traces.crash_info.crashing_thread
JOIN
UNNEST(_thread.frames) AS _frame
WITH OFFSET AS frame_offset
ON
frame_offset < 40
LEFT JOIN
UNNEST(payload.stack_traces.modules) AS _module
WITH OFFSET AS _module_offset
ON
_frame.module_index = _module_offset
WHERE
DATE(submission_timestamp) = @submission_date
AND payload.metadata.ipc_channel_error IS NULL
AND payload.stack_traces.crash_info.crashing_thread
BETWEEN 0
AND ARRAY_LENGTH(payload.stack_traces.threads) - 1

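The `module_offset` above rebases a frame's instruction pointer against its module's load address; a minimal Python restatement of the hex arithmetic and SAFE_CAST semantics (a sketch; BigQuery casts '0x…' strings to INT64 natively):

```python
from typing import Optional


def module_offset(ip: str, base_addr: str) -> Optional[int]:
    """NULL unless both values look like hex, mirroring the IF/SAFE_CAST above."""
    if not (ip.startswith("0x") and base_addr.startswith("0x")):
        return None
    try:
        return int(ip, 16) - int(base_addr, 16)
    except ValueError:  # SAFE_CAST semantics: malformed hex yields NULL
        return None


assert module_offset("0x7ff6a1b2c3d4", "0x7ff6a1b00000") == 0x2C3D4
```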
View file

@@ -0,0 +1,23 @@
---
fields:
- name: submission_timestamp
type: TIMESTAMP
mode: NULLABLE
- name: document_id
type: STRING
mode: NULLABLE
- name: thread_offset
type: INTEGER
mode: NULLABLE
- name: frame_offset
type: INTEGER
mode: NULLABLE
- name: debug_file
type: STRING
mode: NULLABLE
- name: debug_id
type: STRING
mode: NULLABLE
- name: module_offset
type: INTEGER
mode: NULLABLE

View file

@@ -0,0 +1,23 @@
friendly_name: Crash Signatures
description: |-
  Crash signatures generated by siggen from crash_symbolicated.
owners:
- dthorn@mozilla.com
labels:
application: firefox
schedule: daily
scheduling:
dag_name: bqetl_crash
start_date: '2023-12-10'
retry_delay: 5m
arguments: ["--submission-date", "{{ ds }}"]
referenced_tables:
- - 'moz-fx-data-shared-prod'
- 'telemetry_derived'
- 'crash_symbolicated_v1'
bigquery:
time_partitioning:
type: day
field: submission_timestamp
require_partition_filter: true
expiration_days: null

View file

@@ -0,0 +1,69 @@
"""Generate signatures for symbolicated crash pings via siggen."""
from pathlib import Path
import click
import yaml
from google.cloud import bigquery, bigquery_storage
from siggen.generator import SignatureGenerator
SCHEMA_FILE = Path(__file__).parent / "schema.yaml"
OUTPUT_SCHEMA = bigquery.SchemaField.from_api_repr(
{"name": "root", "type": "RECORD", **yaml.safe_load(SCHEMA_FILE.read_text())}
).fields
@click.command()
@click.option(
"--submission-date",
type=click.DateTime(formats=["%Y-%m-%d"]),
required=True,
)
@click.option(
"--source-table",
default="moz-fx-data-shared-prod.telemetry_derived.crash_symbolicated_v1",
)
@click.option(
"--destination-table",
default="moz-fx-data-shared-prod.telemetry_derived.crash_signatures_v1",
)
def main(
submission_date,
source_table,
destination_table,
):
    # click.DateTime yields a datetime; normalize to a YYYY-MM-DD string so the
    # "$YYYYMMDD" partition suffix below can be built with str.replace
    submission_date = submission_date.date().isoformat()
    sig_generator = SignatureGenerator()
bq = bigquery.Client()
bq_read = bigquery_storage.BigQueryReadClient()
source = bq.get_table(source_table)
session = bq_read.create_read_session(
parent=f"projects/{source.project}",
read_session=bigquery_storage.ReadSession(
table=source.path.lstrip("/"),
data_format=bigquery_storage.DataFormat.AVRO,
),
max_stream_count=1,
)
reader = bq_read.read_rows(session.streams[0].name)
json_rows = [
{
"submission_timestamp": crash["submission_timestamp"].isoformat(),
"document_id": crash["document_id"],
"signature": sig_generator.generate(crash).signature or None,
}
for crash in reader.rows(session)
]
job = bq.load_table_from_json(
json_rows=json_rows,
destination=f'{destination_table}${submission_date.replace("-", "")}',
job_config=bigquery.LoadJobConfig(
ignore_unknown_values=False,
schema=OUTPUT_SCHEMA,
write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
),
)
job.result()
if __name__ == "__main__":
main()

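For context, siggen consumes exactly the row shape produced by crash_symbolicated_v1. A minimal illustrative run (the field values are made up, and the exact output depends on siggen's signature rules):

```python
from siggen.generator import SignatureGenerator

crash = {
    "os": "Windows NT",
    "reason": "EXCEPTION_ACCESS_VIOLATION_READ",
    "crashing_thread": 0,
    "threads": [
        {"frames": [{"function": "mozilla::dom::Example", "module": "xul.dll"}]}
    ],
}
result = SignatureGenerator().generate(crash)
print(result.signature)  # e.g. "mozilla::dom::Example"
```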
View file

@@ -0,0 +1,11 @@
---
fields:
- name: submission_timestamp
type: TIMESTAMP
mode: REQUIRED
- name: document_id
type: STRING
mode: REQUIRED
- name: signature
type: STRING
mode: NULLABLE

View file

@@ -0,0 +1,20 @@
friendly_name: Crash Symbolicated
description: |-
Crash pings enriched with symbol data and formatted for consumption by siggen.
owners:
- dthorn@mozilla.com
labels:
application: firefox
schedule: daily
owner: dthorn
dag: bqetl_crash
owner1: dthorn
scheduling:
dag_name: bqetl_crash
start_date: '2023-12-10'
bigquery:
time_partitioning:
type: day
field: submission_timestamp
require_partition_filter: true
expiration_days: null

View file

@@ -0,0 +1,100 @@
WITH crash_frames AS (
SELECT
* REPLACE (FORMAT("0x%x", module_offset) AS module_offset)
FROM
`moz-fx-data-shared-prod`.telemetry_derived.crash_frames_v1
WHERE
DATE(submission_timestamp) = @submission_date
),
crash_symbols AS (
SELECT
*
FROM
`moz-fx-data-shared-prod`.telemetry_derived.crash_symbols_v1
WHERE
submission_date = @submission_date
QUALIFY
-- symbols may be appended over multiple attempts, so only use the most recent symbols
1 = ROW_NUMBER() OVER (
PARTITION BY
debug_file,
debug_id,
module_offset
ORDER BY
request_timestamp DESC
)
),
crash AS (
SELECT
submission_timestamp,
document_id,
IF(STARTS_WITH(normalized_os, "Windows"), "Windows NT", NULL) AS os,
payload.metadata.async_shutdown_timeout,
payload.metadata.ipc_channel_error,
payload.metadata.oom_allocation_size,
payload.metadata.moz_crash_reason,
payload.stack_traces.crash_info.type AS reason,
FROM
`moz-fx-data-shared-prod`.telemetry_stable.crash_v4
WHERE
DATE(submission_timestamp) = @submission_date
),
symbolicated_frames AS (
SELECT
crash_frames.submission_timestamp,
crash_frames.document_id,
crash_frames.thread_offset,
ARRAY_AGG(
STRUCT(
crash_frames.module_offset,
crash_symbols.module,
crash_symbols.function,
crash_symbols.function_offset,
crash_symbols.file,
crash_symbols.line,
crash_symbols.inlines
)
ORDER BY
crash_frames.frame_offset
) AS frames,
FROM
crash_frames
LEFT JOIN
crash_symbols
USING
(debug_file, debug_id, module_offset)
GROUP BY
submission_timestamp,
document_id,
thread_offset
),
symbolicated_threads AS (
SELECT
submission_timestamp,
document_id,
ARRAY_AGG(STRUCT(frames) ORDER BY thread_offset) AS threads,
FROM
symbolicated_frames
GROUP BY
submission_timestamp,
document_id
)
SELECT
crash.submission_timestamp,
crash.document_id,
crash.os,
crash.async_shutdown_timeout,
crash.ipc_channel_error,
crash.oom_allocation_size,
crash.moz_crash_reason,
crash.reason,
-- only thread 0 and crashing thread are retained, so overwrite crashing thread to point to the
-- last thread in the list
NULLIF(ARRAY_LENGTH(symbolicated_threads.threads), 0) - 1 AS crashing_thread,
symbolicated_threads.threads,
FROM
crash
LEFT JOIN
symbolicated_threads
USING
(submission_timestamp, document_id)

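Because crash_frames retains only thread 0 and the crashing thread, the crashing thread is always the last element of the aggregated array, which is what the final SELECT re-derives. A plain-Python restatement of that one expression:

```python
from typing import Optional


def remapped_crashing_thread(threads: list) -> Optional[int]:
    # Mirrors NULLIF(ARRAY_LENGTH(threads), 0) - 1: the crashing thread is the
    # last retained thread. When thread 0 is itself the crashing thread, the
    # array has a single element and the remapped index is 0.
    return len(threads) - 1 if threads else None
```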
View file

@@ -0,0 +1,68 @@
---
fields:
- name: submission_timestamp
type: TIMESTAMP
mode: NULLABLE
- name: document_id
type: STRING
mode: NULLABLE
- name: os
type: STRING
mode: NULLABLE
- name: async_shutdown_timeout
type: STRING
mode: NULLABLE
- name: ipc_channel_error
type: STRING
mode: NULLABLE
- name: oom_allocation_size
type: STRING
mode: NULLABLE
- name: moz_crash_reason
type: STRING
mode: NULLABLE
- name: reason
type: STRING
mode: NULLABLE
- name: crashing_thread
type: INTEGER
mode: NULLABLE
- name: threads
type: RECORD
mode: REPEATED
fields:
- name: frames
type: RECORD
mode: REPEATED
fields:
- name: module_offset
type: STRING
mode: NULLABLE
- name: module
type: STRING
mode: NULLABLE
- name: function
type: STRING
mode: NULLABLE
- name: function_offset
type: STRING
mode: NULLABLE
- name: file
type: STRING
mode: NULLABLE
- name: line
type: INTEGER
mode: NULLABLE
- name: inlines
type: RECORD
mode: REPEATED
fields:
- name: function
type: STRING
mode: NULLABLE
- name: file
type: STRING
mode: NULLABLE
- name: line
type: INTEGER
mode: NULLABLE

View file

@@ -0,0 +1,36 @@
friendly_name: Crash Symbols
description: |-
Symbols for symbolicating crash frames.
owners:
- dthorn@mozilla.com
labels:
application: firefox
schedule: daily
scheduling:
dag_name: bqetl_crash
start_date: '2023-12-10'
retry_delay: 5m
arguments: ["--submission-date", "{{ ds }}"]
referenced_tables:
- - 'moz-fx-data-shared-prod'
- 'telemetry_derived'
- 'crash_frames_v1'
# Needed to scale the highmem-16 pool from 0 -> 1, because cluster autoscaling
# works on pod resource requests, instead of usage
container_resources:
request_memory: 102400Mi
request_cpu: null
limit_memory: 102400Mi
limit_cpu: null
limit_gpu: null
# This job needs up to 100 GiB to complete in a reasonable time frame
node_selector:
nodepool: highmem-16
# Give additional time since we may need to scale up when running this job
startup_timeout_seconds: 360
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: null

View file

@@ -0,0 +1,304 @@
"""Upload symbols used in crash pings."""
from datetime import datetime
from functools import partial
from multiprocessing.pool import ThreadPool as Pool
from pathlib import Path
import click
import requests
import symbolic
import yaml
from google.api_core.exceptions import BadRequest, NotFound
from google.cloud import bigquery
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError, HTTPError
from urllib3.util import Retry
SCHEMA_FILE = Path(__file__).parent / "schema.yaml"
OUTPUT_SCHEMA = bigquery.SchemaField.from_api_repr(
{"name": "root", "type": "RECORD", **yaml.safe_load(SCHEMA_FILE.read_text())}
).fields
def _bytes_split_generator(item, sep):
# https://github.com/mozilla-services/eliot/blob/be74cd6c0ef09dd85a71c8b1b22c3297b6b9f9bf/eliot/libsymbolic.py#L18-L36
index = 0
len_item = len(item)
while index <= len_item:
next_index = item.find(sep, index)
if next_index == -1:
break
yield item[index:next_index]
index = next_index + len(sep)
def _get_module_filename(sym_file, debug_filename):
# https://github.com/mozilla-services/eliot/blob/be74cd6c0ef09dd85a71c8b1b22c3297b6b9f9bf/eliot/libsymbolic.py#L39-L68
# Iterate through the first few lines of the file until we hit FILE in which
# case there's no INFO for some reason or we hit the first INFO.
for line in _bytes_split_generator(sym_file, b"\n"):
if line.startswith(b"INFO"):
parts = line.split(b" ")
if len(parts) == 4:
return parts[-1].decode("utf-8").strip()
else:
break
elif line.startswith((b"FILE", b"PUBLIC", b"FUNC")):
break
return debug_filename
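# Illustrative example (not from the original file): the INFO line's fourth
# token is the module filename, so for a minimal .sym payload
#   _get_module_filename(
#       b"MODULE windows x86_64 ABC123 xul.pdb\n"
#       b"INFO CODE_ID 5F0C1234 xul.dll\n"
#       b"FILE 0 example.cpp\n",
#       "xul.pdb",
#   )
# returns "xul.dll"; with no INFO line it falls back to the debug filename.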
def _get_symbols(session, source_url, submission_date, row):
"""Get symbols needed from a single symbol file.
This may be rearchitected to use eliot by sending per-module jobs.
"""
debug_file = row["debug_file"]
debug_id = row["debug_id"]
module_offsets = row["module_offsets"]
symbols = []
if debug_file.endswith(".pdb"):
sym_filename = debug_file[:-4] + ".sym"
else:
sym_filename = debug_file + ".sym"
request_timestamp = datetime.utcnow()
try:
resp = session.get(
f"{source_url}/{debug_file}/{debug_id}/{sym_filename}",
allow_redirects=True,
)
if resp.status_code not in (400, 404):
resp.raise_for_status()
except (ConnectionError, HTTPError) as e:
print("ERROR: could not get symbols: " f"{debug_file} {debug_id} {e}")
return e, []
if resp.status_code == 404:
print("WARNING: symbols not found: " f"{debug_file} {debug_id}")
return None, []
if resp.status_code == 400:
print(
f"400 Invalid Request {resp.request.url}: "
f"{resp.request.body.decode('utf-8')!r}"
)
resp.raise_for_status()
sym_file = resp.content
norm_debug_id = symbolic.normalize_debug_id(debug_id)
sym_archive = symbolic.Archive.from_bytes(sym_file)
symcache = sym_archive.get_object(debug_id=norm_debug_id).make_symcache()
module_filename = _get_module_filename(sym_file, debug_file)
# https://github.com/mozilla-services/eliot/blob/be74cd6c0ef09dd85a71c8b1b22c3297b6b9f9bf/eliot/symbolicate_resource.py#L505-L553
for module_offset in module_offsets:
frame_info = {
"submission_date": submission_date,
"request_timestamp": request_timestamp.isoformat(),
"debug_file": debug_file,
"debug_id": debug_id,
"module_offset": hex(module_offset),
"module": module_filename,
}
if module_offset < 0:
continue # ignore invalid offset
sourceloc_list = symcache.lookup(module_offset)
if not sourceloc_list:
continue # no symbols for this offset
# sourceloc_list can have multiple entries: It starts with the innermost
# inline stack frame, and then advances to its caller, and then its
# caller, and so on, until it gets to the outer function.
# We process the outer function first, and then add inline stack frames
# afterwards. The outer function is the last item in sourceloc_list.
sourceloc = sourceloc_list[-1]
frame_info["function"] = sourceloc.symbol
frame_info["function_offset"] = hex(module_offset - sourceloc.sym_addr)
if sourceloc.full_path:
frame_info["file"] = sourceloc.full_path
# Only add a "line" if it's non-zero and not None, and if there's a
# file--otherwise the line doesn't mean anything
if sourceloc.line:
frame_info["line"] = sourceloc.line
if len(sourceloc_list) > 1:
# We have inline information. Add an "inlines" property with a list
# of { function, file, line } entries.
inlines = []
for inline_sourceloc in sourceloc_list[:-1]:
inline_data = {
"function": inline_sourceloc.symbol,
}
if inline_sourceloc.full_path:
inline_data["file"] = inline_sourceloc.full_path
if inline_sourceloc.line:
inline_data["line"] = inline_sourceloc.line
inlines.append(inline_data)
frame_info["inlines"] = inlines
symbols.append(frame_info)
return None, symbols
@click.command()
@click.option(
"--collect-only-missing/--collect-all",
default=True,
help="Collect only missing symbols or collect all symbols for --submission-date",
)
@click.option(
"--parallelism",
default=40,
help="Number of threads to use when downloading symbols."
" Default assumes at least 100GiB of ram available.",
)
@click.option(
"--submission-date",
type=click.DateTime(formats=["%Y-%m-%d"]),
required=True,
)
@click.option(
"--source-table",
default="moz-fx-data-shared-prod.telemetry_derived.crash_frames_v1",
)
@click.option(
"--source-url",
default="https://symbols.mozilla.org",
)
@click.option(
"--destination-table",
default="moz-fx-data-shared-prod.telemetry_derived.crash_symbols_v1",
)
def main(
collect_only_missing,
parallelism,
submission_date,
source_table,
source_url,
destination_table,
):
    # click.DateTime yields a datetime; normalize to a YYYY-MM-DD string so it
    # is JSON-serializable in frame rows and usable in the "$YYYYMMDD" suffix
    submission_date = submission_date.date().isoformat()
    bq = bigquery.Client()
query_job = bq.query(
f"""
SELECT
debug_file,
debug_id,
ARRAY_AGG(DISTINCT module_offset ORDER BY module_offset) AS module_offsets,
FROM
`{source_table}`
WHERE
DATE(submission_timestamp) = @submission_date
AND debug_file != ""
AND debug_id != ""
GROUP BY
debug_file,
debug_id
""",
job_config=bigquery.QueryJobConfig(
query_parameters=[
bigquery.ScalarQueryParameter(
"submission_date", "DATE", submission_date
),
],
),
)
rows = [{**row} for row in query_job.result()]
if collect_only_missing:
# don't collect symbols already present in the destination table
try:
bq.get_table(destination_table)
except NotFound:
pass # collect all symbols
else:
existing_symbols_job = bq.query(
f"""
SELECT
debug_id,
debug_file,
ARRAY_AGG(DISTINCT module_offset) AS module_offsets,
FROM
`{destination_table}`
WHERE
submission_date = @submission_date
GROUP BY
debug_id,
debug_file
""",
job_config=bigquery.QueryJobConfig(
query_parameters=[
bigquery.ScalarQueryParameter(
"submission_date", "DATE", submission_date
),
],
),
)
existing_symbols = {
(row["debug_file"], row["debug_id"]): set(row["module_offsets"])
for row in existing_symbols_job.result()
}
for row in rows:
existing_module_offsets = existing_symbols.get(
(row["debug_file"], row["debug_id"])
)
if existing_module_offsets is not None:
row["module_offsets"] = [
o
for o in row["module_offsets"]
if o not in existing_module_offsets
]
rows = [row for row in rows if row["module_offsets"]] # drop empty rows
retry_strategy = Retry(
total=4,
status_forcelist=[429, 500, 502, 503, 504],
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session = requests.Session()
session.mount("https://", adapter)
_get_symbols_with_args = partial(_get_symbols, session, source_url, submission_date)
symbols = []
symbol_errors = []
with Pool(parallelism) as pool:
for error, _symbols in pool.imap(_get_symbols_with_args, rows, chunksize=1):
if error is None:
                symbols.extend(_symbols)
else:
symbol_errors.append(error)
load_job = bq.load_table_from_json(
json_rows=symbols,
destination=f'{destination_table}${submission_date.replace("-", "")}',
job_config=bigquery.LoadJobConfig(
ignore_unknown_values=False,
schema=OUTPUT_SCHEMA,
write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
),
)
try:
load_job.result()
except BadRequest as e:
print(e.errors)
print(load_job.errors)
raise
if symbol_errors:
# errors collecting symbols are deferred to here so that retries
# only need to collect symbols that previously failed
raise Exception(
"failed to collect some symbols, "
"run again in append mode to retry only missing symbols"
)
if __name__ == "__main__":
main()

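A note on the load destination used by both scripts: the `$YYYYMMDD` suffix is BigQuery's partition decorator, so the WRITE_APPEND above only ever appends within a single day's partition (the QUALIFY clause in crash_symbolicated then keeps the newest `request_timestamp` per symbol). A self-contained sketch with hypothetical project and dataset names:

```python
from google.cloud import bigquery

client = bigquery.Client()  # assumes default credentials
job = client.load_table_from_json(
    json_rows=[{"submission_date": "2023-12-10", "debug_file": "xul.pdb"}],
    # "$20231210" targets only that day's partition of the (hypothetical) table
    destination="my-project.my_dataset.crash_symbols_v1$20231210",
    job_config=bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
        ignore_unknown_values=False,
    ),
)
job.result()
```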
View file

@@ -0,0 +1,45 @@
---
fields:
- name: submission_date
type: DATE
mode: REQUIRED
- name: request_timestamp
type: TIMESTAMP
mode: REQUIRED
- name: debug_file
type: STRING
mode: REQUIRED
- name: debug_id
type: STRING
mode: REQUIRED
- name: module_offset
type: STRING
mode: REQUIRED
- name: module
type: STRING
mode: REQUIRED
- name: function
type: STRING
mode: REQUIRED
- name: function_offset
type: STRING
mode: REQUIRED
- name: file
type: STRING
mode: NULLABLE
- name: line
type: INT64
mode: NULLABLE
- name: inlines
type: RECORD
mode: REPEATED
fields:
- name: function
type: STRING
mode: REQUIRED
- name: file
type: STRING
mode: NULLABLE
- name: line
type: INT64
mode: NULLABLE