Make shredder detect glean derived tables with client_info.client_id (#6459)
* Make shredder detect glean derived tables with client_info.client_id
* Add skip list
This commit is contained in:
Родитель
bf6852e4b1
Коммит
3c13b9af65
|
@ -499,6 +499,15 @@ SEARCH_IGNORE_FIELDS = {
|
|||
("telemetry_stable.prio_v4", ID),
|
||||
}
|
||||
|
||||
# set of qualified table ids ("dataset_id.table_id") that find_glean_targets skips
|
||||
GLEAN_IGNORE_LIST = {
|
||||
# deletion request table
|
||||
"firefox_desktop_derived.migration_esr_incorrect_deletion_request_v1",
|
||||
# subset of firefox_desktop_stable.pageload_v1 which doesn't have client ids
|
||||
"firefox_desktop_derived.pageload_1pct_v1",
|
||||
"firefox_desktop_derived.pageload_nightly_v1",
|
||||
}
|
||||
|
||||
|
||||
def find_glean_targets(
|
||||
pool: ThreadPool, client: bigquery.Client, project: str = SHARED_PROD
|
||||
|
@ -602,6 +611,7 @@ def find_glean_targets(
|
|||
|
||||
# skip tables already added to DELETE_TARGETS
|
||||
manually_added_tables = {target.table for target in DELETE_TARGETS.keys()}
|
||||
skipped_tables = manually_added_tables | GLEAN_IGNORE_LIST
|
||||
|
||||
return {
|
||||
**{
|
||||
|
@ -618,7 +628,7 @@ def find_glean_targets(
|
|||
and not table.table_id.startswith("migration")
|
||||
# skip tables with explicitly excluded client ids
|
||||
and table.labels.get("include_client_id", "true").lower() != "false"
|
||||
and qualified_table_id(table) not in manually_added_tables
|
||||
and qualified_table_id(table) not in skipped_tables
|
||||
},
|
||||
**{
|
||||
# glean derived tables that contain client_id
|
||||
|
@ -630,7 +640,26 @@ def find_glean_targets(
|
|||
for table in glean_derived_tables
|
||||
if any(field.name == CLIENT_ID for field in table.schema)
|
||||
and not table.table_id.startswith(derived_source_prefix)
|
||||
and qualified_table_id(table) not in manually_added_tables
|
||||
and qualified_table_id(table) not in skipped_tables
|
||||
},
|
||||
**{
|
||||
# glean derived tables that contain client_info.client_id but not client_id
|
||||
DeleteTarget(
|
||||
table=qualified_table_id(table),
|
||||
# field must be repeated for each deletion source
|
||||
field=(GLEAN_CLIENT_ID,) * len(sources[table.dataset_id]),
|
||||
): sources[table.dataset_id]
|
||||
for table in glean_derived_tables
|
||||
if any(
|
||||
field.name == "client_info"
|
||||
and any(
|
||||
[nested_field.name == "client_id" for nested_field in field.fields]
|
||||
)
|
||||
for field in table.schema
|
||||
)
|
||||
and all(field.name != CLIENT_ID for field in table.schema)
|
||||
and not table.table_id.startswith(derived_source_prefix)
|
||||
and qualified_table_id(table) not in skipped_tables
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
@ -317,6 +317,7 @@ def test_glean_targets_override(mock_requests):
|
|||
table_ids = [
|
||||
"adclick_history_v1", # should use value from override
|
||||
"other_table_v1",
|
||||
"pageload_1pct_v1", # should be ignored
|
||||
]
|
||||
else:
|
||||
raise Exception(f"unexpected dataset: {dataset_ref}")
|
||||
|
|
Загрузка…
Ссылка в новой задаче