Make shredder detect glean derived tables with client_info.client_id (#6459)
* Make shredder detect glean derived tables with client_info.client_id
* Add skip list
This commit is contained in:
Parent
bf6852e4b1
Commit
3c13b9af65
|
@ -499,6 +499,15 @@ SEARCH_IGNORE_FIELDS = {
|
||||||
("telemetry_stable.prio_v4", ID),
|
("telemetry_stable.prio_v4", ID),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# list of dataset_id.table_id to ignore in find_glean_targets function
|
||||||
|
GLEAN_IGNORE_LIST = {
|
||||||
|
# deletion request table
|
||||||
|
"firefox_desktop_derived.migration_esr_incorrect_deletion_request_v1",
|
||||||
|
# subset of firefox_desktop_stable.pageload_v1 which doesn't have client ids
|
||||||
|
"firefox_desktop_derived.pageload_1pct_v1",
|
||||||
|
"firefox_desktop_derived.pageload_nightly_v1",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def find_glean_targets(
|
def find_glean_targets(
|
||||||
pool: ThreadPool, client: bigquery.Client, project: str = SHARED_PROD
|
pool: ThreadPool, client: bigquery.Client, project: str = SHARED_PROD
|
||||||
|
@ -602,6 +611,7 @@ def find_glean_targets(
|
||||||
|
|
||||||
# skip tables already added to DELETE_TARGETS
|
# skip tables already added to DELETE_TARGETS
|
||||||
manually_added_tables = {target.table for target in DELETE_TARGETS.keys()}
|
manually_added_tables = {target.table for target in DELETE_TARGETS.keys()}
|
||||||
|
skipped_tables = manually_added_tables | GLEAN_IGNORE_LIST
|
||||||
|
|
||||||
return {
|
return {
|
||||||
**{
|
**{
|
||||||
|
@ -618,7 +628,7 @@ def find_glean_targets(
|
||||||
and not table.table_id.startswith("migration")
|
and not table.table_id.startswith("migration")
|
||||||
# skip tables with explicitly excluded client ids
|
# skip tables with explicitly excluded client ids
|
||||||
and table.labels.get("include_client_id", "true").lower() != "false"
|
and table.labels.get("include_client_id", "true").lower() != "false"
|
||||||
and qualified_table_id(table) not in manually_added_tables
|
and qualified_table_id(table) not in skipped_tables
|
||||||
},
|
},
|
||||||
**{
|
**{
|
||||||
# glean derived tables that contain client_id
|
# glean derived tables that contain client_id
|
||||||
|
@ -630,7 +640,26 @@ def find_glean_targets(
|
||||||
for table in glean_derived_tables
|
for table in glean_derived_tables
|
||||||
if any(field.name == CLIENT_ID for field in table.schema)
|
if any(field.name == CLIENT_ID for field in table.schema)
|
||||||
and not table.table_id.startswith(derived_source_prefix)
|
and not table.table_id.startswith(derived_source_prefix)
|
||||||
and qualified_table_id(table) not in manually_added_tables
|
and qualified_table_id(table) not in skipped_tables
|
||||||
|
},
|
||||||
|
**{
|
||||||
|
# glean derived tables that contain client_info.client_id but not client_id
|
||||||
|
DeleteTarget(
|
||||||
|
table=qualified_table_id(table),
|
||||||
|
# field must be repeated for each deletion source
|
||||||
|
field=(GLEAN_CLIENT_ID,) * len(sources[table.dataset_id]),
|
||||||
|
): sources[table.dataset_id]
|
||||||
|
for table in glean_derived_tables
|
||||||
|
if any(
|
||||||
|
field.name == "client_info"
|
||||||
|
and any(
|
||||||
|
[nested_field.name == "client_id" for nested_field in field.fields]
|
||||||
|
)
|
||||||
|
for field in table.schema
|
||||||
|
)
|
||||||
|
and all(field.name != CLIENT_ID for field in table.schema)
|
||||||
|
and not table.table_id.startswith(derived_source_prefix)
|
||||||
|
and qualified_table_id(table) not in skipped_tables
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -317,6 +317,7 @@ def test_glean_targets_override(mock_requests):
|
||||||
table_ids = [
|
table_ids = [
|
||||||
"adclick_history_v1", # should use value from override
|
"adclick_history_v1", # should use value from override
|
||||||
"other_table_v1",
|
"other_table_v1",
|
||||||
|
"pageload_1pct_v1", # should be ignored
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
raise Exception(f"unexpected dataset: {dataset_ref}")
|
raise Exception(f"unexpected dataset: {dataset_ref}")
|
||||||
|
|
Loading…
Reference in new issue