Exception handling for column sizes
This commit is contained in:
Родитель
752cd03531
Коммит
71ac2bc686
|
@ -12,7 +12,7 @@ parser.add_argument("--date", required=True) # expect string with format yyyy-m
|
||||||
parser.add_argument("--project", default="moz-fx-data-shared-prod")
|
parser.add_argument("--project", default="moz-fx-data-shared-prod")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--tables", nargs="*", default=["telemetry_stable.main_v4"]
|
"--tables", nargs="*", default=["telemetry_stable.main_v4"]
|
||||||
) # pattern
|
)
|
||||||
parser.add_argument("--destination_dataset", default="monitoring")
|
parser.add_argument("--destination_dataset", default="monitoring")
|
||||||
parser.add_argument("--destination_table", default="column_size_v1")
|
parser.add_argument("--destination_table", default="column_size_v1")
|
||||||
|
|
||||||
|
@ -37,27 +37,31 @@ def get_columns(client, project, dataset, table):
|
||||||
def get_column_size_json(client, date, column):
|
def get_column_size_json(client, date, column):
|
||||||
"""Returns the size of a specific date partition of the specified table."""
|
"""Returns the size of a specific date partition of the specified table."""
|
||||||
print(column)
|
print(column)
|
||||||
job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
|
try:
|
||||||
dataset_id = column[0]
|
job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
|
||||||
table_id = column[1]
|
dataset_id = column[0]
|
||||||
column = column[2]
|
table_id = column[1]
|
||||||
|
column = column[2]
|
||||||
|
|
||||||
sql = f"""
|
sql = f"""
|
||||||
SELECT {column} FROM {dataset_id}.{table_id}
|
SELECT {column} FROM {dataset_id}.{table_id}
|
||||||
WHERE DATE(submission_timestamp) = '{date}'
|
WHERE DATE(submission_timestamp) = '{date}'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
job = client.query(sql, job_config=job_config)
|
job = client.query(sql, job_config=job_config)
|
||||||
|
|
||||||
size = job.total_bytes_processed
|
size = job.total_bytes_processed
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"submission_date": date,
|
"submission_date": date,
|
||||||
"dataset_id": dataset_id,
|
"dataset_id": dataset_id,
|
||||||
"table_id": table_id,
|
"table_id": table_id,
|
||||||
"column_name": column,
|
"column_name": column,
|
||||||
"byte_size": size,
|
"byte_size": size,
|
||||||
}
|
}
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def save_column_sizes(
|
def save_column_sizes(
|
||||||
|
@ -102,6 +106,8 @@ def main():
|
||||||
chunksize=1,
|
chunksize=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
column_sizes = [cs for cs in column_sizes if cs is not None]
|
||||||
|
|
||||||
save_column_sizes(
|
save_column_sizes(
|
||||||
client,
|
client,
|
||||||
column_sizes,
|
column_sizes,
|
||||||
|
|
Загрузка…
Ссылка в новой задаче