Exception handling for column sizes

This commit is contained in:
Anna Scholtz 2020-10-27 10:25:35 -07:00
Родитель 752cd03531
Коммит 71ac2bc686
1 изменённый файл: 24 добавления и 18 удалений

Просмотреть файл

@@ -12,7 +12,7 @@ parser.add_argument("--date", required=True) # expect string with format yyyy-m
parser.add_argument("--project", default="moz-fx-data-shared-prod") parser.add_argument("--project", default="moz-fx-data-shared-prod")
parser.add_argument( parser.add_argument(
"--tables", nargs="*", default=["telemetry_stable.main_v4"] "--tables", nargs="*", default=["telemetry_stable.main_v4"]
) # pattern )
parser.add_argument("--destination_dataset", default="monitoring") parser.add_argument("--destination_dataset", default="monitoring")
parser.add_argument("--destination_table", default="column_size_v1") parser.add_argument("--destination_table", default="column_size_v1")
@@ -37,27 +37,31 @@ def get_columns(client, project, dataset, table):
def get_column_size_json(client, date, column): def get_column_size_json(client, date, column):
"""Returns the size of a specific date partition of the specified table.""" """Returns the size of a specific date partition of the specified table."""
print(column) print(column)
job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False) try:
dataset_id = column[0] job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
table_id = column[1] dataset_id = column[0]
column = column[2] table_id = column[1]
column = column[2]
sql = f""" sql = f"""
SELECT {column} FROM {dataset_id}.{table_id} SELECT {column} FROM {dataset_id}.{table_id}
WHERE DATE(submission_timestamp) = '{date}' WHERE DATE(submission_timestamp) = '{date}'
""" """
job = client.query(sql, job_config=job_config) job = client.query(sql, job_config=job_config)
size = job.total_bytes_processed size = job.total_bytes_processed
return { return {
"submission_date": date, "submission_date": date,
"dataset_id": dataset_id, "dataset_id": dataset_id,
"table_id": table_id, "table_id": table_id,
"column_name": column, "column_name": column,
"byte_size": size, "byte_size": size,
} }
except Exception as e:
print(e)
return None
def save_column_sizes( def save_column_sizes(
@@ -102,6 +106,8 @@ def main():
chunksize=1, chunksize=1,
) )
column_sizes = [cs for cs in column_sizes if cs is not None]
save_column_sizes( save_column_sizes(
client, client,
column_sizes, column_sizes,