Exception handling for column sizes

This commit is contained in:
Anna Scholtz 2020-10-27 10:25:35 -07:00
Родитель 752cd03531
Коммит 71ac2bc686
1 изменённых файлов: 24 добавлений и 18 удалений

Просмотреть файл

@ -12,7 +12,7 @@ parser.add_argument("--date", required=True) # expect string with format yyyy-m
parser.add_argument("--project", default="moz-fx-data-shared-prod")
# Zero or more table names; defaults to telemetry_stable.main_v4.
parser.add_argument(
    "--tables", nargs="*", default=["telemetry_stable.main_v4"]
)
# Where the computed column sizes are written.
parser.add_argument("--destination_dataset", default="monitoring")
parser.add_argument("--destination_table", default="column_size_v1")
@ -37,27 +37,31 @@ def get_columns(client, project, dataset, table):
def get_column_size_json(client, date, column):
    """Return the bytes scanned when reading one column for a single day.

    Issues a dry-run query that selects only the given column, restricted to
    rows whose ``DATE(submission_timestamp)`` equals ``date``, and reads the
    job's ``total_bytes_processed``.

    Args:
        client: BigQuery client used to issue the dry-run query.
        date: Submission date string (presumably ``yyyy-mm-dd``, matching the
            ``--date`` argument -- confirm against the caller).
        column: Indexable of at least three items:
            ``(dataset_id, table_id, column_name)``.

    Returns:
        A dict with the keys ``submission_date``, ``dataset_id``,
        ``table_id``, ``column_name`` and ``byte_size``, or ``None`` when the
        size could not be determined. Callers are expected to drop the
        ``None`` entries before saving.
    """
    print(column)

    # Best effort: a single failing column (e.g. one the dry run rejects)
    # must not abort the whole run, so any error is reported and mapped to
    # None instead of propagating.
    try:
        dataset_id = column[0]
        table_id = column[1]
        # Use a separate name so the `column` argument stays intact for the
        # error message below.
        column_name = column[2]

        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)

        # NOTE(review): identifiers and the date are interpolated directly
        # into the SQL text; this assumes they come from trusted sources
        # (CLI arguments / table metadata) -- confirm.
        sql = f"""
            SELECT {column_name} FROM {dataset_id}.{table_id}
            WHERE DATE(submission_timestamp) = '{date}'
        """

        job = client.query(sql, job_config=job_config)

        return {
            "submission_date": date,
            "dataset_id": dataset_id,
            "table_id": table_id,
            "column_name": column_name,
            "byte_size": job.total_bytes_processed,
        }
    except Exception as e:
        print(f"Unable to determine size of {column}: {e}")
        return None
def save_column_sizes(
@ -102,6 +106,8 @@ def main():
chunksize=1,
)
column_sizes = [cs for cs in column_sizes if cs is not None]
save_column_sizes(
client,
column_sizes,