diff --git a/bigquery_etl/cli/query.py b/bigquery_etl/cli/query.py index ef1dd855b2..2da8f37c99 100644 --- a/bigquery_etl/cli/query.py +++ b/bigquery_etl/cli/query.py @@ -15,7 +15,6 @@ from functools import partial from glob import glob from multiprocessing.pool import Pool, ThreadPool from pathlib import Path -from tempfile import NamedTemporaryFile from traceback import print_exc from typing import Optional @@ -2127,10 +2126,7 @@ def deploy( ) sys.exit(1) - with NamedTemporaryFile(suffix=".json") as tmp_schema_file: - existing_schema.to_json_file(Path(tmp_schema_file.name)) - bigquery_schema = client.schema_from_json(tmp_schema_file.name) - + bigquery_schema = existing_schema.to_bigquery_schema() try: table = client.get_table(full_table_id) except NotFound: @@ -2249,10 +2245,7 @@ def _deploy_external_data( except NotFound: table = bigquery.Table(full_table_id) - with NamedTemporaryFile(suffix=".json") as tmp_schema_file: - existing_schema.to_json_file(Path(tmp_schema_file.name)) - bigquery_schema = client.schema_from_json(tmp_schema_file.name) - + bigquery_schema = existing_schema.to_bigquery_schema() table.schema = bigquery_schema _attach_metadata(metadata_file_path, table) diff --git a/bigquery_etl/schema/__init__.py b/bigquery_etl/schema/__init__.py index 7c90f07d8e..6b00d7b468 100644 --- a/bigquery_etl/schema/__init__.py +++ b/bigquery_etl/schema/__init__.py @@ -10,6 +10,7 @@ import attr import yaml from google.api_core.exceptions import NotFound from google.cloud import bigquery +from google.cloud.bigquery import SchemaField from .. import dryrun @@ -282,3 +283,12 @@ class Schema: def to_json(self): """Return the schema data as JSON.""" return json.dumps(self.schema) + + def to_bigquery_schema(self) -> List[SchemaField]: + """Get the BigQuery representation of the schema.""" + return [SchemaField.from_api_repr(field) for field in self.schema["fields"]] + + @classmethod + def from_bigquery_schema(cls, fields: List[SchemaField]) -> "Schema": + """Construct a Schema from the BigQuery representation.""" + return cls({"fields": [field.to_api_repr() for field in fields]}) diff --git a/tests/schema/test_schema.py b/tests/schema/test_schema.py index 0bdd466218..156f41b7b3 100644 --- a/tests/schema/test_schema.py +++ b/tests/schema/test_schema.py @@ -2,6 +2,7 @@ from pathlib import Path from textwrap import dedent import yaml +from google.cloud.bigquery import SchemaField from bigquery_etl.schema import Schema @@ -379,3 +380,76 @@ class TestQuerySchema: schema_1.merge(schema_2) assert schema_1.schema["fields"][0]["description"] == "Date of the submission" + + def test_bigquery_conversion(self): + bq_schema = [ + SchemaField( + "record_field", + "RECORD", + "REPEATED", + description="Record field", + fields=( + SchemaField( + "nested_field_1", + "STRING", + "NULLABLE", + description="Nested Field 1", + ), + SchemaField( + "nested_field_2", + "INTEGER", + "NULLABLE", + description="Nested Field 2", + ), + ), + ), + SchemaField("normal_field", "STRING"), + ] + + schema = Schema.from_bigquery_schema(bq_schema) + assert schema.schema["fields"][0]["description"] == "Record field" + assert schema.schema["fields"][0]["name"] == "record_field" + assert ( + schema.schema["fields"][0]["fields"][0]["description"] == "Nested Field 1" + ) + assert schema.schema["fields"][0]["fields"][0]["type"] == "STRING" + assert schema.schema["fields"][1]["name"] == "normal_field" + + schema_yaml = dedent( + """ + fields: + - mode: NULLABLE + name: submission_date + type: DATE + - mode: NULLABLE + name: client_id + type: STRING + - fields: + - mode: NULLABLE + name: campaign + type: STRING + description: attribution campaign + mode: NULLABLE + name: attribution + type: RECORD + """ + ) + + schema = Schema.from_json(yaml.safe_load(schema_yaml)) + bq_schema = schema.to_bigquery_schema() + assert len(bq_schema) == 3 + assert bq_schema[0] == SchemaField( + name="submission_date", field_type="DATE", mode="NULLABLE" + ) + assert bq_schema[2] == SchemaField( + name="attribution", + field_type="RECORD", + mode="NULLABLE", + fields=( + SchemaField( + name="campaign", + field_type="STRING", + description="attribution campaign", + ), + ), + )