This commit is contained in:
Sunah Suh 2019-09-04 10:54:48 -05:00 коммит произвёл GitHub
Родитель 3049439dbd
Коммит f9c611a906
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
1 изменённых файлов: 9 добавлений и 4 удалений

Просмотреть файл

@@ -24,7 +24,7 @@ from bigquery_etl.parse_udf import (
)
UDF_RE = re.compile(r"udf_(?:js_)?([a-zA-z0-9_]+)")
UDF_RE = re.compile(r"udf_(?:js_|legacy_)?([a-zA-z0-9_]+)")
OPTIONS_LIB_RE = re.compile(r'library = "gs://[^"]+/([^"]+)"')
@@ -75,11 +75,12 @@ def main():
udfs_to_publish.append(raw_udf)
for dep in udfs_to_publish:
if dep not in published_udfs:
publish_persistent_udf(raw_udfs[dep], client, args.dataset, args.project_id)
publish_persistent_udf(raw_udfs[dep], client, args.dataset, args.project_id,
args.gcs_bucket, args.gcs_path)
published_udfs.append(dep)
def publish_persistent_udf(raw_udf, client, dataset, project_id):
def publish_persistent_udf(raw_udf, client, dataset, project_id, gcs_bucket, gcs_path):
# transforms temporary UDF to persistent UDFs and publishes them
for definition in raw_udf.definitions:
# Within a standard SQL function, references to other entities require explicit project IDs
@@ -87,6 +88,10 @@ def publish_persistent_udf(raw_udf, client, dataset, project_id):
"`" + project_id + "`." + dataset + "." + r"\1", definition
)
query_with_renamed_udfs = query_with_renamed_udfs.replace(
"CREATE TEMP FUNCTION", "CREATE OR REPLACE FUNCTION"
)
# adjust paths for dependencies stored in GCS
query = OPTIONS_LIB_RE.sub(
fr'library = "gs://{gcs_bucket}/{gcs_path}\1"',
@@ -97,7 +102,7 @@ def publish_persistent_udf(raw_udf, client, dataset, project_id):
def push_dependencies_to_gcs(bucket, path, dep_dir):
client = storage.Client()
bucket = storage_client.get_bucket(bucket)
bucket = client.get_bucket(bucket)
for root, dirs, files in os.walk(dep_dir):
for filename in files: