This commit is contained in:
Jeff Klukas 2019-09-12 16:42:36 -04:00
Родитель 57a15e67c6
Коммит f4c5ea8e7c
7 изменённых файлов: 36 добавлений и 23 удалений

Просмотреть файл

@@ -52,10 +52,15 @@ class RawUdf:
)
)
dependencies.remove(name)
return RawUdf(name, filepath, definitions, tests,
# We convert the list to a set to deduplicate entries,
# but then convert back to a list for stable order.
list(sorted(set(dependencies))))
return RawUdf(
name,
filepath,
definitions,
tests,
# We convert the list to a set to deduplicate entries,
# but then convert back to a list for stable order.
list(sorted(set(dependencies))),
)
@dataclass

Просмотреть файл

@@ -42,10 +42,7 @@ def worker_entrypoint(sqlfile):
DRY_RUN_URL,
headers={"Content-Type": "application/json"},
data=json.dumps(
{
"dataset": basename(dirname(dirname(sqlfile))),
"query": sql,
}
{"dataset": basename(dirname(dirname(sqlfile))), "query": sql}
).encode("utf8"),
method="POST",
)

Просмотреть файл

@@ -14,7 +14,11 @@ import os, sys
# and sibling directories. Also see:
# https://stackoverflow.com/questions/6323860/sibling-package-imports/23542795#23542795
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from bigquery_etl.parse_udf import UDF_DIRS, read_udf_dirs, prepend_udf_usage_definitions
from bigquery_etl.parse_udf import (
UDF_DIRS,
read_udf_dirs,
prepend_udf_usage_definitions,
)
from textwrap import dedent
import shutil
import re
@@ -29,7 +33,7 @@ parser.add_argument(
parser.add_argument(
"--udf-dir",
default=UDF_DIRS,
nargs='+',
nargs="+",
help="Directories where declarations of temporary UDFs are stored.",
)
parser.add_argument(

Просмотреть файл

@@ -118,7 +118,7 @@ def create_views_if_not_exist(views, exclude, sql_dir):
if any(fnmatchcase(pattern, view) for pattern in exclude):
log.info("skipping table: matched by exclude pattern: {view}")
continue
if view.endswith('_'):
if view.endswith("_"):
# A trailing '_' confuses the logic here of parsing versions,
# and likely indicates that the table is somehow private, so
# we ignore it.

Просмотреть файл

@@ -38,7 +38,7 @@ parser.add_argument(
help="Specifies creation mode for the table",
choices=create_modes.keys(),
default="create_if_not_exists",
type=str.upper
type=str.upper,
)
parser.add_argument("-p", "--project", help="Specifies project name for new table")
parser.add_argument("-d", "--dataset", help="Specifies dataset name for new table")

Просмотреть файл

@@ -75,8 +75,14 @@ def main():
udfs_to_publish.append(raw_udf)
for dep in udfs_to_publish:
if dep not in published_udfs:
publish_persistent_udf(raw_udfs[dep], client, args.dataset, args.project_id,
args.gcs_bucket, args.gcs_path)
publish_persistent_udf(
raw_udfs[dep],
client,
args.dataset,
args.project_id,
args.gcs_bucket,
args.gcs_path,
)
published_udfs.append(dep)
@@ -94,12 +100,12 @@ def publish_persistent_udf(raw_udf, client, dataset, project_id, gcs_bucket, gcs
# adjust paths for dependencies stored in GCS
query = OPTIONS_LIB_RE.sub(
fr'library = "gs://{gcs_bucket}/{gcs_path}\1"',
query_with_renamed_udfs,
fr'library = "gs://{gcs_bucket}/{gcs_path}\1"', query_with_renamed_udfs
)
client.query(query).result()
def push_dependencies_to_gcs(bucket, path, dep_dir):
client = storage.Client()
bucket = client.get_bucket(bucket)
@@ -109,5 +115,6 @@ def push_dependencies_to_gcs(bucket, path, dep_dir):
blob = bucket.blob(path + filename)
blob.upload_from_filename(os.path.join(root, filename))
if __name__ == "__main__":
main()

Просмотреть файл

@@ -23,9 +23,7 @@ def process_file(client, args, filepath):
target_view = target_view_orig.replace(project_id, args.target_project, 1)
# We only change the first occurrence, which is in the target view name.
sql = sql.replace(project_id, args.target_project, 1)
job_config = bigquery.QueryJobConfig(
use_legacy_sql=False, dry_run=args.dry_run
)
job_config = bigquery.QueryJobConfig(use_legacy_sql=False, dry_run=args.dry_run)
query_job = client.query(sql, job_config)
if args.dry_run:
print(f"Validated definition of {target_view} in {filepath}")
@@ -43,8 +41,10 @@ def main():
)
parser.add_argument(
"--target-project",
help=("If specified, create views in the target project rather than"
" the project specified in the file"),
help=(
"If specified, create views in the target project rather than"
" the project specified in the file"
),
)
parser.add_argument("--log-level", default="INFO", help="Defaults to INFO")
parser.add_argument(
@@ -66,8 +66,8 @@ def main():
for target in args.target:
if os.path.isdir(target):
for root, dirs, files in os.walk(target):
if 'view.sql' in files:
process_file(client, args, os.path.join(root, 'view.sql'))
if "view.sql" in files:
process_file(client, args, os.path.join(root, "view.sql"))
else:
process_file(client, args, target)