Move dependencies to udf_js_lib

This commit is contained in:
Anna Scholtz 2020-10-07 14:03:04 -07:00
Родитель 52ff8a602d
Коммит 0d51459bd1
6 изменённых файлов: 22 добавлений и 12 удалений

Просмотреть файл

@@ -21,7 +21,7 @@ from ..util.common import project_dirs
UDF_NAME_RE = re.compile(r"^(?P<dataset>[a-zA-z0-9_]+)\.(?P<name>[a-zA-z0-9_]+)$")
UDF_DATASET_RE = re.compile(r"^(?P<dataset>[a-zA-z0-9_]+)$")
UDF_FILE_RE = re.compile(r"(^.*/|^)([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+)/udf\.sql$")
DEFAULT_UDF_DEPENDENCY_DIR = "lib/"
DEFAULT_UDF_DEPENDENCY_DIR = "udf_js_lib/"
DEFAULT_GCS_BUCKET = "moz-fx-data-prod-bigquery-etl"
DEFAULT_GCS_PATH = ""
DEFAULT_PROJECT_ID = "moz-fx-data-shared-prod"

Просмотреть файл

@@ -15,9 +15,10 @@ from bigquery_etl.routine.parse_routine import (
accumulate_dependencies,
)
DEFAULT_UDF_DEPENDENCY_DIR = "lib/"
DEFAULT_UDF_DEPENDENCY_DIR = "udf_js_lib/"
DEFAULT_GCS_BUCKET = "moz-fx-data-prod-bigquery-etl"
DEFAULT_GCS_PATH = ""
DEFAULT_PROJECT = "sql/moz-fx-data-shared-prod"
SQL_DIR = "sql/"
OPTIONS_LIB_RE = re.compile(r'library = "gs://[^"]+/([^"]+)"')
@@ -33,6 +34,12 @@ parser.add_argument(
help="Project to publish UDFs to. "
"If not set, publish UDFs for all projects except mozfun.",
)
parser.add_argument(
"--target",
default=DEFAULT_PROJECT,
required=False,
help="Path to project directory.",
)
parser.add_argument(
"--dependency-dir",
"--dependency_dir",
@@ -63,14 +70,15 @@ def main():
"""Publish routine."""
args = parser.parse_args()
if args.project_id is not None:
projects = [args.project_id]
if args.target is not None:
projects = [args.target]
else:
projects = project_dirs()
for project in projects:
publish(
os.path.basename(project),
args.target,
args.project_id,
os.path.join(SQL_DIR, project, args.dependency_dir),
args.gcs_bucket,
args.gcs_path,
@@ -78,14 +86,16 @@ def main():
)
def publish(project_id, dependency_dir, gcs_bucket, gcs_path, public):
def publish(target, project_id, dependency_dir, gcs_bucket, gcs_path, public):
"""Publish routines in the provided directory."""
client = bigquery.Client(project_id)
if dependency_dir and os.path.exists(dependency_dir):
push_dependencies_to_gcs(gcs_bucket, gcs_path, dependency_dir, project_id)
push_dependencies_to_gcs(
gcs_bucket, gcs_path, dependency_dir, os.path.basename(target)
)
raw_routines = read_routine_dir(os.path.join(SQL_DIR, project_id))
raw_routines = read_routine_dir(target)
published_routines = []

Просмотреть файл

@@ -5,4 +5,4 @@
cd "$(dirname "$0")/.."
exec python3 -m bigquery_etl.routine.publish_routines --project_id=mozfun \
--dependency_dir=lib/ --gcs-bucket=mozfun --public=True "$@"
--dependency_dir=lib/ --gcs-bucket=mozfun --target=sql/mozfun --public=True "$@"

Просмотреть файл

Просмотреть файл

@@ -11,12 +11,12 @@ class TestPublishRoutine:
udf_dir = TEST_DIR / "data" / "test_sql" / "moz-fx-data-test-project" / "udf"
@mock.patch("google.cloud.bigquery.Client")
def test_publish_udf_with_description(self, mock_client):
def test_publish_routine_with_description(self, mock_client):
raw_routine = parse_routine.RawRoutine.from_file(
self.udf_dir / "test_shift_28_bits_one_day" / "udf.sql"
)
mock_client.query = MagicMock()
publish_routines.publish_udf(
publish_routines.publish_routine(
raw_routine, mock_client, "test-project", "", "", [], False
)
query = (
@@ -33,7 +33,7 @@ class TestPublishRoutine:
self.udf_dir / "test_js_udf" / "udf.sql"
)
mock_client.query = MagicMock()
publish_routines.publish_udf(
publish_routines.publish_routine(
raw_routine, mock_client, "test-project", "", "", [], False
)
query = (