Package bqetl and publish to PyPI (#4917)

* pyproject.toml for bqetl

* Correctly resolve SQL generators from package

* CircleCI config to publish tagged versions to PyPI

* Get version from git tags
This commit is contained in:
Anna Scholtz 2024-02-05 09:04:04 -08:00 коммит произвёл GitHub
Родитель a4c7b0ab40
Коммит 138841d351
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
5 изменённых файлов: 79 добавлений и 71 удалений

Просмотреть файл

@ -679,6 +679,25 @@ jobs:
exit 1
# yamllint enable rule:line-length
deploy-to-pypi:
  # Build the source distribution and wheel, then upload them to PyPI.
  docker: *docker
  steps:
    - checkout
    - run:
        # `build` is installed explicitly because the next step invokes
        # `python -m build`, which is provided by that package.
        name: Install deployment tools
        command: |
          pip install --upgrade build setuptools wheel twine
    - run:
        name: Create the distribution files
        command: |
          python -m build --sdist --wheel
    - run:
        name: Upload to PyPI
        command: |
          # Relies on the TWINE_USERNAME and TWINE_PASSWORD environment variables
          # For more on twine, see:
          # https://twine.readthedocs.io/en/latest/
          twine upload --skip-existing dist/*
workflows:
version: 2
@ -820,6 +839,15 @@ workflows:
branches:
only:
- main
tagged-deploy:
  # Run the PyPI deployment job for release tags only.
  jobs:
    - deploy-to-pypi:
        filters:
          tags:
            # Calver: YYYY.M.MINOR
            # The dots are escaped so they match a literal "." rather than
            # any character.
            only: /[0-9]{4}\.[0-9]{1,2}\.[0-9]+/
          branches:
            # Ignore all branches; this workflow should only run for tags.
            ignore: /.*/
nightly:
# Run after schema-generator to ensure we are up-to-date
triggers:

Просмотреть файл

@ -1,5 +1,6 @@
"""bigquery-etl CLI generate command."""
import importlib.util
import sys
from inspect import getmembers
from pathlib import Path
@ -13,12 +14,18 @@ GENERATE_COMMAND = "generate"
ROOT = Path(__file__).parent.parent.parent
def generate_group():
def generate_group(sql_generators_dir):
"""Create the CLI group for the generate command."""
commands = []
generator_path = ROOT / SQL_GENERATORS_DIR
for path in generator_path.iterdir():
# import sql_generators module
spec = importlib.util.spec_from_file_location(
sql_generators_dir.name, (sql_generators_dir / "__init__.py").absolute()
)
module = importlib.util.module_from_spec(spec)
sys.modules["sql_generators"] = module
for path in sql_generators_dir.iterdir():
if "__pycache__" in path.parts:
# Ignore pycache subdirectories
continue
@ -50,7 +57,17 @@ def generate_group():
# expose click command group
generate = generate_group()
# Read the SQL generators directory from the "default" config section and
# fall back to ROOT / SQL_GENERATORS_DIR when the key is not configured.
# A single lookup is enough: nesting a second, identical ConfigLoader.get
# call as the fallback yields the same value.
generate = generate_group(
    Path(
        ConfigLoader.get(
            "default", "sql_generators_dir", fallback=ROOT / SQL_GENERATORS_DIR
        )
    )
)
@generate.command(help="Run all query generators", name="all")
@ -80,7 +97,7 @@ generate = generate_group()
def generate_all(ctx, output_dir, target_project, ignore, use_cloud_function):
"""Run all SQL generators."""
click.echo(f"Generating SQL content in {output_dir}.")
click.echo(ROOT / SQL_GENERATORS_DIR)
for _, cmd in reversed(generate.commands.items()):
if cmd.name != "all" and cmd.name not in ignore:
ctx.invoke(

Просмотреть файл

@ -1,6 +1,7 @@
default:
project: moz-fx-data-shared-prod
sql_dir: sql/
sql_generators_dir: sql_generators/
test_project: bigquery-etl-integration-test
non_user_facing_dataset_suffixes: # suffixes of datasets with non-user-facing views
- _derived

28
pyproject.toml Normal file
Просмотреть файл

@ -0,0 +1,28 @@
# Build backend: setuptools, with setuptools_scm available at build time.
[build-system]
requires = ["setuptools>=61.0", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"
# Static metadata for the distribution.
[project]
name = "mozilla-bigquery-etl"
authors = [
{ name="Mozilla Corporation", email="fx-data-dev@mozilla.org" },
]
description = "Tooling for building derived datasets in BigQuery"
readme = "README.md"
requires-python = ">=3.10"
# These two fields are not written here; they are filled in at build time
# (see the [tool.setuptools.dynamic] and [tool.setuptools_scm] tables).
dynamic = ["dependencies", "version"]
[project.urls]
Homepage = "https://github.com/mozilla/bigquery-etl"
Issues = "https://github.com/mozilla/bigquery-etl/issues"
# Install requirements are read from requirements.in.
[tool.setuptools.dynamic]
dependencies = {file = ["requirements.in"]}
# An empty table enables setuptools_scm with its default settings; per the
# commit message the version is derived from git tags.
[tool.setuptools_scm]
# Package discovery is limited to bigquery_etl and its subpackages.
[tool.setuptools.packages.find]
include = ["bigquery_etl", "bigquery_etl.*"]
# Console entry point: the `bqetl` command maps to bigquery_etl.cli:cli.
[project.scripts]
bqetl = "bigquery_etl.cli:cli"

Просмотреть файл

@ -1,66 +0,0 @@
from setuptools import find_namespace_packages, setup
def get_version():
    """Return ``__version__`` as defined in ``bigquery_etl/_version.py``.

    The file is read relative to the current working directory and executed
    in a fresh namespace, from which the ``__version__`` value is taken.
    """
    namespace = {}
    with open("bigquery_etl/_version.py") as version_file:
        source = version_file.read()
    exec(source, namespace)
    return namespace["__version__"]
# Package metadata and install requirements for mozilla-bigquery-etl.
setup(
    name="mozilla-bigquery-etl",
    version=get_version(),
    author="Mozilla Corporation",
    author_email="fx-data-dev@mozilla.org",
    description="Tooling for building derived datasets in BigQuery",
    url="https://github.com/mozilla/bigquery-etl",
    packages=find_namespace_packages(
        include=["bigquery_etl.*", "bigquery_etl", "sql_generators", "sql_generators.*"]
    ),
    package_data={
        "bigquery_etl": [
            "query_scheduling/templates/*.j2",
            "alchemer/*.json",
            "stripe/*.json",
            "stripe/*.yaml",
        ],
        "sql_generators": ["**/*"],
    },
    include_package_data=True,
    # The PyPI "typing" backport is intentionally not listed: the package
    # requires Python >= 3.10, where typing is part of the standard library.
    install_requires=[
        "gcloud",
        "gcsfs",
        "google-cloud-bigquery",
        "google-cloud-storage",
        "Jinja2",
        "pathos",
        "pyarrow",
        "pytest-black",
        "pytest-pydocstyle",
        "pytest-flake8",
        "pytest-mypy",
        "pytest",
        "PyYAML",
        "smart_open",
        "sqlparse",
        "mozilla_schema_generator",
        "GitPython",
        "cattrs",
        "attrs",
        "click",
        "pandas",
        "ujson",
        "stripe",
        "authlib",
    ],
    long_description="Tooling for building derived datasets in BigQuery",
    long_description_content_type="text/markdown",
    python_requires=">=3.10",
    # Installs the `bqetl` console command.
    entry_points="""
        [console_scripts]
        bqetl=bigquery_etl.cli:cli
    """,
)