Daniel Thorn 2019-03-07 12:43:21 -08:00 committed by GitHub
Parent 55da69cca9
Commit 79070068ad
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 559 additions and 1 deletion

39
.circleci/config.yml Normal file

@@ -0,0 +1,39 @@
version: 2
jobs:
  build:
    docker:
      - image: python:3.7
    steps:
      - checkout
      - restore_cache:
          keys:
            # when lock files change, use increasingly general patterns to restore cache
            - python-packages-v1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "constraints.txt" }}
            - python-packages-v1-{{ .Branch }}-{{ checksum "requirements.txt" }}-
            - python-packages-v1-{{ .Branch }}-
            - python-packages-v1-
      - run:
          name: Build
          command: |
            python3.7 -m venv venv/
            venv/bin/pip install --upgrade -r requirements.txt
      - run:
          name: PyTest with linters
          # Google's client libraries will check for GOOGLE_APPLICATION_CREDENTIALS
          # and use a file in that location for credentials if present;
          # See https://cloud.google.com/docs/authentication/production
          environment:
            GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcp.json
          command: |
            echo "${GCLOUD_SERVICE_KEY:?}" > "$GOOGLE_APPLICATION_CREDENTIALS"
            venv/bin/pytest --black --docstyle --flake8 --mypy-ignore-missing-imports
      - save_cache:
          paths:
            - venv/
          key: python-packages-v1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "constraints.txt" }}
workflows:
  version: 2
  build:
    jobs:
      - build

2
.flake8 Normal file

@@ -0,0 +1,2 @@
[flake8]
max-line-length = 88

5
.gitignore vendored Normal file

@@ -0,0 +1,5 @@
*.pyc
*.swp
*.swo
.mypy_cache/
venv/

README.md

@@ -6,11 +6,14 @@ Bigquery UDFs and SQL queries for building derived datasets.
Recommended practices
===
- Should name sql files like `sql/destination_table_with_version.sql` e.g.
- Should name query files like `sql/destination_table_with_version.sql` e.g.
  `sql/clients_daily_v6.sql`
- Should not specify a project or dataset in table names to simplify testing
- Should use incremental queries
- Should filter input tables on partition and clustering columns
- Should name UDFs like `udf_function_name` e.g. `udf_mode_last`
- Should name UDF files like `udfs/udf_function_name.{sql,js}` e.g.
  `udfs/udf_mode_last.sql`
- Should use UDF language `SQL` over `js` for performance
- Should use UDFs for reusability
- Should use query parameters over jinja templating
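
As a sketch of the last point, query parameters can replace jinja templating when running a query with the `google-cloud-bigquery` client (the table and parameter names below are illustrative, borrowed from the tests in this commit):

```
from google.cloud import bigquery

client = bigquery.Client()
job_config = bigquery.QueryJobConfig(
    # pass the date as a typed parameter instead of rendering it into the SQL
    query_parameters=[
        bigquery.ScalarQueryParameter("submission_date", "DATE", "2019-01-02")
    ]
)
job = client.query(
    "SELECT * FROM clients_daily_v6 WHERE submission_date_s3 = @submission_date",
    job_config=job_config,
)
rows = list(job.result())
```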
@@ -28,6 +31,7 @@ Incremental queries have these benefits:
- Will have tooling to replace partitions atomically to prevent duplicate data
- Will have tooling to generate an optimized "destination plus" view that
  calculates the most recent partition
- Note: UDFs are not allowed in views

Incremental queries have these properties:
@@ -38,3 +42,8 @@ Incremental queries have these properties:
- May depend on the previous partition
  - If using previous partition, must include a `.init.sql` query to init the
    first partition

Tests
===
[see here](tests/)

39
constraints.txt Normal file

@@ -0,0 +1,39 @@
apipkg==1.5 --hash=sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6 --hash=sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c
appdirs==1.4.3 --hash=sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92 --hash=sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e
atomicwrites==1.3.0 --hash=sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4 --hash=sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6
attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79 --hash=sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399
black==18.9b0 --hash=sha256:817243426042db1d36617910df579a54f1afd659adb96fc5032fcf4b36209739 --hash=sha256:e030a9a28f542debc08acceb273f228ac422798e5215ba2a791a6ddeaaca22a5
cachetools==3.1.0 --hash=sha256:219b7dc6024195b6f2bc3d3f884d1fef458745cd323b04165378622dcc823852 --hash=sha256:9efcc9fab3b49ab833475702b55edd5ae07af1af7a4c627678980b45e459c460
certifi==2018.11.29 --hash=sha256:47f9c83ef4c0c621eaef743f133f09fa8a74a9b75f037e8624f83bd1b6626cb7 --hash=sha256:993f830721089fef441cdfeb4b2c8c9df86f0c63239f06bd025a76a7daddb033
chardet==3.0.4 --hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691
click==7.0 --hash=sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13 --hash=sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7
entrypoints==0.3 --hash=sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19 --hash=sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451
execnet==1.5.0 --hash=sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a --hash=sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83
flake8==3.7.7 --hash=sha256:859996073f341f2670741b51ec1e67a01da142831aa1fdc6242dbf88dffbe661 --hash=sha256:a796a115208f5c03b18f332f7c11729812c8c3ded6c46319c59b53efd3819da8
google-api-core==1.8.0 --hash=sha256:3157625b4f4f033650c6e674d52fd8a3a8c116b26b39705cddf4ed61621c09ff --hash=sha256:c6d834143a2bea4de8d1161b5460fd362457db40c55ea9ccbe672e5602e330af
google-auth==1.6.3 --hash=sha256:0f7c6a64927d34c1a474da92cfc59e552a5d3b940d3266606c6a28b72888b9e4 --hash=sha256:20705f6803fd2c4d1cc2dcb0df09d4dfcb9a7d51fd59e94a3a28231fd93119ed
google-cloud-core==0.29.1 --hash=sha256:9bee63e0991be9801a4baf0b7841cf54f86c6e7fec922f45ea74cd4032ed4ee4 --hash=sha256:d85b1aaaf3bad9415ad1d8ee5eadce96d7007a82f13ce0a0629a003a11e83f29
google-resumable-media==0.3.2 --hash=sha256:2dae98ee716efe799db3578a7b902fbf5592fc5c77d3c0906fc4ef9b1b930861 --hash=sha256:3e38923493ca0d7de0ad91c31acfefc393c78586db89364e91cb4f11990e51ba
googleapis-common-protos==1.5.8 --hash=sha256:d56ca712f67fff216d3be9eeeb8360ca59066d0365ba70b137b9e1801813747e
idna==2.8 --hash=sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407 --hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c
mccabe==0.6.1 --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f
more-itertools==6.0.0 --hash=sha256:0125e8f60e9e031347105eb1682cef932f5e97d7b9a1a28d9bf00c22a5daef40 --hash=sha256:590044e3942351a1bdb1de960b739ff4ce277960f2425ad4509446dbace8d9d1
mypy==0.670 --hash=sha256:308c274eb8482fbf16006f549137ddc0d69e5a589465e37b99c4564414363ca7 --hash=sha256:e80fd6af34614a0e898a57f14296d0dacb584648f0339c2e000ddbf0f4cc2f8d
mypy_extensions==0.4.1 --hash=sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812 --hash=sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e
pluggy==0.9.0 --hash=sha256:19ecf9ce9db2fce065a7a0586e07cfb4ac8614fe96edf628a264b1c70116cf8f --hash=sha256:84d306a647cc805219916e62aab89caa97a33a1dd8c342e87a37f91073cd4746
protobuf==3.7.0 --hash=sha256:03666634d038e35d90155756914bc3a6316e8bcc0d300f3ee539e586889436b9 --hash=sha256:049d5900e442d4cc0fd2afd146786b429151e2b29adebed28e6376026ab0ee0b --hash=sha256:0eb9e62a48cc818b1719b5035042310c7e4f57b01f5283b32998c68c2f1c6a7c --hash=sha256:255d10c2c9059964f6ebb5c900a830fc8a089731dda94a5cc873f673193d208b --hash=sha256:358cc59e4e02a15d3725f204f2eb5777fc10595e2d9a9c4c8d82292f49af6d41 --hash=sha256:41f1b737d5f97f1e2af23d16fac6c0b8572f9c7ea73054f1258ca57f4f97cb80 --hash=sha256:4da3781eba8d3051d75b296d48154014c872e8a6323b8c207174cae49523b628 --hash=sha256:6a5129576a2cf925cd100e06ead5f9ae4c86db70a854fb91cedb8d680112734a --hash=sha256:80722b0d56dcb7ca8f75f99d8dadd7c7efd0d2265714d68f871ed437c32d82b3 --hash=sha256:88a960e949ec356f7016d84f8262dcff2b842fca5355b4c1be759f5c103b19b3 --hash=sha256:97872686223f47d95e914881cb0ca46e1bc622562600043da9edddcb54f2fe1e --hash=sha256:a1df9d22433ab44b7c7e0bd33817134832ae8a8f3d93d9b9719fc032c5b20e96 --hash=sha256:ad385fbb9754023d17be14dd5aa67efff07f43c5df7f93118aef3c20e635ea19 --hash=sha256:b2d5ee7ba5c03b735c02e6ae75fd4ff8c831133e7ca078f2963408dc7beac428 --hash=sha256:c8c07cd8635d45b28ec53ee695e5ac8b0f9d9a4ae488a8d8ee168fe8fc75ba43 --hash=sha256:d44ebc9838b183e8237e7507885d52e8d08c48fdc953fd4a7ee3e56cb9d20977 --hash=sha256:dff97b0ee9256f0afdfc9eaa430736cdcdc18899d9a666658f161afd137cf93d --hash=sha256:e47d248d614c68e4b029442de212bdd4f6ae02ae36821de319ae90314ea2578c --hash=sha256:e650b521b429fed3d525428b1401a40051097a5a92c30076c91f36b31717e087
py==1.8.0 --hash=sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa --hash=sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53
pyasn1-modules==0.2.4 --hash=sha256:136020f884635942239b33abdb63b1e0fdfb3c4bc8693f769ff1ab0908133a5b --hash=sha256:1c2ce0717e099620d7d425d2bb55e68f8126d77c8ba93112f0448a212048fe76 --hash=sha256:39da883a45dfc71314c48bba772be63a13946d0dd6abde326df163656a7b13e1 --hash=sha256:4160b0caedf8f1675ca7b94a65900d0219c715ac745cbc0c93557a9864b19748 --hash=sha256:50c5f454c29bc8a7b8bfffc0fd00fed1f9012160b4532807a33c27af91747337 --hash=sha256:52c46ecb2c1e7a03fe54dc8e11d6460ec7ebdcaedba3b0fe4ba2a811521df05f --hash=sha256:6db7a0510e55212b42a1f3e3553559eb214c8c8495e1018b4135d2bfb5a9169a --hash=sha256:79580acf813e3b7d6e69783884e6e83ac94bf4617b36a135b85c599d8a818a7b --hash=sha256:98e80b5ae1ed0d92694927a3e34df016c3b69b7bf439b32fc0a0dc516ec3653d --hash=sha256:9e879981cbf4c868a2267385a56837e0d384eab2d1690e6e0c8bba28d102509e --hash=sha256:a52090e8c5841ebbf08ae455146792d9ef3e8445b21055d3a3b7ed9c712b7c7c --hash=sha256:c00dad1d69d8592bbbc978f5beb3e992d3bf996e6b97eeec1c8608f81221d922 --hash=sha256:c226b5c17683d98498e157d6ac0098b93f9c475da5bc50072f64bf3f3f6b828f
pyasn1==0.4.5 --hash=sha256:061442c60842f6d11051d4fdae9bc197b64bd41573a12234a753a0cb80b4f30b --hash=sha256:0ee2449bf4c4e535823acc25624c45a8b454f328d59d3f3eeb82d3567100b9bd --hash=sha256:5f9fb05c33e53b9a6ee3b1ed1d292043f83df465852bec876e93b47fd2df7eed --hash=sha256:65201d28e081f690a32401e6253cca4449ccacc8f3988e811fae66bd822910ee --hash=sha256:79b336b073a52fa3c3d8728e78fa56b7d03138ef59f44084de5f39650265b5ff --hash=sha256:8ec20f61483764de281e0b4aba7d12716189700debcfa9e7935780850bf527f3 --hash=sha256:9458d0273f95d035de4c0d5e0643f25daba330582cc71bb554fe6969c015042a --hash=sha256:98d97a1833a29ca61cd04a60414def8f02f406d732f9f0bcb49f769faff1b699 --hash=sha256:b00d7bfb6603517e189d1ad76967c7e805139f63e43096e5f871d1277f50aea5 --hash=sha256:b06c0cfd708b806ea025426aace45551f91ea7f557e0c2d4fbd9a4b346873ce0 --hash=sha256:d14d05984581770333731690f5453efd4b82e1e5d824a1d7976b868a2e5c38e8 --hash=sha256:da2420fe13a9452d8ae97a0e478adde1dee153b11ba832a95b223a2ba01c10f7 --hash=sha256:da6b43a8c9ae93bc80e2739efb38cc776ba74a886e3e9318d65fe81a8b8a2c6e
pycodestyle==2.5.0 --hash=sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56 --hash=sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c
pydocstyle==3.0.0 --hash=sha256:2258f9b0df68b97bf3a6c29003edc5238ff8879f1efb6f1999988d934e432bd8 --hash=sha256:5741c85e408f9e0ddf873611085e819b809fca90b619f5fd7f34bd4959da3dd4 --hash=sha256:ed79d4ec5e92655eccc21eb0c6cf512e69512b4a97d215ace46d17e4990f2039
pyflakes==2.1.1 --hash=sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0 --hash=sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2
pytest-forked==1.0.2 --hash=sha256:5fe33fbd07d7b1302c95310803a5e5726a4ff7f19d5a542b7ce57c76fed8135f --hash=sha256:d352aaced2ebd54d42a65825722cb433004b4446ab5d2044851d9cc7a00c9e38
pytz==2018.9 --hash=sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9 --hash=sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c
requests==2.21.0 --hash=sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e --hash=sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b
rsa==4.0 --hash=sha256:14ba45700ff1ec9eeb206a2ce76b32814958a98e372006c8fb76ba820211be66 --hash=sha256:1a836406405730121ae9823e19c6e806c62bbad73f890574fff50efa4122c487
six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c --hash=sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73
snowballstemmer==1.2.1 --hash=sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128 --hash=sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89
toml==0.10.0 --hash=sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c --hash=sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e --hash=sha256:f1db651f9657708513243e61e6cc67d101a39bad662eaa9b5546f789338e07a3
typed-ast==1.3.1 --hash=sha256:035a54ede6ce1380599b2ce57844c6554666522e376bd111eb940fbc7c3dad23 --hash=sha256:037c35f2741ce3a9ac0d55abfcd119133cbd821fffa4461397718287092d9d15 --hash=sha256:049feae7e9f180b64efacbdc36b3af64a00393a47be22fa9cb6794e68d4e73d3 --hash=sha256:19228f7940beafc1ba21a6e8e070e0b0bfd1457902a3a81709762b8b9039b88d --hash=sha256:2ea681e91e3550a30c2265d2916f40a5f5d89b59469a20f3bad7d07adee0f7a6 --hash=sha256:3a6b0a78af298d82323660df5497bcea0f0a4a25a0b003afd0ce5af049bd1f60 --hash=sha256:5385da8f3b801014504df0852bf83524599df890387a3c2b17b7caa3d78b1773 --hash=sha256:606d8afa07eef77280c2bf84335e24390055b478392e1975f96286d99d0cb424 --hash=sha256:69245b5b23bbf7fb242c9f8f08493e9ecd7711f063259aefffaeb90595d62287 --hash=sha256:6f6d839ab09830d59b7fa8fb6917023d8cb5498ee1f1dbd82d37db78eb76bc99 --hash=sha256:730888475f5ac0e37c1de4bd05eeb799fdb742697867f524dc8a4cd74bcecc23 --hash=sha256:9819b5162ffc121b9e334923c685b0d0826154e41dfe70b2ede2ce29034c71d8 --hash=sha256:9e60ef9426efab601dd9aa120e4ff560f4461cf8442e9c0a2b92548d52800699 --hash=sha256:af5fbdde0690c7da68e841d7fc2632345d570768ea7406a9434446d7b33b0ee1 --hash=sha256:b64efdbdf3bbb1377562c179f167f3bf301251411eb5ac77dec6b7d32bcda463 --hash=sha256:bac5f444c118aeb456fac1b0b5d14c6a71ea2a42069b09c176f75e9bd4c186f6 --hash=sha256:bda9068aafb73859491e13b99b682bd299c1b5fd50644d697533775828a28ee0 --hash=sha256:d659517ca116e6750101a1326107d3479028c5191f0ecee3c7203c50f5b915b0 --hash=sha256:eddd3fb1f3e0f82e5915a899285a39ee34ce18fd25d89582bc89fc9fb16cd2c6
urllib3==1.24.1 --hash=sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39 --hash=sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22

7
pytest.ini Normal file

@@ -0,0 +1,7 @@
[pytest]
filterwarnings =
    # upstream lib imports ABC improperly for backward compatibility
    ignore::DeprecationWarning:google.protobuf.descriptor:47
    ignore::DeprecationWarning:google.protobuf.internal.well_known_types:788
    ignore::DeprecationWarning:yaml.constructor:126
norecursedirs = venv

9
requirements.txt Normal file

@@ -0,0 +1,9 @@
-c constraints.txt
google-cloud-bigquery==1.9.0 --hash=sha256:169ffdb1b677f69f1f9d032bd38f724aed73e0565153ac17199472c083a3852f --hash=sha256:46bef58521e9c245064b3deaf26581cf93127cd1dd019b3ecbc380831a1a6d28
pytest-black==0.3.4 --hash=sha256:5f7dd1b04b4adbb4da201dc6333c066adf8cd552eb8744834d9d52f1d5f71f2f
pytest-docstyle==1.5.0 --hash=sha256:dcc54084b8e8282a83e50c6220c85d1c7d05e3871f74f0e911499b4f3adea756
pytest-flake8==1.0.4 --hash=sha256:4d225c13e787471502ff94409dcf6f7927049b2ec251c63b764a4b17447b60c0 --hash=sha256:d7e2b6b274a255b7ae35e9224c85294b471a83b76ecb6bd53c337ae977a499af
pytest-mypy==0.3.2 --hash=sha256:8f6436eed8118afd6c10a82b3b60fb537336736b0fd7a29262a656ac42ce01ac --hash=sha256:acc653210e7d8d5c72845a5248f00fd33f4f3379ca13fe56cfc7b749b5655c3e
pytest-xdist==1.26.1 --hash=sha256:4a201bb3ee60f5dd6bb40c5209d4e491cecc4d5bafd656cfb10f86178786e568 --hash=sha256:d03d1ff1b008458ed04fa73e642d840ac69b4107c168e06b71037c62d7813dd4
pytest==4.3.0 --hash=sha256:067a1d4bf827ffdd56ad21bd46674703fce77c5957f6c1eef731f6146bfcef1c --hash=sha256:9687049d53695ad45cf5fdc7bbd51f0c49f1ea3ecfc4b7f3fde7501b541f17f4
PyYAML==3.13 --hash=sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b --hash=sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf --hash=sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a --hash=sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3 --hash=sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1 --hash=sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1 --hash=sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613 --hash=sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04 --hash=sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f --hash=sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537 --hash=sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531

65
tests/README.md Normal file

@@ -0,0 +1,65 @@
How to Run Tests
===
This repository uses `pytest`:
```
# create a venv
python3.7 -m venv venv/
# install requirements
venv/bin/pip install -r requirements.txt
# run pytest with all linters and 4 workers in parallel
venv/bin/pytest --black --docstyle --flake8 --mypy-ignore-missing-imports -n 4
```
How to Configure a Generated Test
===
1. Make a directory for test resources named `tests/{query_name}/{test_name}/`,
   e.g. `tests/clients_last_seen_v1/test_single_day`
   - `query_name` must match a query file named `sql/{query_name}.sql`, e.g.
     `sql/clients_last_seen_v1.sql`
   - `test_name` should start with `test_`, e.g. `test_single_day`
1. Add `.ndjson` files for input tables, e.g. `clients_daily_v6.ndjson`
   - Include the dataset prefix if it's set in the tested query,
     e.g. `analysis.clients_last_seen_v1.ndjson`
   - This will result in the dataset prefix being removed from the query,
     e.g. `query.replace("analysis.clients_last_seen_v1", "clients_last_seen_v1")`
1. Add `expect.ndjson` to validate the result
   - `DATE` and `DATETIME` type columns in the result are coerced to strings
     using `.isoformat()`
   - Columns named `generated_time` are removed from the result before
     comparing to `expect` because they should not be static
1. Optionally add `.schema.json` files for input table schemas, e.g.
   `clients_daily_v6.schema.json`
1. Optionally add `query_params.yaml` to define query parameters
   - `query_params` must be a list
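
For example, the test for `sql/clients_last_seen_v1.sql` added in this commit follows these steps with a layout along these lines (matching the resource files shown later in this diff):

```
tests/clients_last_seen_v1/test_single_day/
├── analysis.clients_last_seen_v1.ndjson        # input table (previous partition)
├── analysis.clients_last_seen_v1.schema.json   # schema for that input table
├── clients_daily_v6.ndjson                     # input table
├── clients_daily_v6.schema.json                # schema for that input table
├── expect.ndjson                               # expected query result
└── query_params.yaml                           # query parameters
```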
Additional Guidelines and Options
---
- If the destination table is also an input table then `generated_time` should
  be a required `DATETIME` field to ensure minimal validation
- Input table files
  - All of the formats supported by `bq load` are supported
  - Formats other than `.ndjson` and `.csv` should not be used because they
    are not human readable
- `expect.ndjson`
  - File extensions `yaml`, `json` and `ndjson` are supported
  - Formats other than `.ndjson` should not be used because they are not
    supported by `bq load`
- Schema files
  - Setting the description of a top level field to `time_partitioning_field`
    will cause the table to use it for time partitioning
  - File extensions `yaml`, `json` and `ndjson` are supported
  - Formats other than `.json` should not be used because they are not
    supported by `bq load`
- Query parameters
  - Scalar query params should be defined as a dict with keys `name`, `type` or
    `type_`, and `value`
  - `query_parameters.yaml` may be used instead of `query_params.yaml`, but
    they are mutually exclusive
  - File extensions `yaml`, `json` and `ndjson` are supported

4
tests/__init__.py Normal file

@@ -0,0 +1,4 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Tests."""

2
tests/clients_last_seen_v1/test_single_day/analysis.clients_last_seen_v1.ndjson Normal file

@@ -0,0 +1,2 @@
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a"}
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"b"}

39
tests/clients_last_seen_v1/test_single_day/analysis.clients_last_seen_v1.schema.json Normal file

@@ -0,0 +1,39 @@
[
  {
    "name": "submission_date",
    "type": "DATE",
    "mode": "REQUIRED"
  },
  {
    "name": "generated_time",
    "type": "DATETIME",
    "mode": "REQUIRED"
  },
  {
    "name": "last_seen_date",
    "type": "DATE",
    "mode": "REQUIRED"
  },
  {
    "name": "active_hours_sum",
    "type": "FLOAT",
    "mode": "REQUIRED"
  },
  {
    "name": "attribution",
    "type": "RECORD",
    "mode": "REQUIRED",
    "fields": [
      {
        "name": "source",
        "type": "STRING",
        "mode": "REQUIRED"
      }
    ]
  },
  {
    "name": "client_id",
    "type": "STRING",
    "mode": "REQUIRED"
  }
]

2
tests/clients_last_seen_v1/test_single_day/clients_daily_v6.ndjson Normal file

@@ -0,0 +1,2 @@
{"submission_date_s3":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"b"}
{"submission_date_s3":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"c"}

30
tests/clients_last_seen_v1/test_single_day/clients_daily_v6.schema.json Normal file

@@ -0,0 +1,30 @@
[
  {
    "name": "submission_date_s3",
    "type": "DATE",
    "mode": "REQUIRED",
    "description": "time_partitioning_field"
  },
  {
    "name": "active_hours_sum",
    "type": "FLOAT",
    "mode": "REQUIRED"
  },
  {
    "name": "attribution",
    "type": "RECORD",
    "mode": "REQUIRED",
    "fields": [
      {
        "name": "source",
        "type": "STRING",
        "mode": "REQUIRED"
      }
    ]
  },
  {
    "name": "client_id",
    "type": "STRING",
    "mode": "REQUIRED"
  }
]

3
tests/clients_last_seen_v1/test_single_day/expect.ndjson Normal file

@@ -0,0 +1,3 @@
{"submission_date":"2019-01-02","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a","last_seen_date":"2019-01-01"}
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"b","last_seen_date":"2019-01-02"}
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"c","last_seen_date":"2019-01-02"}

3
tests/clients_last_seen_v1/test_single_day/query_params.yaml Normal file

@@ -0,0 +1,3 @@
- name: submission_date
  type: DATE
  value: 2019-01-02

81
tests/test_generated.py Normal file

@@ -0,0 +1,81 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Automatically generated tests."""
from google.api_core.exceptions import NotFound
from google.cloud import bigquery
from .util import coerce_result, generate_tests
import json
import pytest


@pytest.fixture(scope="session")
def bq():
    return bigquery.Client()


@pytest.fixture(params=list(generate_tests()))
def generated_test(request):
    return request.param


@pytest.fixture
def dataset(bq, generated_test):
    # create dataset
    try:
        bq.get_dataset(generated_test.dataset_id)
    except NotFound:
        bq.create_dataset(generated_test.dataset_id)
    # wait for test
    yield bq.dataset(generated_test.dataset_id)
    # clean up
    bq.delete_dataset(generated_test.dataset_id, delete_contents=True)


@pytest.fixture(autouse=True)
def tables(bq, dataset, generated_test):
    # load tables into dataset
    for table in generated_test.tables.values():
        destination = f"{dataset.dataset_id}.{table.name}"
        job_config = bigquery.LoadJobConfig(
            default_dataset=dataset,
            source_format=table.source_format,
            write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
            schema=table.schema,
        )
        if job_config.schema is None:
            # autodetect schema if not provided
            job_config.autodetect = True
        else:
            # look for time_partitioning_field in provided schema
            for field in job_config.schema:
                if field.description == "time_partitioning_field":
                    job_config.time_partitioning = bigquery.TimePartitioning(
                        field=field.name
                    )
                    break  # stop because there can only be one time partitioning field
        with open(table.source_path, "rb") as file_obj:
            job = bq.load_table_from_file(file_obj, destination, job_config=job_config)
            job.result()
    # clean up handled by the dataset fixture


def test_generated(bq, dataset, generated_test):
    # configure job
    job_config = bigquery.QueryJobConfig(
        default_dataset=dataset,
        destination=bigquery.TableReference(dataset, generated_test.query_name),
        query_parameters=generated_test.query_params,
        use_legacy_sql=False,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )
    # run query
    job = bq.query(generated_test.modified_query, job_config=job_config)
    result = list(coerce_result(*job.result()))
    result.sort(key=lambda row: json.dumps(row))
    assert result == generated_test.expect
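
With these fixtures in place, a single generated test can be selected using pytest's keyword filter, e.g. `venv/bin/pytest tests/test_generated.py -k test_single_day`, provided `GOOGLE_APPLICATION_CREDENTIALS` points at a service account with BigQuery access, as in the CircleCI config above.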

219
tests/util.py Normal file

@@ -0,0 +1,219 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Utilities."""
from dataclasses import dataclass
from datetime import date, datetime
from google.cloud import bigquery
from typing import Any, Callable, Dict, Generator, List, Optional, Union
import json
import os.path
import yaml

QueryParameter = Union[
    bigquery.ArrayQueryParameter,
    bigquery.ScalarQueryParameter,
    bigquery.StructQueryParameter,
]

table_extensions = {
    "ndjson": bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    "csv": bigquery.SourceFormat.CSV,
    "backup_info": bigquery.SourceFormat.DATASTORE_BACKUP,
    "export_metadata": bigquery.SourceFormat.DATASTORE_BACKUP,
    "avro": bigquery.SourceFormat.AVRO,
    "parquet": bigquery.SourceFormat.PARQUET,
    "orc": bigquery.SourceFormat.ORC,
}


@dataclass
class Table:
    """Define info needed to create a table for a generated test."""

    name: str
    source_format: str
    source_path: str
    # post_init fields
    schema: Optional[List[bigquery.SchemaField]] = None

    def __post_init__(self):
        """Fill in calculated fields if not provided."""
        if self.schema is None:
            resource_dir, resource = os.path.split(self.source_path)
            full_name, _ = resource.rsplit(".", 1)
            try:
                self.schema = [
                    bigquery.SchemaField.from_api_repr(field)
                    for field in load(resource_dir, f"{full_name}.schema")
                ]
            except FileNotFoundError:
                pass


@dataclass
class GeneratedTest:
    """Define the info needed to run a generated test."""

    expect: List[Dict[str, Any]]
    name: str
    query: str
    query_name: str
    query_params: List[Any]
    replace: Dict[str, str]
    tables: Dict[str, Table]
    # post_init fields
    dataset_id: Optional[str] = None
    modified_query: Optional[str] = None

    def __post_init__(self):
        """Fill in calculated fields if not provided."""
        if self.dataset_id is None:
            self.dataset_id = f"{self.query_name}_{self.name}"
        if self.modified_query is None:
            self.modified_query = self.query
            for old, new in self.replace.items():
                self.modified_query = self.modified_query.replace(old, new)


def read(*paths: str, decoder: Optional[Callable] = None, **kwargs):
    """Read a file and apply decoder if provided."""
    with open(os.path.join(*paths), **kwargs) as f:
        return decoder(f) if decoder else f.read()


def ndjson_load(file_obj) -> List[Any]:
    """Decode newline delimited json from file_obj."""
    return [json.loads(line) for line in file_obj]


def load(resource_dir: str, *basenames: str, **search: Optional[Callable]) -> Any:
    """Read the first matching file found in resource_dir.

    Calls read on paths under resource_dir with a name sans extension in
    basenames and an extension and decoder in search.

    :param resource_dir: directory to check for files
    :param basenames: file names to look for, without an extension
    :param search: mapping of file extension to decoder
    :return: first response from read() that doesn't raise FileNotFoundError
    :raises FileNotFoundError: when all matching files raise FileNotFoundError
    """
    search = search or {"yaml": yaml.load, "json": json.load, "ndjson": ndjson_load}
    not_found: List[str] = []
    for basename in basenames:
        for ext, decoder in search.items():
            try:
                return read(resource_dir, f"{basename}.{ext}", decoder=decoder)
            except FileNotFoundError:
                not_found.append(f"{basename}.{ext}")
    raise FileNotFoundError(f"[Errno 2] No such files in '{resource_dir}': {not_found}")
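
# Illustrative usage (not part of the original module): with the default search
# mapping above, load("tests/clients_last_seen_v1/test_single_day", "expect")
# tries expect.yaml, then expect.json, then expect.ndjson in that directory and
# returns the first file that exists, decoded.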

def get_query_params(resource_dir: str) -> Generator[QueryParameter, None, None]:
    """Attempt to load the first query params found in resource_dir."""
    try:
        # accept query_params or query_parameters as the basename, per
        # tests/README.md; the two are mutually exclusive
        params = load(resource_dir, "query_params", "query_parameters")
    except FileNotFoundError:
        params = []
    for param in params:
        if {"name", "type", "type_", "value"}.issuperset(param.keys()):
            # this is a scalar query param
            param["type_"] = param.pop("type", param.pop("type_", "STRING"))
            yield bigquery.ScalarQueryParameter(**param)
        else:
            # attempt to coerce to some type of query param
            try:
                yield bigquery.StructQueryParameter.from_api_repr(param)
            except KeyError:
                try:
                    yield bigquery.ArrayQueryParameter.from_api_repr(param)
                except KeyError:
                    # this is a different format for scalar param than above
                    yield bigquery.ScalarQueryParameter.from_api_repr(param)
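
# Illustrative (not part of the original module): a query_params.yaml entry like
#     - name: submission_date
#       type: DATE
#       value: 2019-01-02
# takes the scalar branch above, popping "type" into "type_" before calling
# bigquery.ScalarQueryParameter(**param), while entries written in BigQuery API
# representation ("parameterType"/"parameterValue" keys) fall through to the
# from_api_repr constructors.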

def generate_tests() -> Generator[GeneratedTest, None, None]:
    """Attempt to generate tests."""
    tests_dir = os.path.dirname(__file__)
    sql_dir = os.path.join(os.path.dirname(tests_dir), "sql")
    # iterate over directories in tests_dir
    for query_name in next(os.walk(tests_dir))[1]:
        query_dir = os.path.join(tests_dir, query_name)
        # read query or skip
        try:
            query = read(sql_dir, f"{query_name}.sql")
        except FileNotFoundError:
            continue
        # generate a test for each directory in query_dir
        for test_name in next(os.walk(query_dir))[1]:
            resource_dir = os.path.join(query_dir, test_name)
            query_params = list(get_query_params(resource_dir))
            tables: Dict[str, Table] = {}
            replace: Dict[str, str] = {}
            # load expect or skip
            try:
                expect = load(resource_dir, "expect")
            except FileNotFoundError:
                continue
            # generate tables for files with a supported table extension
            for resource in next(os.walk(resource_dir))[2]:
                if "." not in resource:
                    continue  # tables require an extension
                table_name, extension = resource.rsplit(".", 1)
                if table_name.endswith(".schema") or table_name in (
                    "expect",
                    "query_params",
                    "query_parameters",
                ):
                    continue  # not a table
                if extension in table_extensions:
                    source_format = table_extensions[extension]
                    source_path = os.path.join(resource_dir, resource)
                    if "." in table_name:
                        # define replace to remove dataset from table_name in sql
                        replace[table_name] = table_name.rsplit(".", 1)[1]
                        # remove dataset from table_name
                        table_name = replace[table_name]
                    tables[table_name] = Table(table_name, source_format, source_path)
            # yield a test
            yield GeneratedTest(
                expect=expect,
                name=test_name,
                query=query,
                query_name=query_name,
                query_params=query_params,
                replace=replace,
                tables=tables,
            )

def coerce_result(*elements: Any) -> Generator[Any, None, None]:
    """Recursively coerce elements to types available in json.

    Coerce date and datetime to string using isoformat.

    Coerce bigquery.Row to dict using comprehensions.

    Omit dict keys named "generated_time".
    """
    for element in elements:
        if isinstance(element, (dict, bigquery.Row)):
            yield {
                key: list(coerce_result(*value))
                if isinstance(value, list)
                else next(coerce_result(value))
                for key, value in element.items()
                # drop generated_time column
                if key not in ("generated_time",)
            }
        elif isinstance(element, (date, datetime)):
            yield element.isoformat()
        else:
            yield element
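
# Illustrative (not part of the original module):
#     next(coerce_result({"submission_date": date(2019, 1, 2),
#                         "generated_time": datetime(2019, 1, 3, 1, 0, 0)}))
# returns {"submission_date": "2019-01-02"}: the DATE becomes its isoformat()
# string and the generated_time key is dropped before comparison with expect.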