Add first test (#9)
Parent: 55da69cca9
Commit: 79070068ad
@@ -0,0 +1,39 @@
version: 2
jobs:
  build:
    docker:
      - image: python:3.7
    steps:
      - checkout
      - restore_cache:
          keys:
            # when lock files change, use increasingly general patterns to restore cache
            - python-packages-v1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "constraints.txt" }}
            - python-packages-v1-{{ .Branch }}-{{ checksum "requirements.txt" }}-
            - python-packages-v1-{{ .Branch }}-
            - python-packages-v1-
      - run:
          name: Build
          command: |
            python3.7 -m venv venv/
            venv/bin/pip install --upgrade -r requirements.txt
      - run:
          name: PyTest with linters
          # Google's client libraries will check for GOOGLE_APPLICATION_CREDENTIALS
          # and use a file in that location for credentials if present;
          # See https://cloud.google.com/docs/authentication/production
          environment:
            GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcp.json
          command: |
            echo "${GCLOUD_SERVICE_KEY:?}" > "$GOOGLE_APPLICATION_CREDENTIALS"
            venv/bin/pytest --black --docstyle --flake8 --mypy-ignore-missing-imports
      - save_cache:
          paths:
            - venv/
          key: python-packages-v1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "constraints.txt" }}

workflows:
  version: 2
  build:
    jobs:
      - build
@@ -0,0 +1,2 @@
[flake8]
max-line-length = 88
@@ -0,0 +1,5 @@
*.pyc
*.swp
*.swo
.mypy_cache/
venv/
README.md
@@ -6,11 +6,14 @@ Bigquery UDFs and SQL queries for building derived datasets.
Recommended practices
===

- Should name sql files like `sql/destination_table_with_version.sql` e.g.
- Should name query files like `sql/destination_table_with_version.sql` e.g.
  `sql/clients_daily_v6.sql`
- Should not specify a project or dataset in table names to simplify testing
- Should use incremental queries
- Should filter input tables on partition and clustering columns
- Should name UDFs like `udf_function_name` e.g. `udf_mode_last`
- Should name UDF files like `udfs/udf_function_name.{sql,js}` e.g.
  `udfs/udf_mode_last.sql`
- Should use UDF language `SQL` over `js` for performance
- Should use UDFs for reusability
- Should use query parameters over jinja templating
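As a rough sketch of the last point, passing a value through the BigQuery Python client's query parameters (the same API the generated tests below use) rather than interpolating it with jinja might look like this; the table and parameter names are illustrative only:

```python
from google.cloud import bigquery

client = bigquery.Client()
# bind @submission_date instead of rendering the value into the SQL text
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("submission_date", "DATE", "2019-01-02")
    ]
)
query = "SELECT * FROM clients_daily_v6 WHERE submission_date_s3 = @submission_date"
rows = client.query(query, job_config=job_config).result()
```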
@@ -28,6 +31,7 @@ Incremental queries have these benefits:
- Will have tooling to replace partitions atomically to prevent duplicate data
- Will have tooling to generate an optimized "destination plus" view that
  calculates the most recent partition
- Note: UDFs are not allowed in views

Incremental queries have these properties:
@@ -38,3 +42,8 @@ Incremental queries have these properties:
- May depend on the previous partition
- If using previous partition, must include a `.init.sql` query to init the
  first partition

Tests
===

[see here](tests/)
@@ -0,0 +1,39 @@
apipkg==1.5 --hash=sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6 --hash=sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c
appdirs==1.4.3 --hash=sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92 --hash=sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e
atomicwrites==1.3.0 --hash=sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4 --hash=sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6
attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79 --hash=sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399
black==18.9b0 --hash=sha256:817243426042db1d36617910df579a54f1afd659adb96fc5032fcf4b36209739 --hash=sha256:e030a9a28f542debc08acceb273f228ac422798e5215ba2a791a6ddeaaca22a5
cachetools==3.1.0 --hash=sha256:219b7dc6024195b6f2bc3d3f884d1fef458745cd323b04165378622dcc823852 --hash=sha256:9efcc9fab3b49ab833475702b55edd5ae07af1af7a4c627678980b45e459c460
certifi==2018.11.29 --hash=sha256:47f9c83ef4c0c621eaef743f133f09fa8a74a9b75f037e8624f83bd1b6626cb7 --hash=sha256:993f830721089fef441cdfeb4b2c8c9df86f0c63239f06bd025a76a7daddb033
chardet==3.0.4 --hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691
click==7.0 --hash=sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13 --hash=sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7
entrypoints==0.3 --hash=sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19 --hash=sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451
execnet==1.5.0 --hash=sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a --hash=sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83
flake8==3.7.7 --hash=sha256:859996073f341f2670741b51ec1e67a01da142831aa1fdc6242dbf88dffbe661 --hash=sha256:a796a115208f5c03b18f332f7c11729812c8c3ded6c46319c59b53efd3819da8
google-api-core==1.8.0 --hash=sha256:3157625b4f4f033650c6e674d52fd8a3a8c116b26b39705cddf4ed61621c09ff --hash=sha256:c6d834143a2bea4de8d1161b5460fd362457db40c55ea9ccbe672e5602e330af
google-auth==1.6.3 --hash=sha256:0f7c6a64927d34c1a474da92cfc59e552a5d3b940d3266606c6a28b72888b9e4 --hash=sha256:20705f6803fd2c4d1cc2dcb0df09d4dfcb9a7d51fd59e94a3a28231fd93119ed
google-cloud-core==0.29.1 --hash=sha256:9bee63e0991be9801a4baf0b7841cf54f86c6e7fec922f45ea74cd4032ed4ee4 --hash=sha256:d85b1aaaf3bad9415ad1d8ee5eadce96d7007a82f13ce0a0629a003a11e83f29
google-resumable-media==0.3.2 --hash=sha256:2dae98ee716efe799db3578a7b902fbf5592fc5c77d3c0906fc4ef9b1b930861 --hash=sha256:3e38923493ca0d7de0ad91c31acfefc393c78586db89364e91cb4f11990e51ba
googleapis-common-protos==1.5.8 --hash=sha256:d56ca712f67fff216d3be9eeeb8360ca59066d0365ba70b137b9e1801813747e
idna==2.8 --hash=sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407 --hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c
mccabe==0.6.1 --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f
more-itertools==6.0.0 --hash=sha256:0125e8f60e9e031347105eb1682cef932f5e97d7b9a1a28d9bf00c22a5daef40 --hash=sha256:590044e3942351a1bdb1de960b739ff4ce277960f2425ad4509446dbace8d9d1
mypy==0.670 --hash=sha256:308c274eb8482fbf16006f549137ddc0d69e5a589465e37b99c4564414363ca7 --hash=sha256:e80fd6af34614a0e898a57f14296d0dacb584648f0339c2e000ddbf0f4cc2f8d
mypy_extensions==0.4.1 --hash=sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812 --hash=sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e
pluggy==0.9.0 --hash=sha256:19ecf9ce9db2fce065a7a0586e07cfb4ac8614fe96edf628a264b1c70116cf8f --hash=sha256:84d306a647cc805219916e62aab89caa97a33a1dd8c342e87a37f91073cd4746
protobuf==3.7.0 --hash=sha256:03666634d038e35d90155756914bc3a6316e8bcc0d300f3ee539e586889436b9 --hash=sha256:049d5900e442d4cc0fd2afd146786b429151e2b29adebed28e6376026ab0ee0b --hash=sha256:0eb9e62a48cc818b1719b5035042310c7e4f57b01f5283b32998c68c2f1c6a7c --hash=sha256:255d10c2c9059964f6ebb5c900a830fc8a089731dda94a5cc873f673193d208b --hash=sha256:358cc59e4e02a15d3725f204f2eb5777fc10595e2d9a9c4c8d82292f49af6d41 --hash=sha256:41f1b737d5f97f1e2af23d16fac6c0b8572f9c7ea73054f1258ca57f4f97cb80 --hash=sha256:4da3781eba8d3051d75b296d48154014c872e8a6323b8c207174cae49523b628 --hash=sha256:6a5129576a2cf925cd100e06ead5f9ae4c86db70a854fb91cedb8d680112734a --hash=sha256:80722b0d56dcb7ca8f75f99d8dadd7c7efd0d2265714d68f871ed437c32d82b3 --hash=sha256:88a960e949ec356f7016d84f8262dcff2b842fca5355b4c1be759f5c103b19b3 --hash=sha256:97872686223f47d95e914881cb0ca46e1bc622562600043da9edddcb54f2fe1e --hash=sha256:a1df9d22433ab44b7c7e0bd33817134832ae8a8f3d93d9b9719fc032c5b20e96 --hash=sha256:ad385fbb9754023d17be14dd5aa67efff07f43c5df7f93118aef3c20e635ea19 --hash=sha256:b2d5ee7ba5c03b735c02e6ae75fd4ff8c831133e7ca078f2963408dc7beac428 --hash=sha256:c8c07cd8635d45b28ec53ee695e5ac8b0f9d9a4ae488a8d8ee168fe8fc75ba43 --hash=sha256:d44ebc9838b183e8237e7507885d52e8d08c48fdc953fd4a7ee3e56cb9d20977 --hash=sha256:dff97b0ee9256f0afdfc9eaa430736cdcdc18899d9a666658f161afd137cf93d --hash=sha256:e47d248d614c68e4b029442de212bdd4f6ae02ae36821de319ae90314ea2578c --hash=sha256:e650b521b429fed3d525428b1401a40051097a5a92c30076c91f36b31717e087
py==1.8.0 --hash=sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa --hash=sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53
pyasn1-modules==0.2.4 --hash=sha256:136020f884635942239b33abdb63b1e0fdfb3c4bc8693f769ff1ab0908133a5b --hash=sha256:1c2ce0717e099620d7d425d2bb55e68f8126d77c8ba93112f0448a212048fe76 --hash=sha256:39da883a45dfc71314c48bba772be63a13946d0dd6abde326df163656a7b13e1 --hash=sha256:4160b0caedf8f1675ca7b94a65900d0219c715ac745cbc0c93557a9864b19748 --hash=sha256:50c5f454c29bc8a7b8bfffc0fd00fed1f9012160b4532807a33c27af91747337 --hash=sha256:52c46ecb2c1e7a03fe54dc8e11d6460ec7ebdcaedba3b0fe4ba2a811521df05f --hash=sha256:6db7a0510e55212b42a1f3e3553559eb214c8c8495e1018b4135d2bfb5a9169a --hash=sha256:79580acf813e3b7d6e69783884e6e83ac94bf4617b36a135b85c599d8a818a7b --hash=sha256:98e80b5ae1ed0d92694927a3e34df016c3b69b7bf439b32fc0a0dc516ec3653d --hash=sha256:9e879981cbf4c868a2267385a56837e0d384eab2d1690e6e0c8bba28d102509e --hash=sha256:a52090e8c5841ebbf08ae455146792d9ef3e8445b21055d3a3b7ed9c712b7c7c --hash=sha256:c00dad1d69d8592bbbc978f5beb3e992d3bf996e6b97eeec1c8608f81221d922 --hash=sha256:c226b5c17683d98498e157d6ac0098b93f9c475da5bc50072f64bf3f3f6b828f
pyasn1==0.4.5 --hash=sha256:061442c60842f6d11051d4fdae9bc197b64bd41573a12234a753a0cb80b4f30b --hash=sha256:0ee2449bf4c4e535823acc25624c45a8b454f328d59d3f3eeb82d3567100b9bd --hash=sha256:5f9fb05c33e53b9a6ee3b1ed1d292043f83df465852bec876e93b47fd2df7eed --hash=sha256:65201d28e081f690a32401e6253cca4449ccacc8f3988e811fae66bd822910ee --hash=sha256:79b336b073a52fa3c3d8728e78fa56b7d03138ef59f44084de5f39650265b5ff --hash=sha256:8ec20f61483764de281e0b4aba7d12716189700debcfa9e7935780850bf527f3 --hash=sha256:9458d0273f95d035de4c0d5e0643f25daba330582cc71bb554fe6969c015042a --hash=sha256:98d97a1833a29ca61cd04a60414def8f02f406d732f9f0bcb49f769faff1b699 --hash=sha256:b00d7bfb6603517e189d1ad76967c7e805139f63e43096e5f871d1277f50aea5 --hash=sha256:b06c0cfd708b806ea025426aace45551f91ea7f557e0c2d4fbd9a4b346873ce0 --hash=sha256:d14d05984581770333731690f5453efd4b82e1e5d824a1d7976b868a2e5c38e8 --hash=sha256:da2420fe13a9452d8ae97a0e478adde1dee153b11ba832a95b223a2ba01c10f7 --hash=sha256:da6b43a8c9ae93bc80e2739efb38cc776ba74a886e3e9318d65fe81a8b8a2c6e
pycodestyle==2.5.0 --hash=sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56 --hash=sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c
pydocstyle==3.0.0 --hash=sha256:2258f9b0df68b97bf3a6c29003edc5238ff8879f1efb6f1999988d934e432bd8 --hash=sha256:5741c85e408f9e0ddf873611085e819b809fca90b619f5fd7f34bd4959da3dd4 --hash=sha256:ed79d4ec5e92655eccc21eb0c6cf512e69512b4a97d215ace46d17e4990f2039
pyflakes==2.1.1 --hash=sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0 --hash=sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2
pytest-forked==1.0.2 --hash=sha256:5fe33fbd07d7b1302c95310803a5e5726a4ff7f19d5a542b7ce57c76fed8135f --hash=sha256:d352aaced2ebd54d42a65825722cb433004b4446ab5d2044851d9cc7a00c9e38
pytz==2018.9 --hash=sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9 --hash=sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c
requests==2.21.0 --hash=sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e --hash=sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b
rsa==4.0 --hash=sha256:14ba45700ff1ec9eeb206a2ce76b32814958a98e372006c8fb76ba820211be66 --hash=sha256:1a836406405730121ae9823e19c6e806c62bbad73f890574fff50efa4122c487
six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c --hash=sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73
snowballstemmer==1.2.1 --hash=sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128 --hash=sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89
toml==0.10.0 --hash=sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c --hash=sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e --hash=sha256:f1db651f9657708513243e61e6cc67d101a39bad662eaa9b5546f789338e07a3
typed-ast==1.3.1 --hash=sha256:035a54ede6ce1380599b2ce57844c6554666522e376bd111eb940fbc7c3dad23 --hash=sha256:037c35f2741ce3a9ac0d55abfcd119133cbd821fffa4461397718287092d9d15 --hash=sha256:049feae7e9f180b64efacbdc36b3af64a00393a47be22fa9cb6794e68d4e73d3 --hash=sha256:19228f7940beafc1ba21a6e8e070e0b0bfd1457902a3a81709762b8b9039b88d --hash=sha256:2ea681e91e3550a30c2265d2916f40a5f5d89b59469a20f3bad7d07adee0f7a6 --hash=sha256:3a6b0a78af298d82323660df5497bcea0f0a4a25a0b003afd0ce5af049bd1f60 --hash=sha256:5385da8f3b801014504df0852bf83524599df890387a3c2b17b7caa3d78b1773 --hash=sha256:606d8afa07eef77280c2bf84335e24390055b478392e1975f96286d99d0cb424 --hash=sha256:69245b5b23bbf7fb242c9f8f08493e9ecd7711f063259aefffaeb90595d62287 --hash=sha256:6f6d839ab09830d59b7fa8fb6917023d8cb5498ee1f1dbd82d37db78eb76bc99 --hash=sha256:730888475f5ac0e37c1de4bd05eeb799fdb742697867f524dc8a4cd74bcecc23 --hash=sha256:9819b5162ffc121b9e334923c685b0d0826154e41dfe70b2ede2ce29034c71d8 --hash=sha256:9e60ef9426efab601dd9aa120e4ff560f4461cf8442e9c0a2b92548d52800699 --hash=sha256:af5fbdde0690c7da68e841d7fc2632345d570768ea7406a9434446d7b33b0ee1 --hash=sha256:b64efdbdf3bbb1377562c179f167f3bf301251411eb5ac77dec6b7d32bcda463 --hash=sha256:bac5f444c118aeb456fac1b0b5d14c6a71ea2a42069b09c176f75e9bd4c186f6 --hash=sha256:bda9068aafb73859491e13b99b682bd299c1b5fd50644d697533775828a28ee0 --hash=sha256:d659517ca116e6750101a1326107d3479028c5191f0ecee3c7203c50f5b915b0 --hash=sha256:eddd3fb1f3e0f82e5915a899285a39ee34ce18fd25d89582bc89fc9fb16cd2c6
urllib3==1.24.1 --hash=sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39 --hash=sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22
@@ -0,0 +1,7 @@
[pytest]
filterwarnings =
    # upstream lib imports ABC improperly for backward compatibility
    ignore::DeprecationWarning:google.protobuf.descriptor:47
    ignore::DeprecationWarning:google.protobuf.internal.well_known_types:788
    ignore::DeprecationWarning:yaml.constructor:126
norecursedirs = venv
@@ -0,0 +1,9 @@
-c constraints.txt
google-cloud-bigquery==1.9.0 --hash=sha256:169ffdb1b677f69f1f9d032bd38f724aed73e0565153ac17199472c083a3852f --hash=sha256:46bef58521e9c245064b3deaf26581cf93127cd1dd019b3ecbc380831a1a6d28
pytest-black==0.3.4 --hash=sha256:5f7dd1b04b4adbb4da201dc6333c066adf8cd552eb8744834d9d52f1d5f71f2f
pytest-docstyle==1.5.0 --hash=sha256:dcc54084b8e8282a83e50c6220c85d1c7d05e3871f74f0e911499b4f3adea756
pytest-flake8==1.0.4 --hash=sha256:4d225c13e787471502ff94409dcf6f7927049b2ec251c63b764a4b17447b60c0 --hash=sha256:d7e2b6b274a255b7ae35e9224c85294b471a83b76ecb6bd53c337ae977a499af
pytest-mypy==0.3.2 --hash=sha256:8f6436eed8118afd6c10a82b3b60fb537336736b0fd7a29262a656ac42ce01ac --hash=sha256:acc653210e7d8d5c72845a5248f00fd33f4f3379ca13fe56cfc7b749b5655c3e
pytest-xdist==1.26.1 --hash=sha256:4a201bb3ee60f5dd6bb40c5209d4e491cecc4d5bafd656cfb10f86178786e568 --hash=sha256:d03d1ff1b008458ed04fa73e642d840ac69b4107c168e06b71037c62d7813dd4
pytest==4.3.0 --hash=sha256:067a1d4bf827ffdd56ad21bd46674703fce77c5957f6c1eef731f6146bfcef1c --hash=sha256:9687049d53695ad45cf5fdc7bbd51f0c49f1ea3ecfc4b7f3fde7501b541f17f4
PyYAML==3.13 --hash=sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b --hash=sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf --hash=sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a --hash=sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3 --hash=sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1 --hash=sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1 --hash=sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613 --hash=sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04 --hash=sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f --hash=sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537 --hash=sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531
@@ -0,0 +1,65 @@
How to Run Tests
===

This repository uses `pytest`:

```
# create a venv
python3.7 -m venv venv/

# install requirements
venv/bin/pip install -r requirements.txt

# run pytest with all linters and 4 workers in parallel
venv/bin/pytest --black --docstyle --flake8 --mypy-ignore-missing-imports -n 4
```

How to Configure a Generated Test
===

1. Make a directory for test resources named `tests/{query_name}/{test_name}/`,
   e.g. `tests/clients_last_seen_v1/test_single_day`
   - `query_name` must match a query file named `sql/{query_name}.sql`, e.g.
     `sql/clients_last_seen_v1.sql`
   - `test_name` should start with `test_`, e.g. `test_single_day`
1. Add `.ndjson` files for input tables, e.g. `clients_daily_v6.ndjson`
   - Include the dataset prefix if it's set in the tested query,
     e.g. `analysis.clients_last_seen_v1.ndjson`
   - This will result in the dataset prefix being removed from the query,
     e.g. `query.replace("analysis.clients_last_seen_v1",
     "clients_last_seen_v1")`
1. Add `expect.ndjson` to validate the result
   - `DATE` and `DATETIME` type columns in the result are coerced to strings
     using `.isoformat()`
   - Columns named `generated_time` are removed from the result before
     comparing to `expect` because they should not be static
1. Optionally add `.schema.json` files for input table schemas, e.g.
   `clients_daily_v6.schema.json`
1. Optionally add `query_params.yaml` to define query parameters
   - `query_params` must be a list
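For orientation, the resources for the example above might be laid out as in this hypothetical sketch; only the query file and `expect` are strictly required, everything else is optional:

```
sql/
    clients_last_seen_v1.sql              # query under test
tests/
    clients_last_seen_v1/
        test_single_day/
            clients_daily_v6.ndjson       # input table rows
            clients_daily_v6.schema.json  # optional input table schema
            query_params.yaml             # optional query parameters
            expect.ndjson                 # expected result of the query
```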

Additional Guidelines and Options
---

- If the destination table is also an input table then `generated_time` should
  be a required `DATETIME` field to ensure minimal validation
- Input table files
  - All of the formats supported by `bq load` are supported
  - Formats other than `.ndjson` and `.csv` should not be used because they
    are not human readable
- `expect.ndjson`
  - File extensions `yaml`, `json` and `ndjson` are supported
  - Formats other than `ndjson` should not be used because they are not
    supported by `bq load`
- Schema files
  - Setting the description of a top level field to `time_partitioning_field`
    will cause the table to use it for time partitioning
  - File extensions `yaml`, `json` and `ndjson` are supported
  - Formats other than `.json` should not be used because they are not
    supported by `bq load`
- Query parameters
  - Scalar query params should be defined as a dict with keys `name`, `type` or
    `type_`, and `value`
  - `query_parameters.yaml` may be used instead of `query_params.yaml`, but
    they are mutually exclusive
  - File extensions `yaml`, `json` and `ndjson` are supported
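As a minimal sketch of the scalar form (mirroring what `tests/util.py` below does, not a separate API), an entry like the one in this commit's `query_params.yaml` maps onto the client library roughly like this:

```python
from google.cloud import bigquery

# one entry loaded from query_params.yaml
param = {"name": "submission_date", "type": "DATE", "value": "2019-01-02"}
# the client library expects the keyword `type_`, so `type` is renamed first
param["type_"] = param.pop("type", param.pop("type_", "STRING"))
query_param = bigquery.ScalarQueryParameter(**param)
```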
@@ -0,0 +1,4 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Tests."""
@@ -0,0 +1,2 @@
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a"}
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"b"}
@@ -0,0 +1,39 @@
[
  {
    "name": "submission_date",
    "type": "DATE",
    "mode": "REQUIRED"
  },
  {
    "name": "generated_time",
    "type": "DATETIME",
    "mode": "REQUIRED"
  },
  {
    "name": "last_seen_date",
    "type": "DATE",
    "mode": "REQUIRED"
  },
  {
    "name": "active_hours_sum",
    "type": "FLOAT",
    "mode": "REQUIRED"
  },
  {
    "name": "attribution",
    "type": "RECORD",
    "mode": "REQUIRED",
    "fields": [
      {
        "name": "source",
        "type": "STRING",
        "mode": "REQUIRED"
      }
    ]
  },
  {
    "name": "client_id",
    "type": "STRING",
    "mode": "REQUIRED"
  }
]
@@ -0,0 +1,2 @@
{"submission_date_s3":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"b"}
{"submission_date_s3":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"c"}
@@ -0,0 +1,30 @@
[
  {
    "name": "submission_date_s3",
    "type": "DATE",
    "mode": "REQUIRED",
    "description": "time_partitioning_field"
  },
  {
    "name": "active_hours_sum",
    "type": "FLOAT",
    "mode": "REQUIRED"
  },
  {
    "name": "attribution",
    "type": "RECORD",
    "mode": "REQUIRED",
    "fields": [
      {
        "name": "source",
        "type": "STRING",
        "mode": "REQUIRED"
      }
    ]
  },
  {
    "name": "client_id",
    "type": "STRING",
    "mode": "REQUIRED"
  }
]
@@ -0,0 +1,3 @@
{"submission_date":"2019-01-02","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a","last_seen_date":"2019-01-01"}
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"b","last_seen_date":"2019-01-02"}
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"c","last_seen_date":"2019-01-02"}
@@ -0,0 +1,3 @@
- name: submission_date
  type: DATE
  value: 2019-01-02
@@ -0,0 +1,81 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Automatically generated tests."""

from google.api_core.exceptions import NotFound
from google.cloud import bigquery
from .util import coerce_result, generate_tests

import json
import pytest


@pytest.fixture(scope="session")
def bq():
    return bigquery.Client()


@pytest.fixture(params=list(generate_tests()))
def generated_test(request):
    return request.param


@pytest.fixture
def dataset(bq, generated_test):
    # create dataset
    try:
        bq.get_dataset(generated_test.dataset_id)
    except NotFound:
        bq.create_dataset(generated_test.dataset_id)
    # wait for test
    yield bq.dataset(generated_test.dataset_id)
    # clean up
    bq.delete_dataset(generated_test.dataset_id, delete_contents=True)


@pytest.fixture(autouse=True)
def tables(bq, dataset, generated_test):
    # load tables into dataset
    for table in generated_test.tables.values():
        destination = f"{dataset.dataset_id}.{table.name}"
        assert table.schema is not None
        job_config = bigquery.LoadJobConfig(
            default_dataset=dataset,
            source_format=table.source_format,
            write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
            schema=table.schema,
        )
        if job_config.schema is None:
            # autodetect schema if not provided
            job_config.autodetect = True
        else:
            # look for time_partitioning_field in provided schema
            for field in job_config.schema:
                if field.description == "time_partitioning_field":
                    job_config.time_partitioning = bigquery.TimePartitioning(
                        field=field.name
                    )
                    break  # stop because there can only be one time partitioning field
        with open(table.source_path, "rb") as file_obj:
            job = bq.load_table_from_file(file_obj, destination, job_config=job_config)
            job.result()
    # clean up handled by default_dataset fixture


def test_generated(bq, dataset, generated_test):
    # configure job
    job_config = bigquery.QueryJobConfig(
        default_dataset=dataset,
        destination=bigquery.TableReference(dataset, generated_test.query_name),
        query_parameters=generated_test.query_params,
        use_legacy_sql=False,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )

    # run query
    job = bq.query(generated_test.modified_query, job_config=job_config)
    result = list(coerce_result(*job.result()))
    result.sort(key=lambda row: json.dumps(row))

    assert result == generated_test.expect
@@ -0,0 +1,219 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Utilities."""

from dataclasses import dataclass
from datetime import date, datetime
from google.cloud import bigquery
from typing import Any, Callable, Dict, Generator, List, Optional, Union

import json
import os.path
import yaml

QueryParameter = Union[
    bigquery.ArrayQueryParameter,
    bigquery.ScalarQueryParameter,
    bigquery.StructQueryParameter,
]

table_extensions = {
    "ndjson": bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    "csv": bigquery.SourceFormat.CSV,
    "backup_info": bigquery.SourceFormat.DATASTORE_BACKUP,
    "export_metadata": bigquery.SourceFormat.DATASTORE_BACKUP,
    "avro": bigquery.SourceFormat.AVRO,
    "parquet": bigquery.SourceFormat.PARQUET,
    "orc": bigquery.SourceFormat.ORC,
}


@dataclass
class Table:
    """Define info needed to create a table for a generated test."""

    name: str
    source_format: str
    source_path: str
    # post_init fields
    schema: Optional[List[bigquery.SchemaField]] = None

    def __post_init__(self):
        """Fill in calculated fields if not provided."""
        if self.schema is None:
            resource_dir, resource = os.path.split(self.source_path)
            full_name, _ = resource.rsplit(".", 1)
            try:
                self.schema = [
                    bigquery.SchemaField.from_api_repr(field)
                    for field in load(resource_dir, f"{full_name}.schema")
                ]
            except FileNotFoundError:
                pass


@dataclass
class GeneratedTest:
    """Define the info needed to run a generated test."""

    expect: List[Dict[str, Any]]
    name: str
    query: str
    query_name: str
    query_params: List[Any]
    replace: Dict[str, str]
    tables: Dict[str, Table]
    # post_init fields
    dataset_id: Optional[str] = None
    modified_query: Optional[str] = None

    def __post_init__(self):
        """Fill in calculated fields if not provided."""
        if self.dataset_id is None:
            self.dataset_id = f"{self.query_name}_{self.name}"
        if self.modified_query is None:
            self.modified_query = self.query
            for old, new in self.replace.items():
                self.modified_query = self.modified_query.replace(old, new)


def read(*paths: str, decoder: Optional[Callable] = None, **kwargs):
    """Read a file and apply decoder if provided."""
    with open(os.path.join(*paths), **kwargs) as f:
        return decoder(f) if decoder else f.read()


def ndjson_load(file_obj) -> List[Any]:
    """Decode newline delimited json from file_obj."""
    return [json.loads(line) for line in file_obj]


def load(resource_dir: str, *basenames: str, **search: Optional[Callable]) -> Any:
    """Read the first matching file found in resource_dir.

    Calls read on paths under resource_dir with a name sans extension in
    basenames and an extension and decoder in search.

    :param resource_dir: directory to check for files
    :param basenames: file names to look for, without an extension
    :param search: mapping of file extension to decoder
    :return: first response from read() that doesn't raise FileNotFoundError
    :raises FileNotFoundError: when all matching files raise FileNotFoundError
    """
    search = search or {"yaml": yaml.load, "json": json.load, "ndjson": ndjson_load}
    not_found: List[str] = []
    for basename in basenames:
        for ext, decoder in search.items():
            try:
                return read(resource_dir, f"{basename}.{ext}", decoder=decoder)
            except FileNotFoundError:
                not_found.append(f"{basename}.{ext}")
    raise FileNotFoundError(f"[Errno 2] No such files in '{resource_dir}': {not_found}")


def get_query_params(resource_dir: str) -> Generator[QueryParameter, None, None]:
    """Attempt to load the first query params found in resource_dir."""
    try:
        params = load(resource_dir, "query_params")
    except FileNotFoundError:
        params = []
    for param in params:
        if {"name", "type", "type_", "value"}.issuperset(param.keys()):
            # this is a scalar query param
            param["type_"] = param.pop("type", param.pop("type_", "STRING"))
            yield bigquery.ScalarQueryParameter(**param)
        else:
            # attempt to coerce to some type of query param
            try:
                yield bigquery.StructQueryParameter.from_api_repr(param)
            except KeyError:
                try:
                    yield bigquery.ArrayQueryParameter.from_api_repr(param)
                except KeyError:
                    # this is a different format for scalar param than above
                    yield bigquery.ScalarQueryParameter.from_api_repr(param)


def generate_tests() -> Generator[GeneratedTest, None, None]:
    """Attempt to generate tests."""
    tests_dir = os.path.dirname(__file__)
    sql_dir = os.path.join(os.path.dirname(tests_dir), "sql")

    # iterate over directories in tests_dir
    for query_name in next(os.walk(tests_dir))[1]:
        query_dir = os.path.join(tests_dir, query_name)

        # read query or skip
        try:
            query = read(sql_dir, f"{query_name}.sql")
        except FileNotFoundError:
            continue

        # generate a test for each directory in query_dir
        for test_name in next(os.walk(query_dir))[1]:
            resource_dir = os.path.join(query_dir, test_name)
            query_params = list(get_query_params(resource_dir))
            tables: Dict[str, Table] = {}
            replace: Dict[str, str] = {}

            # load expect or skip
            try:
                expect = load(resource_dir, "expect")
            except FileNotFoundError:
                continue

            # generate tables for files with a supported table extension
            for resource in next(os.walk(resource_dir))[2]:
                if "." not in resource:
                    continue  # tables require an extension
                table_name, extension = resource.rsplit(".", 1)
                if table_name.endswith(".schema") or table_name in (
                    "expect",
                    "query_params",
                ):
                    continue  # not a table
                print(table_name)
                if extension in table_extensions:
                    source_format = table_extensions[extension]
                    source_path = os.path.join(resource_dir, resource)
                    if "." in table_name:
                        # define replace to remove dataset from table_name in sql
                        replace[table_name] = table_name.rsplit(".", 1)[1]
                        # remove dataset from table_name
                        table_name = replace[table_name]
                    tables[table_name] = Table(table_name, source_format, source_path)

            # yield a test
            yield GeneratedTest(
                expect=expect,
                name=test_name,
                query=query,
                query_name=query_name,
                query_params=query_params,
                replace=replace,
                tables=tables,
            )


def coerce_result(*elements: Any) -> Generator[Any, None, None]:
    """Recursively coerce elements to types available in json.

    Coerce date and datetime to string using isoformat.
    Coerce bigquery.Row to dict using comprehensions.
    Omit dict keys named "generated_time".
    """
    for element in elements:
        if isinstance(element, (dict, bigquery.Row)):
            yield {
                key: list(coerce_result(*value))
                if isinstance(value, list)
                else next(coerce_result(value))
                for key, value in element.items()
                # drop generated_time column
                if key not in ("generated_time",)
            }
        elif isinstance(element, (date, datetime)):
            yield element.isoformat()
        else:
            yield element