From d1b0af796e310e3f0dd81e489c1ff4b7563fefc3 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Dec 2020 21:08:22 -0500 Subject: [PATCH 1/7] docs: some minor improvements --- docs/cli/pipeline-approve.rst | 15 +++++++++++---- docs/pipeline.rst | 8 ++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/cli/pipeline-approve.rst b/docs/cli/pipeline-approve.rst index d822921..922b17d 100644 --- a/docs/cli/pipeline-approve.rst +++ b/docs/cli/pipeline-approve.rst @@ -26,12 +26,18 @@ Example ======= Before approving an image, it should be validated. First, check the astrometry -with the help of ``wwtdatatool`` command: +with the help of ``wwtdatatool`` command. To check a group of images all at once, +it can be convenient to merge the individual image files into a temporary index: .. code-block:: shell - wwtdatatool serve processed/noao0201b/ - [open up http://localhost:8080/index.wtml in the webclient, review] + wwtdatatool wtml merge processed/*/index_rel.wtml processed/index_rel.wtml + wwtdatatool preview processed/index_rel.wtml + +(Change the forward slashes to backslashes if you’re using Windows.) The first +command merges the individual image WTMLs into a new file, +``processed/index_rel.wtml``. The second command opens up this combined file in +the WWT webclient, running an internal webserver to make the data available. Next, get a metadata report and check for any issues: @@ -39,7 +45,7 @@ Next, get a metadata report and check for any issues: wwtdatatool wtml report processed/noao0201b/index_rel.wtml -If everything is OK, the image may be approved: +If everything is OK, you can mark the image as approved: .. code-block:: shell @@ -47,6 +53,7 @@ If everything is OK, the image may be approved: After approval of a batch of images, the next step is to :ref:`cli-pipeline-publish`. 
+ Notes ===== diff --git a/docs/pipeline.rst b/docs/pipeline.rst index 70cce96..251cffd 100644 --- a/docs/pipeline.rst +++ b/docs/pipeline.rst @@ -39,13 +39,13 @@ command-line program. Configuration ============= -The root of the *destionation* data repository should contain a configuration +The root of the *destination* data repository should contain a configuration file named ``toasty-pipeline-config.yaml``. Once a pipeline workflow is set up, you shouldn’t need to worry about this file. But to get a new pipeline going, you need to create it and then place it in your data destination. -As implied, this file contains structured data in the `YAML -`_ format. An example is: +This file contains structured data in the `YAML `_ format. An +example is: .. code-block:: YAML @@ -72,7 +72,7 @@ Djangoplicity Data Source Currently, the only functional ``source_type`` is ``djangoplicity``, which downloads and parses an imagery feed from a website powered by the the `Djangoplicity `_ gallery -system. An example is the `ESO Hubble gallery +system. An example is the `ESA Hubble gallery `_. When using the ``djangoplicity`` data source, the ``toasty-pipeline-config.yaml`` From c85c4ddb7bbe4a123041fad919fe58a8dc36d09c Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Dec 2020 21:09:23 -0500 Subject: [PATCH 2/7] We currently require a much newer version of wwt_data_formats --- README.md | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d84ab7c..48a7d4e 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ and [PyPI](https://pypi.org/project/toasty/#history). 
- [pytest] to run the test suite - [PyYAML] - [tqdm] -- [wwt_data_formats] +- [wwt_data_formats] >= 0.7 [astropy]: https://www.astropy.org/ [azure-storage-blob]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/storage/azure-storage-blob diff --git a/setup.py b/setup.py index 82ee87b..0a1478e 100644 --- a/setup.py +++ b/setup.py @@ -78,7 +78,7 @@ setup_args = dict( 'pillow>=7.0', 'PyYAML>=5.0', 'tqdm>=4.0', - 'wwt_data_formats>=0.2.0', + 'wwt_data_formats>=0.7.0', ], extras_require = { From 6390e55d7d4656a25ba2a5dac9c46ae5f7ee6413 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Dec 2020 21:19:18 -0500 Subject: [PATCH 3/7] pipeline fetch: try to avoid crashing on NotActionable on Windows We were trying to move the "candidate" directory with a file open inside it, which isn't allowed on Windows. --- toasty/pipeline/cli.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/toasty/pipeline/cli.py b/toasty/pipeline/cli.py index b09699a..2f08bdb 100644 --- a/toasty/pipeline/cli.py +++ b/toasty/pipeline/cli.py @@ -103,24 +103,26 @@ def fetch_impl(settings): src = mgr.get_image_source() for cid in settings.cand_ids: + # Funky structure here is to try to ensure that cdata is closed in case + # a NotActionable happens, so that we can move the directory on Windows. try: - cdata = open(os.path.join(cand_dir, cid), 'rb') - except FileNotFoundError: - die(f'no such candidate ID {cid!r}') + try: + cdata = open(os.path.join(cand_dir, cid), 'rb') + except FileNotFoundError: + die(f'no such candidate ID {cid!r}') - print(f'fetching {cid} ... ', end='') - sys.stdout.flush() - - try: - cachedir = mgr._ensure_dir('cache_todo', cid) - src.fetch_candidate(cid, cdata, cachedir) - print('done') + try: + print(f'fetching {cid} ... 
', end='') + sys.stdout.flush() + cachedir = mgr._ensure_dir('cache_todo', cid) + src.fetch_candidate(cid, cdata, cachedir) + print('done') + finally: + cdata.close() except NotActionableError: print('not usable') os.rename(os.path.join(cand_dir, cid), os.path.join(rej_dir, cid)) os.rmdir(cachedir) - finally: - cdata.close() # The "init" subcommand From 9e806367e98ae86fa26d0e61fb561fb53767b8ee Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Dec 2020 21:50:35 -0500 Subject: [PATCH 4/7] pipeline: add support for globby arguments in fetch and approve --- docs/cli/pipeline-approve.rst | 17 ++++++++++++++- docs/cli/pipeline-fetch.rst | 21 +++++++++++++++++- toasty/pipeline/cli.py | 40 ++++++++++++++++++++++++++++++----- 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/docs/cli/pipeline-approve.rst b/docs/cli/pipeline-approve.rst index 922b17d..45f3467 100644 --- a/docs/cli/pipeline-approve.rst +++ b/docs/cli/pipeline-approve.rst @@ -16,7 +16,10 @@ Usage toasty pipeline approve [--workdir=WORKDIR] {IMAGE-IDs...} The ``IMAGE-IDs`` argument specifies one or more images by their unique -identifiers. +identifiers. You can specify exact IDs, or `glob patterns`_ as processed by the +Python ``fnmatch`` module. See examples below. + +.. _glob patterns: https://docs.python.org/3/library/fnmatch.html#module-fnmatch The ``WORKDIR`` argument optionally specifies the location of the pipeline workspace directory. The default is the current directory. @@ -51,6 +54,18 @@ If everything is OK, you can mark the image as approved: toasty pipeline approve noao0201b +You can use `glob patterns`_ to match image names. For instance, + +.. code-block:: shell + + toasty pipeline approve "vla*20" "?vlba" + +will match every processed image whose identifier begins with ``vla`` and ends +with ``20``, as well as those whose names are exactly five characters long and end +with ``vlba``. 
You generally must make sure to encase glob arguments in +quotation marks, as shown above, to prevent your shell from attempting to +process them before Toasty gets a chance to. + After approval of a batch of images, the next step is to :ref:`cli-pipeline-publish`. diff --git a/docs/cli/pipeline-fetch.rst b/docs/cli/pipeline-fetch.rst index 718c22c..7d8af1a 100644 --- a/docs/cli/pipeline-fetch.rst +++ b/docs/cli/pipeline-fetch.rst @@ -16,7 +16,10 @@ Usage toasty pipeline fetch [--workdir=WORKDIR] {IMAGE-IDs...} The ``IMAGE-IDs`` argument specifies one or more images by their unique -identifiers. +identifiers. You can specify exact IDs, or `glob patterns`_ as processed by the +Python ``fnmatch`` module. See examples below. + +.. _glob patterns: https://docs.python.org/3/library/fnmatch.html#module-fnmatch The ``WORKDIR`` argument optionally specifies the location of the pipeline workspace directory. The default is the current directory. @@ -34,6 +37,22 @@ Fetch two images: After fetching, the next step is to :ref:`cli-pipeline-process-todos`. +Example +======= + +You can use `glob patterns`_ to match candidate names. For instance, + +.. code-block:: shell + + toasty pipeline fetch "rubin-*" "soar?" + +will match every candidate whose name begins with ``rubin-``, as well as those +whose names are exactly five letters long and start with ``soar``. You generally +must make sure to encase glob arguments in quotation marks, as shown above, to +prevent your shell from attempting to process them before Toasty gets a chance +to. + + Notes ===== diff --git a/toasty/pipeline/cli.py b/toasty/pipeline/cli.py index 2f08bdb..ce6a497 100644 --- a/toasty/pipeline/cli.py +++ b/toasty/pipeline/cli.py @@ -12,6 +12,8 @@ pipeline_impl '''.split() import argparse +from fnmatch import fnmatch +import glob import os.path import sys @@ -19,6 +21,34 @@ from ..cli import die, warn from . 
import NotActionableError +def evaluate_imageid_args(searchdir, args): + """ + Figure out which image IDs to process. + """ + + matched_ids = set() + globs_todo = set() + + for arg in args: + if glob.has_magic(arg): + globs_todo.add(arg) + else: + # If an ID is explicitly (non-globbily) added, always add it to the + # list, without checking if it exists in `searchdir`. We could check + # for it in searchdir now, but we'll have to check later anyway, so + # we don't bother. + matched_ids.add(arg) + + if len(globs_todo): + for filename in os.listdir(searchdir): + for g in globs_todo: + if fnmatch(filename, g): + matched_ids.add(filename) + break + + return sorted(matched_ids) + + # The "approve" subcommand def approve_setup_parser(parser): @@ -31,8 +61,8 @@ def approve_setup_parser(parser): parser.add_argument( 'cand_ids', nargs = '+', - metavar = 'CAND-ID', - help = 'Name(s) of candidate(s) to approve and prepare for processing' + metavar = 'IMAGE-ID', + help = 'Name(s) of image(s) to approve for publication (globs accepted)' ) @@ -51,7 +81,7 @@ def approve_impl(settings): proc_dir = mgr._ensure_dir('processed') app_dir = mgr._ensure_dir('approved') - for cid in settings.cand_ids: + for cid in evaluate_imageid_args(proc_dir, settings.cand_ids): if not os.path.isdir(os.path.join(proc_dir, cid)): die(f'no such processed candidate ID {cid!r}') @@ -90,7 +120,7 @@ def fetch_setup_parser(parser): 'cand_ids', nargs = '+', metavar = 'CAND-ID', - help = 'Name(s) of candidate(s) to fetch and prepare for processing' + help = 'Name(s) of candidate(s) to fetch and prepare for processing (globs accepted)' ) @@ -102,7 +132,7 @@ def fetch_impl(settings): rej_dir = mgr._ensure_dir('rejects') src = mgr.get_image_source() - for cid in settings.cand_ids: + for cid in evaluate_imageid_args(cand_dir, settings.cand_ids): # Funky structure here is to try to ensure that cdata is closed in case # a NotActionable happens, so that we can move the directory on Windows. 
try: From 495077daff15f0fcefa2e357d427d86b974e9995 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Dec 2020 21:50:43 -0500 Subject: [PATCH 5/7] docs/api: update --- docs/api/toasty.image.Image.rst | 4 ++++ docs/api/toasty.image.ImageMode.rst | 2 ++ 2 files changed, 6 insertions(+) diff --git a/docs/api/toasty.image.Image.rst b/docs/api/toasty.image.Image.rst index cf86c33..b5b6580 100644 --- a/docs/api/toasty.image.Image.rst +++ b/docs/api/toasty.image.Image.rst @@ -10,10 +10,12 @@ Image .. autosummary:: + ~Image.default_format ~Image.dtype ~Image.height ~Image.mode ~Image.shape + ~Image.wcs ~Image.width .. rubric:: Methods Summary @@ -32,10 +34,12 @@ Image .. rubric:: Attributes Documentation + .. autoattribute:: default_format .. autoattribute:: dtype .. autoattribute:: height .. autoattribute:: mode .. autoattribute:: shape + .. autoattribute:: wcs .. autoattribute:: width .. rubric:: Methods Documentation diff --git a/docs/api/toasty.image.ImageMode.rst b/docs/api/toasty.image.ImageMode.rst index 1952733..cccb6e3 100644 --- a/docs/api/toasty.image.ImageMode.rst +++ b/docs/api/toasty.image.ImageMode.rst @@ -12,6 +12,7 @@ ImageMode ~ImageMode.F16x3 ~ImageMode.F32 + ~ImageMode.F64 ~ImageMode.RGB ~ImageMode.RGBA @@ -26,6 +27,7 @@ ImageMode .. autoattribute:: F16x3 .. autoattribute:: F32 + .. autoattribute:: F64 .. autoattribute:: RGB .. 
autoattribute:: RGBA From 09e5beb8741df90f536d47527e3e026c2523ff59 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Dec 2020 22:05:51 -0500 Subject: [PATCH 6/7] toasty/tests/test_pipeline.py: try to get some coverage in the glob tests --- toasty/tests/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toasty/tests/test_pipeline.py b/toasty/tests/test_pipeline.py index 3d6e035..6ba4b5d 100644 --- a/toasty/tests/test_pipeline.py +++ b/toasty/tests/test_pipeline.py @@ -89,7 +89,7 @@ class TestPipeline(object): args = [ 'pipeline', 'fetch', '--workdir', self.work_path('work'), - 'fake_test1', + 'fake_test1', '*nomatchisok*', ] cli.entrypoint(args) @@ -102,7 +102,7 @@ class TestPipeline(object): args = [ 'pipeline', 'approve', '--workdir', self.work_path('work'), - 'fake_test1', + 'fake_test1', 'fake_test?', ] cli.entrypoint(args) From 483b1021a75818eaa1066d634ac812deb0809e1b Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Dec 2020 22:08:26 -0500 Subject: [PATCH 7/7] docs/cli/pipeline-fetch.rst: mention that some images are rejected --- docs/cli/pipeline-fetch.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/cli/pipeline-fetch.rst b/docs/cli/pipeline-fetch.rst index 7d8af1a..55d6359 100644 --- a/docs/cli/pipeline-fetch.rst +++ b/docs/cli/pipeline-fetch.rst @@ -59,9 +59,15 @@ Notes Candidate names may be found by looking at the filenames contained in the ``candidates`` subdirectory of your workspace. -For each candidate that is successfully fetched, a sub-subdirectory is created -in the ``cache_todo`` subdirectory with a name corresponding to the unique -candidate ID. +During the fetch process, the candidates are analyzed. Some of them may be +deemed “not actionable” — a common reason being that an image may not have +sufficient astrometric information attached for it to be placed on the sky as +WWT requires. 
Such candidates will be discarded, with their information files +moved into the ``rejects`` subdirectory. + +For each candidate that is successfully fetched and validated, a +sub-subdirectory is created in the ``cache_todo`` subdirectory with a name +corresponding to the unique candidate ID. See Also