From 0953befb34d6f1683b47b064877df46e2ede7678 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 13 Sep 2022 17:33:23 -0400 Subject: [PATCH 1/4] wwt_data_formats/plate.py: add this module Skeleton support for dealing with plate files. It turns out that the 4 GB plate file format (which I'll call "V1") is incredibly easy to handle, so that's nice. --- .../wwt_data_formats.plate.V1PlateReader.rst | 19 ++++ docs/api/wwt_data_formats.plate.rst | 3 + docs/index.rst | 1 + wwt_data_formats/plate.py | 95 +++++++++++++++++++ 4 files changed, 118 insertions(+) create mode 100644 docs/api/wwt_data_formats.plate.V1PlateReader.rst create mode 100644 docs/api/wwt_data_formats.plate.rst create mode 100644 wwt_data_formats/plate.py diff --git a/docs/api/wwt_data_formats.plate.V1PlateReader.rst b/docs/api/wwt_data_formats.plate.V1PlateReader.rst new file mode 100644 index 0000000..c4dab44 --- /dev/null +++ b/docs/api/wwt_data_formats.plate.V1PlateReader.rst @@ -0,0 +1,19 @@ +V1PlateReader +============= + +.. currentmodule:: wwt_data_formats.plate + +.. autoclass:: V1PlateReader + :show-inheritance: + + .. rubric:: Methods Summary + + .. autosummary:: + + ~V1PlateReader.close + ~V1PlateReader.read_tile + + .. rubric:: Methods Documentation + + .. automethod:: close + .. automethod:: read_tile diff --git a/docs/api/wwt_data_formats.plate.rst b/docs/api/wwt_data_formats.plate.rst new file mode 100644 index 0000000..be3a0d4 --- /dev/null +++ b/docs/api/wwt_data_formats.plate.rst @@ -0,0 +1,3 @@ +.. automodapi:: wwt_data_formats.plate + :no-inheritance-diagram: + :inherited-members: diff --git a/docs/index.rst b/docs/index.rst index 00ddef0..83a8579 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,6 +46,7 @@ API Reference api/wwt_data_formats.imageset api/wwt_data_formats.layers api/wwt_data_formats.place + api/wwt_data_formats.plate api/wwt_data_formats.server diff --git a/wwt_data_formats/plate.py b/wwt_data_formats/plate.py new file mode 100644 index 0000000..f09cc32 --- /dev/null +++ b/wwt_data_formats/plate.py @@ -0,0 +1,95 @@ +# -*- mode: python; coding: utf-8 -*- +# Copyright 2022 the .NET Foundation +# Licensed under the MIT License. + +"""The "plate" container formats for binary files. + +"Plate files" are used in some parts of WWT to assemble large numbers of small +files into one big file. In particular, most of the original core WWT tile data +assets are compiled into plate files. + +There are three variants of the plate file format. The oldest variant (let's +call it V0) needs external information in order to be read correctly and is not +yet supported here. + +The next version (let's call it V1) has an eight-byte header followed by all +file location information in a fixed structure. Offsets are 32 bits, and so the +total file size is limited to 4 GiB. The densely-populated header is not +efficient for sparsely-populated tile pyramids. The reference implementation for +the V1 format is in `PlateTilePyramid.cs`_. + +.. _PlateTilePyramid.cs: https://github.com/WorldWideTelescope/wwt-website/blob/master/src/WWT.PlateFiles/PlateTilePyramid.cs + +The last version (V2) uses a hash table format. The V2 format is more efficient +for sparsely-populated tile pyramids, and supports files that are (much) larger +than 4 GiB. The reference implementation for the V2 format is in +`PlateFile2.cs`_. The V2 format is used by WWT's HiRISE data. + +.. _PlateFile2.cs: https://github.com/WorldWideTelescope/wwt-website/blob/master/src/WWT.PlateFiles/PlateFile2.cs + +""" + +__all__ = """ +V1PlateReader +""".split() + +from struct import unpack + + +class V1PlateReader(object): + """Reader for the "V1" plate file format. + + Unlike most of the other WWT data formats implemented in this package, + plate files are stored in a simple binary structure, not XML.""" + + _stream = None + _levels: int + + def __init__(self, stream): + self._stream = stream + + # We must have random access to the stream. + stream.seek(0) + magic, levels = unpack(" self._levels: + raise ValueError(f"invalid `level` {level}") + + n = 2**level + + if x < 0 or x >= n or y < 0 or y >= n: + raise ValueError(f"invalid tile position L{level}X{x}Y{y}") + + # This is the total number of tiles in all levels from 0 to `level - 1`, + # plus one to account for the header item: + index = (4**level - 1) // 3 + 1 + + # The offset of this tile within the level: + index += n * y + x + + self._stream.seek(8 * index) + offset, length = unpack(" Date: Wed, 14 Sep 2022 15:15:20 -0400 Subject: [PATCH 2/4] wwt_data_formats/tests/test_folder.py: use centralized text fixture --- wwt_data_formats/tests/test_folder.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/wwt_data_formats/tests/test_folder.py b/wwt_data_formats/tests/test_folder.py index 3eb3b91..315a418 100644 --- a/wwt_data_formats/tests/test_folder.py +++ b/wwt_data_formats/tests/test_folder.py @@ -6,31 +6,13 @@ from __future__ import absolute_import, division, print_function from mock import Mock import os.path -import shutil -import tempfile import pytest from xml.etree import ElementTree as etree -from . import assert_xml_trees_equal, test_path +from . import assert_xml_trees_equal, tempdir, test_path, work_in_tempdir from .. import cli, folder, imageset, place -@pytest.fixture -def tempdir(): - d = tempfile.mkdtemp() - yield d - shutil.rmtree(d) - - -@pytest.fixture -def in_tempdir(tempdir): - prev_dir = os.getcwd() - os.chdir(tempdir) - yield tempdir - # Windows can't remove the temp tree unless we chdir out of it. - os.chdir(prev_dir) - - BASIC_XML_STRING = """ """ @@ -205,7 +187,7 @@ def test_wtml_report(): cli.entrypoint(["wtml", "report", test_path("report_rel.wtml")]) -def test_wtml_rewrite_disk(in_tempdir): +def test_wtml_rewrite_disk(work_in_tempdir): f = folder.Folder() f.url = "sub%20dir/image.jpg" @@ -215,12 +197,12 @@ def test_wtml_rewrite_disk(in_tempdir): cli.entrypoint(["wtml", "rewrite-disk", "index_rel.wtml", "index_disk.wtml"]) f = folder.Folder.from_file("index_disk.wtml") - # abspath('') is not necessarily equal to abspath(in_tempdir), due to + # abspath('') is not necessarily equal to abspath(work_in_tempdir), due to # symlinks and Windows filename shorterning. assert f.url == os.path.join(os.path.abspath(""), "sub dir", "image.jpg") -def test_wtml_rewrite_urls(in_tempdir): +def test_wtml_rewrite_urls(work_in_tempdir): f = folder.Folder() f.url = "../updir/somewhere.wtml" From 430a77f60aea9908dc013f2f27fefce14e90a9aa Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Wed, 14 Sep 2022 15:40:59 -0400 Subject: [PATCH 3/4] wwt_data_formats/plate.py: add writer, tests, docs --- .../wwt_data_formats.plate.V1PlateWriter.rst | 21 ++ wwt_data_formats/plate.py | 224 +++++++++++++++++- wwt_data_formats/tests/test_plate.py | 50 ++++ 3 files changed, 284 insertions(+), 11 deletions(-) create mode 100644 docs/api/wwt_data_formats.plate.V1PlateWriter.rst create mode 100644 wwt_data_formats/tests/test_plate.py diff --git a/docs/api/wwt_data_formats.plate.V1PlateWriter.rst b/docs/api/wwt_data_formats.plate.V1PlateWriter.rst new file mode 100644 index 0000000..db7bcd3 --- /dev/null +++ b/docs/api/wwt_data_formats.plate.V1PlateWriter.rst @@ -0,0 +1,21 @@ +V1PlateWriter +============= + +.. currentmodule:: wwt_data_formats.plate + +.. autoclass:: V1PlateWriter + :show-inheritance: + + .. rubric:: Methods Summary + + .. autosummary:: + + ~V1PlateWriter.append_bytes + ~V1PlateWriter.append_stream + ~V1PlateWriter.close + + .. rubric:: Methods Documentation + + .. automethod:: append_bytes + .. automethod:: append_stream + .. automethod:: close diff --git a/wwt_data_formats/plate.py b/wwt_data_formats/plate.py index f09cc32..63323ed 100644 --- a/wwt_data_formats/plate.py +++ b/wwt_data_formats/plate.py @@ -31,21 +31,32 @@ than 4 GiB. The reference implementation for the V2 format is in __all__ = """ V1PlateReader +V1PlateWriter """.split() -from struct import unpack +from io import BytesIO +from struct import pack, unpack +from typing import BinaryIO, List class V1PlateReader(object): - """Reader for the "V1" plate file format. + """Reader for the "V1" WWT plate file format. - Unlike most of the other WWT data formats implemented in this package, - plate files are stored in a simple binary structure, not XML.""" + Parameters + ---------- + stream : readable, seekable, bytes-based file-like object + The underlying data stream. If you explicitly :meth:`close` this object, + it will close the underlying stream. - _stream = None + Notes + ----- + Unlike most of the other WWT data formats implemented in this package, plate + files are stored in a simple binary structure, not XML.""" + + _stream: BinaryIO _levels: int - def __init__(self, stream): + def __init__(self, stream: BinaryIO): self._stream = stream # We must have random access to the stream. @@ -61,17 +72,37 @@ class V1PlateReader(object): def close(self): """Close the underlying stream, making this object essentially unusable.""" - self._stream.close() - self._stream = None + if self._stream is not None: + self._stream.close() + self._stream = None - def read_tile(self, level, x, y): + def __enter__(self): + return self + + def __exit__(self, *_exc): + self.close() + return False + + def read_tile(self, level: int, x: int, y: int) -> bytes: """Read the specified tile position into memory in its entirety and return its contents. - Returns bytes.""" + Parameters + ---------- + level : int + The level of the tile to read + x : int + The X position of the tile to read + y : int + The Y position of the tile to read + + Returns + ------- + data : bytes + The data for the specified tile position.""" if self._stream is None: - raise Exception("cannot read a closed FileCabinetReader") + raise Exception("cannot read a closed V1PlateReader") if level < 0 or level > self._levels: raise ValueError(f"invalid `level` {level}") @@ -93,3 +124,174 @@ class V1PlateReader(object): self._stream.seek(offset) return self._stream.read(length) + + +class V1PlateWriter(object): + """Writer for the "V1" WWT plate file format. + + Parameters + ---------- + stream : writeable, seekable, bytes-based file-like object + The underlying data destination. This object becomes responsible for + closing the stream. + levels : int + The number of tile levels to allocate for this plate file. Must be + nonnegative. + + Notes + ----- + This file format assumes that tile data will be densely populated up to the + specified number of levels (although missing entries are allowed). The + maximum final file size is 4 GiB. + + This object is usable as a context manager, and should be explicitly closed. + + Unlike most of the other WWT data formats implemented in this package, plate + files are stored in a simple binary structure, not XML.""" + + _stream: BinaryIO + _levels: int + _next_offset: int + _filedata: List[bytes] + + def __init__(self, stream: BinaryIO, levels: int): + self._stream = stream + self._levels = levels + + if levels < 0: + raise ValueError(f"illegal `levels` value {levels!r}") + + # We must have random access to the stream. + stream.seek(0) + stream.write(pack(" self._levels: + raise ValueError(f"invalid `level` {level}") + + n = 2**level + + if x < 0 or x >= n or y < 0 or y >= n: + raise ValueError(f"invalid tile position L{level}X{x}Y{y}") + + if self._next_offset >= 4294967296: # that's 2**32 + raise Exception( + "cannot append to V1PlateWriter: 4-gibibyte size limit exceeded" + ) + + # This is basically `shutil.copyfileobj()`, but that doesn't tell + # us the total length written. + + length = 0 + + while True: + b = stream.read(65536) + if not b: + break + + self._stream.write(b) + length += len(b) + + if length >= 4294967296: # that's 2**32 + raise Exception( + "error appending to V1PlateWriter: 4-gibibyte file size limit exceeded" + ) + + # Now we can add the filedata entry. + # + # This is the total number of tiles in all levels from 0 to `level - 1`: + index = (4**level - 1) // 3 + + # The offset of this tile within the level: + index += n * y + x + + self._filedata[index] = pack(" + assert os.stat("test.plate").st_size == 63 + + with plate.V1PlateReader(open("test.plate", "rb")) as pr: + assert pr.read_tile(1, 1, 1) == b"111" + assert pr.read_tile(1, 1, 0) == b"110" + assert pr.read_tile(1, 0, 1) == b"101" + assert pr.read_tile(1, 0, 0) == b"100" + assert pr.read_tile(0, 0, 0) == b"000" + + with pytest.raises(ValueError): + pr.read_tile(-1, 0, 0) + + with pytest.raises(ValueError): + pr.read_tile(2, 0, 0) + + with pytest.raises(ValueError): + pr.read_tile(1, -1, 0) + + with pytest.raises(ValueError): + pr.read_tile(1, 0, 2) + + pr.close() + + with pytest.raises(Exception): + pr.read_tile(1, 0, 0) From 531df450adc745a13335a04510cf76dd77744d17 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Wed, 14 Sep 2022 15:54:36 -0400 Subject: [PATCH 4/4] ci/azure-build-and-test.yml: update macos image version --- ci/azure-build-and-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/azure-build-and-test.yml b/ci/azure-build-and-test.yml index 1fdfc8a..965ec87 100644 --- a/ci/azure-build-and-test.yml +++ b/ci/azure-build-and-test.yml @@ -16,12 +16,12 @@ parameters: PYTHON_SERIES: "3.8" - name: macos_37 - vmImage: macos-10.15 + vmImage: macos-11 vars: PYTHON_SERIES: "3.7" - name: macos_38 - vmImage: macos-10.15 + vmImage: macos-11 vars: PYTHON_SERIES: "3.8"