зеркало из https://github.com/microsoft/genalog.git
Merge pull request #14 from microsoft/laserprec/bugfix/silentDiskWrite
Raise errors when writing to disk fails
This commit is contained in:
Коммит
7cf054acfd
|
@ -22,12 +22,9 @@ class ImageStateEncoder(JSONEncoder):
|
|||
|
||||
class AnalogDocumentGeneration(object):
|
||||
def __init__(
|
||||
self,
|
||||
template_path=None,
|
||||
styles=DEFAULT_STYLE_COMBINATION,
|
||||
degradations=[],
|
||||
resolution=300,
|
||||
):
|
||||
self,
|
||||
template_path=None, styles=DEFAULT_STYLE_COMBINATION,
|
||||
degradations=[], resolution=300):
|
||||
self.doc_generator = DocumentGenerator(template_path=template_path)
|
||||
self.doc_generator.set_styles_to_generate(styles)
|
||||
self.degrader = Degrader(degradations)
|
||||
|
@ -42,8 +39,13 @@ class AnalogDocumentGeneration(object):
|
|||
"""
|
||||
return self.doc_generator.template_list
|
||||
|
||||
# Fix: rename to generate_sample()
|
||||
# Add another method called generate_all_styles()
|
||||
def generate_img(self, full_text_path, template, target_folder=None):
|
||||
"""Generate synthetic images given the filepath of a text document
|
||||
"""Generate an image with a sample style given a text document
|
||||
|
||||
NOTE: This does not generate all possible style combinations.
|
||||
Use generate_all_styles() instead.
|
||||
|
||||
Arguments:
|
||||
full_text_path {str} -- full filepath of a text document (e.g. /dataset/doc.txt)
|
||||
|
@ -54,6 +56,9 @@ class AnalogDocumentGeneration(object):
|
|||
target_folder {str} -- folder path in which the generated images are stored
|
||||
(default: {None})
|
||||
resolution {int} -- resolution in dpi (default: {300})
|
||||
|
||||
Raises:
|
||||
RuntimeError: when the image cannot be written to disk at the specified path
|
||||
"""
|
||||
with open(full_text_path, "r", encoding="utf8") as f: # read file
|
||||
text = f.read()
|
||||
|
@ -61,7 +66,10 @@ class AnalogDocumentGeneration(object):
|
|||
|
||||
generator = self.doc_generator.create_generator(content, [template])
|
||||
# Generate the image
|
||||
doc = next(generator) # TODO: this does not exhaust all of the style combinations in the generator
|
||||
try:
|
||||
doc = next(generator) # NOTE: this does not exhaust all of the style combinations in the generator
|
||||
except StopIteration:
|
||||
return None
|
||||
src = doc.render_array(resolution=self.resolution, channel="GRAYSCALE")
|
||||
# Degrade the image
|
||||
dst = self.degrader.apply_effects(src)
|
||||
|
@ -74,7 +82,8 @@ class AnalogDocumentGeneration(object):
|
|||
text_filename = os.path.basename(full_text_path)
|
||||
img_filename = text_filename.replace(".txt", ".png")
|
||||
img_dst_path = os.path.join(target_folder, "img", img_filename)
|
||||
cv2.imwrite(img_dst_path, dst)
|
||||
if not cv2.imwrite(img_dst_path, dst):
|
||||
raise RuntimeError(f"Could not write to path {img_dst_path}")
|
||||
return
|
||||
|
||||
|
||||
|
@ -115,14 +124,9 @@ def _set_batch_generate_args(
|
|||
|
||||
|
||||
def generate_dataset_multiprocess(
|
||||
input_text_files,
|
||||
output_folder,
|
||||
styles,
|
||||
degradations,
|
||||
template,
|
||||
resolution=300,
|
||||
batch_size=25,
|
||||
):
|
||||
input_text_files, output_folder,
|
||||
styles, degradations, template,
|
||||
resolution=300, batch_size=25):
|
||||
_setup_folder(output_folder)
|
||||
print(f"Storing generated images in {output_folder}")
|
||||
|
||||
|
|
|
@ -2,4 +2,6 @@ flake8
|
|||
flake8-import-order
|
||||
pytest
|
||||
pytest-cov
|
||||
pytest-mock
|
||||
pytest-lazy-fixture
|
||||
tox
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
import os
|
||||
import glob
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from genalog import pipeline
|
||||
from genalog.pipeline import AnalogDocumentGeneration, generate_dataset_multiprocess
|
||||
from genalog.generation.document import DocumentGenerator
|
||||
|
||||
EXAMPLE_TEXT_FILE = "tests/unit/text/data/gt_1.txt"
|
||||
|
@ -18,33 +19,69 @@ DEGRATIONS = [
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def default_analog_generator():
|
||||
return pipeline.AnalogDocumentGeneration()
|
||||
def default_doc_generator():
|
||||
return AnalogDocumentGeneration()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def custom_analog_generator():
|
||||
return pipeline.AnalogDocumentGeneration(
|
||||
styles=STYLES, degradations=DEGRATIONS, resolution=300
|
||||
)
|
||||
def custom_doc_generator():
|
||||
return AnalogDocumentGeneration(styles=STYLES, degradations=DEGRATIONS, resolution=300)
|
||||
|
||||
|
||||
def test_default_generate_img(default_analog_generator):
|
||||
assert len(default_analog_generator.list_templates()) > 0
|
||||
example_template = default_analog_generator.list_templates()[0]
|
||||
default_analog_generator.generate_img(
|
||||
@pytest.fixture
|
||||
def empty_style_doc_generator():
|
||||
return AnalogDocumentGeneration(styles={})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("doc_generator", [
|
||||
pytest.lazy_fixture('default_doc_generator'),
|
||||
pytest.lazy_fixture('custom_doc_generator')
|
||||
])
|
||||
def test_generate_img_array(doc_generator):
|
||||
# Precondition checks
|
||||
assert len(doc_generator.list_templates()) > 0
|
||||
|
||||
example_template = doc_generator.list_templates()[0]
|
||||
sample_img = doc_generator.generate_img(
|
||||
EXAMPLE_TEXT_FILE, example_template, target_folder=None
|
||||
)
|
||||
assert sample_img is not None
|
||||
assert isinstance(sample_img, np.ndarray)
|
||||
|
||||
|
||||
def test_custom_generate_img(custom_analog_generator):
|
||||
assert len(custom_analog_generator.list_templates()) > 0
|
||||
example_template = custom_analog_generator.list_templates()[0]
|
||||
custom_analog_generator.generate_img(
|
||||
def test_generate_img_array_empty(empty_style_doc_generator):
|
||||
# Precondition checks
|
||||
assert len(empty_style_doc_generator.list_templates()) > 0
|
||||
|
||||
example_template = empty_style_doc_generator.list_templates()[0]
|
||||
sample_img = empty_style_doc_generator.generate_img(
|
||||
EXAMPLE_TEXT_FILE, example_template, target_folder=None
|
||||
)
|
||||
assert sample_img is None
|
||||
|
||||
|
||||
@pytest.mark.io
|
||||
@pytest.mark.parametrize("doc_generator", [
|
||||
pytest.lazy_fixture('default_doc_generator'),
|
||||
pytest.lazy_fixture('custom_doc_generator')
|
||||
])
|
||||
def test_generate_img_write_to_disk(tmpdir, doc_generator):
|
||||
os.makedirs(os.path.join(tmpdir, "img")) # TODO: generate_img() stores the image under the "img" folder
|
||||
output_img_wildcard = os.path.join(tmpdir, "img", "*.png")
|
||||
num_generated_img = glob.glob(output_img_wildcard)
|
||||
# Precondition checks
|
||||
assert len(num_generated_img) == 0
|
||||
assert len(doc_generator.list_templates()) > 0
|
||||
|
||||
example_template = doc_generator.list_templates()[0]
|
||||
doc_generator.generate_img(
|
||||
EXAMPLE_TEXT_FILE, example_template, target_folder=tmpdir
|
||||
)
|
||||
num_generated_img = glob.glob(output_img_wildcard) # look for any png on disk
|
||||
assert len(num_generated_img) > 0
|
||||
|
||||
|
||||
@pytest.mark.io
|
||||
@pytest.mark.parametrize("styles", [
|
||||
STYLES,
|
||||
pytest.param(
|
||||
|
@ -56,9 +93,9 @@ def test_custom_generate_img(custom_analog_generator):
|
|||
def test_generate_dataset_multiprocess(tmpdir, folder_name, styles):
|
||||
assert len(INPUT_TEXT_FILENAMES) > 0
|
||||
output_folder = os.path.join(tmpdir, folder_name)
|
||||
pipeline.generate_dataset_multiprocess(
|
||||
generate_dataset_multiprocess(
|
||||
INPUT_TEXT_FILENAMES, output_folder, styles, DEGRATIONS, "text_block.html.jinja"
|
||||
)
|
||||
num_generated_img = glob.glob(os.path.join(output_folder, "**/*.png"))
|
||||
num_generated_img = glob.glob(os.path.join(output_folder, "**", "*.png"))
|
||||
assert len(num_generated_img) > 0
|
||||
assert len(num_generated_img) == len(INPUT_TEXT_FILENAMES) * len(DocumentGenerator.expand_style_combinations(styles))
|
||||
|
|
1
tox.ini
1
tox.ini
|
@ -34,6 +34,7 @@ markers =
|
|||
# EX: pytest -m "not slow and not azure"
|
||||
slow: marks tests as slow (deselect with '-m "not slow"')
|
||||
azure: marks as integration tests that require azure resource
|
||||
io: marks integration tests involving some form of I/O operations (disk, internet, etc)
|
||||
testpaths =
|
||||
tests
|
||||
addopts =
|
||||
|
|
Загрузка…
Ссылка в новой задаче