зеркало из https://github.com/mozilla/DSAlign.git
Progress indication during SDB finalization
This commit is contained in:
Родитель
836b87f4a5
Коммит
dfc565190d
|
@@ -165,9 +165,9 @@ def main(args):
|
|||
logging.basicConfig(stream=sys.stderr, level=args.loglevel if args.loglevel else 20)
|
||||
logging.getLogger('sox').setLevel(logging.ERROR)
|
||||
|
||||
def progress(iter, desc='Processing', total=None):
|
||||
def progress(it=None, desc='Processing', total=None):
|
||||
desc = desc.rjust(24)
|
||||
return iter if args.no_progress else tqdm(iter, desc=desc, total=total, ncols=120)
|
||||
return iter if args.no_progress else tqdm(it, desc=desc, total=total, ncols=120)
|
||||
|
||||
logging.debug("Start")
|
||||
|
||||
|
@@ -415,7 +415,10 @@ def main(args):
|
|||
sdb.add(sample)
|
||||
if not dry_run:
|
||||
for sdb in lists.values():
|
||||
sdb.close()
|
||||
with progress(desc='Finalizing SDB "{}"'.format(sdb.sdb_filename), total=1000) as bar:
|
||||
for frac in sdb.finalize():
|
||||
bar.n = int(frac * 1001)
|
||||
bar.refresh()
|
||||
return
|
||||
|
||||
created_directories = {}
|
||||
|
|
|
@@ -12,6 +12,7 @@ BIG_ENDIAN = 'big'
|
|||
INT_SIZE = 4
|
||||
BIGINT_SIZE = 2 * INT_SIZE
|
||||
MAGIC = b'SAMPLEDB'
|
||||
INDEXING_FRACTION = 0.05
|
||||
|
||||
BUFFER_SIZE = 1 * MEGABYTE
|
||||
CACHE_SIZE = 1 * GIGABYTE
|
||||
|
@@ -89,7 +90,7 @@ class DirectSDBWriter:
|
|||
self.sdb_file.write(buffer)
|
||||
self.num_samples += 1
|
||||
|
||||
def close(self):
|
||||
def finalize(self):
|
||||
if self.sdb_file is None:
|
||||
return
|
||||
offset_index = self.sdb_file.tell()
|
||||
|
@@ -99,12 +100,18 @@ class DirectSDBWriter:
|
|||
|
||||
self.sdb_file.seek(offset_index + BIGINT_SIZE)
|
||||
self.write_big_int(self.num_samples)
|
||||
for offset in self.offsets:
|
||||
for index, offset in enumerate(self.offsets):
|
||||
self.write_big_int(offset)
|
||||
yield index / len(self.offsets)
|
||||
offset_end = self.sdb_file.tell()
|
||||
self.sdb_file.seek(offset_index)
|
||||
self.write_big_int(offset_end - offset_index - BIGINT_SIZE)
|
||||
self.sdb_file.close()
|
||||
self.sdb_file = None
|
||||
|
||||
def close(self):
|
||||
for _ in self.finalize():
|
||||
pass
|
||||
|
||||
def __len__(self):
|
||||
return len(self.offsets)
|
||||
|
@@ -143,7 +150,7 @@ class SortingSDBWriter: # pylint: disable=too-many-instance-attributes
|
|||
self.bucket.sort(key=lambda s: s.duration)
|
||||
for sample in self.bucket:
|
||||
self.tmp_sdb.add(sample)
|
||||
self.buckets.append((self.bucket_offset, len(self.bucket)))
|
||||
self.buckets.append((self.bucket_offset, self.bucket_offset + len(self.bucket)))
|
||||
self.bucket_offset += len(self.bucket)
|
||||
self.bucket = []
|
||||
self.overall_size += self.bucket_size
|
||||
|
@@ -156,18 +163,21 @@ class SortingSDBWriter: # pylint: disable=too-many-instance-attributes
|
|||
if self.bucket_size > self.cache_size:
|
||||
self.finish_bucket()
|
||||
|
||||
def close(self):
|
||||
def finalize(self):
|
||||
if self.tmp_sdb is None:
|
||||
return
|
||||
self.finish_bucket()
|
||||
num_samples = len(self.tmp_sdb)
|
||||
self.tmp_sdb.close()
|
||||
for frac in self.tmp_sdb.finalize():
|
||||
yield frac * INDEXING_FRACTION
|
||||
self.tmp_sdb = None
|
||||
avg_sample_size = self.overall_size / num_samples
|
||||
max_cached_samples = self.cache_size / avg_sample_size
|
||||
buffer_size = max(1, int(max_cached_samples / len(self.buckets)))
|
||||
sdb_reader = SDB(self.tmp_sdb_filename, buffering=self.buffering)
|
||||
|
||||
def buffered_view(start, end):
|
||||
def buffered_view(bucket):
|
||||
start, end = bucket
|
||||
buffer = []
|
||||
current_offset = start
|
||||
while current_offset < end:
|
||||
|
@@ -177,13 +187,21 @@ class SortingSDBWriter: # pylint: disable=too-many-instance-attributes
|
|||
while len(buffer) > 0:
|
||||
yield buffer.pop(-1)
|
||||
|
||||
bucket_views = list(map(lambda b: buffered_view(b[0], b[0] + b[1]), self.buckets))
|
||||
bucket_views = list(map(buffered_view, self.buckets))
|
||||
interleaved = Interleaved(*bucket_views, key=lambda s: s.duration)
|
||||
with DirectSDBWriter(self.sdb_filename, buffering=self.buffering, audio_type=self.audio_type) as sdb_writer:
|
||||
for sample in interleaved:
|
||||
factor = (1.0 - 2.0 * INDEXING_FRACTION) / num_samples
|
||||
for index, sample in enumerate(interleaved):
|
||||
sdb_writer.add(sample)
|
||||
yield INDEXING_FRACTION + index * factor
|
||||
for frac in sdb_writer.finalize():
|
||||
yield (1.0 - INDEXING_FRACTION) + frac * INDEXING_FRACTION
|
||||
os.unlink(self.tmp_sdb_filename)
|
||||
|
||||
def close(self):
|
||||
for _ in self.finalize():
|
||||
pass
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче