Additional parameter for SDB finalization sample buffer size

This commit is contained in:
Tilman Kamp 2020-02-20 17:16:55 +01:00
Родитель 0d45123b29
Коммит e8fb5895ca
2 изменённых файлов: 12 добавлений и 4 удалений

Просмотреть файл

@@ -136,6 +136,8 @@ def main(args):
help='Memory bucket size for external sorting of SDBs')
parser.add_argument('--sdb-workers', type=int, default=None,
help='Number of SDB encoding workers')
parser.add_argument('--sdb-buffered-samples', type=int, default=None,
help='Number of samples per bucket buffer during finalization')
parser.add_argument('--sdb-audio-type', default='opus', choices=AUDIO_TYPE_LOOKUP.keys(),
help='Audio representation inside target SDBs')
parser.add_argument('--buffer', default='1MB',
@@ -444,7 +446,8 @@ def main(args):
lists[list_name] = SortingSDBWriter(sdb_path,
audio_type=audio_type,
buffering=args.buffer,
cache_size=args.sdb_bucket_size)
cache_size=args.sdb_bucket_size,
buffered_samples=args.sdb_buffered_samples)
def to_samples():
for pcm_data, f in list_fragments():

Просмотреть файл

@@ -126,6 +126,7 @@ class SortingSDBWriter:  # pylint: disable=too-many-instance-attributes
cache_size=CACHE_SIZE,
buffering=BUFFER_SIZE,
audio_type=AUDIO_TYPE_OPUS,
buffered_samples=None,
id_prefix=None):
self.sdb_filename = sdb_filename
self.id_prefix = sdb_filename if id_prefix is None else id_prefix
@@ -134,6 +135,7 @@ class SortingSDBWriter:  # pylint: disable=too-many-instance-attributes
if audio_type not in SERIALIZABLE_AUDIO_TYPES:
raise ValueError('Audio type "{}" not supported'.format(audio_type))
self.audio_type = audio_type
self.buffered_samples = buffered_samples
self.tmp_sdb = DirectSDBWriter(self.tmp_sdb_filename,
buffering=buffering,
audio_type=audio_type,
@@ -181,9 +183,12 @@ class SortingSDBWriter:  # pylint: disable=too-many-instance-attributes
num_samples = len(self.tmp_sdb)
self.tmp_sdb.close()
self.tmp_sdb = None
avg_sample_size = self.overall_size / num_samples
max_cached_samples = self.cache_size / avg_sample_size
buffer_size = max(1, int(max_cached_samples / len(self.buckets)))
if self.buffered_samples is None:
avg_sample_size = self.overall_size / num_samples
max_cached_samples = self.cache_size / avg_sample_size
buffer_size = max(1, int(max_cached_samples / len(self.buckets)))
else:
buffer_size = self.buffered_samples
sdb_reader = SDB(self.tmp_sdb_filename, buffering=self.buffering, id_prefix='#pre-sorted')
def buffered_view(bucket):