Bug: Executor - Fix executor for Benchmark Execution Without Explicit Framework Field (#636)
**Description** Fix the executor so that a benchmark still runs when its config has no explicit `frameworks` field.
This commit is contained in:
Родитель
7af75df392
Коммит
96cc4d9397
|
@ -228,8 +228,8 @@ class SuperBenchExecutor():
|
||||||
logger.warning('Monitor can not support CPU platform.')
|
logger.warning('Monitor can not support CPU platform.')
|
||||||
|
|
||||||
benchmark_real_name = benchmark_name.split(':')[0]
|
benchmark_real_name = benchmark_name.split(':')[0]
|
||||||
if 'frameworks' in benchmark_config:
|
frameworks = benchmark_config.get('frameworks', [Framework.NONE.value])
|
||||||
for framework in benchmark_config.frameworks or [Framework.NONE.value]:
|
for framework in frameworks:
|
||||||
if benchmark_real_name == 'model-benchmarks' or (
|
if benchmark_real_name == 'model-benchmarks' or (
|
||||||
':' not in benchmark_name and benchmark_name.endswith('_models')
|
':' not in benchmark_name and benchmark_name.endswith('_models')
|
||||||
):
|
):
|
||||||
|
@ -253,9 +253,8 @@ class SuperBenchExecutor():
|
||||||
benchmark_real_name,
|
benchmark_real_name,
|
||||||
platform=self.__get_platform(),
|
platform=self.__get_platform(),
|
||||||
framework=Framework(framework.lower()),
|
framework=Framework(framework.lower()),
|
||||||
parameters=self.__get_arguments(
|
parameters=self.
|
||||||
{} if 'parameters' not in benchmark_config else benchmark_config.parameters
|
__get_arguments({} if 'parameters' not in benchmark_config else benchmark_config.parameters)
|
||||||
)
|
|
||||||
)
|
)
|
||||||
result = self.__exec_benchmark(full_name, context)
|
result = self.__exec_benchmark(full_name, context)
|
||||||
benchmark_results.append(result)
|
benchmark_results.append(result)
|
||||||
|
|
|
@ -84,7 +84,7 @@ class SuperBenchRunner():
|
||||||
if 'proc_num' not in mode:
|
if 'proc_num' not in mode:
|
||||||
self._sb_benchmarks[name].modes[idx].proc_num = 8
|
self._sb_benchmarks[name].modes[idx].proc_num = 8
|
||||||
elif mode.name == 'mpi':
|
elif mode.name == 'mpi':
|
||||||
if 'machinefile' not in mode:
|
if 'mca' not in mode:
|
||||||
self._sb_benchmarks[name].modes[idx].mca = {
|
self._sb_benchmarks[name].modes[idx].mca = {
|
||||||
'pml': 'ob1',
|
'pml': 'ob1',
|
||||||
'btl': '^openib',
|
'btl': '^openib',
|
||||||
|
@ -448,7 +448,7 @@ class SuperBenchRunner():
|
||||||
mode.env.update({'SB_MODE_SERIAL_INDEX': mode.serial_index, 'SB_MODE_PARALLEL_INDEX': mode.parallel_index})
|
mode.env.update({'SB_MODE_SERIAL_INDEX': mode.serial_index, 'SB_MODE_PARALLEL_INDEX': mode.parallel_index})
|
||||||
logger.info('Runner is going to run %s in %s mode, proc rank %d.', benchmark_name, mode.name, mode.proc_rank)
|
logger.info('Runner is going to run %s in %s mode, proc rank %d.', benchmark_name, mode.name, mode.proc_rank)
|
||||||
|
|
||||||
timeout = self._sb_benchmarks[benchmark_name].get('timeout', 60)
|
timeout = self._sb_benchmarks[benchmark_name].get('timeout', None)
|
||||||
if isinstance(timeout, int):
|
if isinstance(timeout, int):
|
||||||
timeout = max(timeout, 60)
|
timeout = max(timeout, 60)
|
||||||
|
|
||||||
|
|
|
@ -166,5 +166,7 @@ class ExecutorTestCase(unittest.TestCase):
|
||||||
self.assertTrue(p.is_dir())
|
self.assertTrue(p.is_dir())
|
||||||
self.assertTrue((p / 'results.json').is_file())
|
self.assertTrue((p / 'results.json').is_file())
|
||||||
with (p / 'results.json').open() as f:
|
with (p / 'results.json').open() as f:
|
||||||
for result in json.load(f):
|
results = json.load(f)
|
||||||
|
self.assertTrue(len(results) > 0)
|
||||||
|
for result in results:
|
||||||
self.assertIn(benchmark_name, result['name'])
|
self.assertIn(benchmark_name, result['name'])
|
||||||
|
|
|
@ -41,6 +41,22 @@ class RunnerTestCase(unittest.TestCase):
|
||||||
expected_log_file = Path(self.runner._sb_output_dir) / 'sb-run.log'
|
expected_log_file = Path(self.runner._sb_output_dir) / 'sb-run.log'
|
||||||
self.assertTrue(expected_log_file.is_file())
|
self.assertTrue(expected_log_file.is_file())
|
||||||
|
|
||||||
|
def test_validate_sb_config(self):
|
||||||
|
"""Test validate_sb_config."""
|
||||||
|
self.runner._SuperBenchRunner__validate_sb_config()
|
||||||
|
self.assertIn('env', self.runner._sb_config.superbench)
|
||||||
|
for name in self.runner._sb_benchmarks:
|
||||||
|
self.assertIn('modes', self.runner._sb_config.superbench.benchmarks[name])
|
||||||
|
for mode in self.runner._sb_config.superbench.benchmarks[name].modes:
|
||||||
|
self.assertIn('env', mode)
|
||||||
|
if mode.name == 'local':
|
||||||
|
self.assertIn('proc_num', mode)
|
||||||
|
self.assertIn('prefix', mode)
|
||||||
|
if mode.name == 'torch.distributed':
|
||||||
|
self.assertIn('proc_num', mode)
|
||||||
|
if mode.name == 'mpi':
|
||||||
|
self.assertIn('mca', mode)
|
||||||
|
|
||||||
def test_get_failure_count(self):
|
def test_get_failure_count(self):
|
||||||
"""Test get_failure_count."""
|
"""Test get_failure_count."""
|
||||||
self.assertEqual(0, self.runner.get_failure_count())
|
self.assertEqual(0, self.runner.get_failure_count())
|
||||||
|
@ -410,3 +426,30 @@ class RunnerTestCase(unittest.TestCase):
|
||||||
test_case['run_count'], test_case['curr_rank'], test_case['curr_run']
|
test_case['run_count'], test_case['curr_rank'], test_case['curr_run']
|
||||||
), test_case['expected']
|
), test_case['expected']
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_run_proc_timeout(self):
|
||||||
|
"""Test run_proc_ timeout."""
|
||||||
|
self.runner._sb_benchmarks = {
|
||||||
|
'benchmark1': {
|
||||||
|
'timeout': 120
|
||||||
|
},
|
||||||
|
'benchmark2': {
|
||||||
|
'timeout': None
|
||||||
|
},
|
||||||
|
'benchmark3': {
|
||||||
|
'timeout': 30
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
test_cases = [
|
||||||
|
('benchmark1', 120),
|
||||||
|
('benchmark2', None),
|
||||||
|
('benchmark3', 60),
|
||||||
|
]
|
||||||
|
|
||||||
|
for benchmark_name, expected_timeout in test_cases:
|
||||||
|
with self.subTest(benchmark_name=benchmark_name):
|
||||||
|
timeout = self.runner._sb_benchmarks[benchmark_name].get('timeout', None)
|
||||||
|
if isinstance(timeout, int):
|
||||||
|
timeout = max(timeout, 60)
|
||||||
|
self.assertEqual(timeout, expected_timeout)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче