From 9c984c7eb031ba4fc86fef5cde803ad339f9d270 Mon Sep 17 00:00:00 2001 From: guoshzhao Date: Fri, 9 Jul 2021 16:54:42 +0800 Subject: [PATCH] Bug bash - Merge fix from release/0.2 to main (#124) * Bug Fix - Fix race condition issue for multi ranks (#117) Fix race condition issue when multi ranks rotating the same directory. * Update pipeline for release branch (#122) * Bug Fix - Fix bug when convert bool config to store_true argument. (#120) Co-authored-by: Yifan Xiong --- .azure-pipelines/ansible-integration-test.yml | 1 + .azure-pipelines/cpu-unit-test.yml | 1 + .azure-pipelines/cuda-unit-test.yml | 1 + .codecov.yml | 2 ++ .github/workflows/build-image.yml | 1 + .github/workflows/gh-pages.yml | 1 + .github/workflows/lint.yml | 1 + superbench/common/utils/file_handler.py | 2 +- superbench/executor/executor.py | 12 ++++++------ tests/executor/test_executor.py | 16 ++++++++-------- 10 files changed, 23 insertions(+), 15 deletions(-) diff --git a/.azure-pipelines/ansible-integration-test.yml b/.azure-pipelines/ansible-integration-test.yml index 7eaea725..f5b34dd6 100644 --- a/.azure-pipelines/ansible-integration-test.yml +++ b/.azure-pipelines/ansible-integration-test.yml @@ -3,6 +3,7 @@ trigger: - main + - release/* pool: name: SuperBench CI diff --git a/.azure-pipelines/cpu-unit-test.yml b/.azure-pipelines/cpu-unit-test.yml index 2ec1ed70..8b75d197 100644 --- a/.azure-pipelines/cpu-unit-test.yml +++ b/.azure-pipelines/cpu-unit-test.yml @@ -3,6 +3,7 @@ trigger: - main + - release/* container: image: python:3.7 diff --git a/.azure-pipelines/cuda-unit-test.yml b/.azure-pipelines/cuda-unit-test.yml index c83c2b8f..dd988545 100644 --- a/.azure-pipelines/cuda-unit-test.yml +++ b/.azure-pipelines/cuda-unit-test.yml @@ -3,6 +3,7 @@ trigger: - main + - release/* pool: name: SuperBench CI diff --git a/.codecov.yml b/.codecov.yml index 648133bb..5133216e 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -18,6 +18,7 @@ coverage: - cuda-unit-test branches: - main + - release/* patch: default: target: 80% @@ -27,3 +28,4 @@ coverage: - cuda-unit-test branches: - main + - release/* diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index 38040105..60cfb06b 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -7,6 +7,7 @@ on: pull_request: branches: - main + - release/* jobs: docker: diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index 430b6d82..5cb2b34f 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -7,6 +7,7 @@ on: pull_request: branches: - main + - release/* jobs: docs-build: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 12330a70..3646c95d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -4,6 +4,7 @@ on: pull_request: branches: - main + - release/* jobs: spelling: diff --git a/superbench/common/utils/file_handler.py b/superbench/common/utils/file_handler.py index f6447dde..34616a29 100644 --- a/superbench/common/utils/file_handler.py +++ b/superbench/common/utils/file_handler.py @@ -23,7 +23,7 @@ def rotate_dir(target_dir): if target_dir.is_dir() and any(target_dir.iterdir()): logger.warning('Directory %s is not empty.', str(target_dir)) for i in itertools.count(start=1): - backup_dir = target_dir.with_name(f'{target_dir.name}.{i}') + backup_dir = target_dir.with_name(f'{target_dir.name}.bak{i}') if not backup_dir.is_dir(): target_dir.rename(backup_dir) break diff --git a/superbench/executor/executor.py b/superbench/executor/executor.py index 88d8810a..10fbac2f 100644 --- a/superbench/executor/executor.py +++ b/superbench/executor/executor.py @@ -93,8 +93,9 @@ class SuperBenchExecutor(): for name, val in parameters.items(): if val is None: continue - if isinstance(val, bool) and val: - argv.append('--{}'.format(name)) + if isinstance(val, bool): + if val: + argv.append('--{}'.format(name)) elif isinstance(val, (str, int, float)): argv.append('--{} {}'.format(name, val)) elif isinstance(val, (list, ListConfig)): @@ -139,9 +140,8 @@ class SuperBenchExecutor(): benchmark_output_dir = self._output_path / 'benchmarks' / benchmark_name for rank_env in ['PROC_RANK', 'LOCAL_RANK']: if os.getenv(rank_env): - benchmark_output_dir /= 'rank{}'.format(os.getenv(rank_env)) - break - return benchmark_output_dir + return benchmark_output_dir / 'rank{}'.format(os.getenv(rank_env)) + return benchmark_output_dir / 'rank0' def __create_benchmark_dir(self, benchmark_name): """Create output directory for benchmark. @@ -149,7 +149,7 @@ class SuperBenchExecutor(): Args: benchmark_name (str): Benchmark name. """ - rotate_dir(self._output_path / 'benchmarks' / benchmark_name) + rotate_dir(self.__get_benchmark_dir(benchmark_name)) try: self.__get_benchmark_dir(benchmark_name).mkdir(mode=0o755, parents=True, exist_ok=True) except Exception: diff --git a/tests/executor/test_executor.py b/tests/executor/test_executor.py index d8951e98..8539b14b 100644 --- a/tests/executor/test_executor.py +++ b/tests/executor/test_executor.py @@ -94,7 +94,7 @@ class ExecutorTestCase(unittest.TestCase): def test_create_benchmark_dir(self): """Test __create_benchmark_dir.""" - foo_path = Path(self.sb_output_dir, 'benchmarks', 'foo') + foo_path = Path(self.sb_output_dir, 'benchmarks', 'foo', 'rank0') self.executor._SuperBenchExecutor__create_benchmark_dir('foo') self.assertTrue(foo_path.is_dir()) self.assertFalse(any(foo_path.iterdir())) @@ -104,20 +104,20 @@ class ExecutorTestCase(unittest.TestCase): self.assertTrue(foo_path.is_dir()) self.assertFalse(any(foo_path.iterdir())) self.assertFalse((foo_path / 'bar.txt').is_file()) - self.assertTrue(foo_path.with_name('foo.1').is_dir()) - self.assertTrue((foo_path.with_name('foo.1') / 'bar.txt').is_file()) + self.assertTrue(foo_path.with_name('rank0.bak1').is_dir()) + self.assertTrue((foo_path.with_name('rank0.bak1') / 'bar.txt').is_file()) (foo_path / 'bar.json').touch() self.executor._SuperBenchExecutor__create_benchmark_dir('foo') self.assertTrue(foo_path.is_dir()) self.assertFalse(any(foo_path.iterdir())) self.assertFalse((foo_path / 'bar.json').is_file()) - self.assertTrue(foo_path.with_name('foo.2').is_dir()) - self.assertTrue((foo_path.with_name('foo.2') / 'bar.json').is_file()) + self.assertTrue(foo_path.with_name('rank0.bak2').is_dir()) + self.assertTrue((foo_path.with_name('rank0.bak2') / 'bar.json').is_file()) def test_write_benchmark_results(self): """Test __write_benchmark_results.""" - foobar_path = Path(self.sb_output_dir, 'benchmarks', 'foobar') + foobar_path = Path(self.sb_output_dir, 'benchmarks', 'foobar', 'rank0') foobar_results_path = foobar_path / 'results.json' self.executor._SuperBenchExecutor__create_benchmark_dir('foobar') foobar_results = { @@ -146,5 +146,5 @@ class ExecutorTestCase(unittest.TestCase): self.assertTrue(Path(self.sb_output_dir, 'benchmarks').is_dir()) for benchmark_name in self.executor._sb_benchmarks: - self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name).is_dir()) - self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name, 'results.json').is_file()) + self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name, 'rank0').is_dir()) + self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name, 'rank0', 'results.json').is_file())