Bug bash - Merge fix from release/0.2 to main (#124)

* Bug Fix - Fix race condition issue for multi ranks (#117) Fix race condition issue when multi ranks rotating the same directory. * Update pipeline for release branch (#122) * Bug Fix - Fix bug when convert bool config to store_true argument. (#120) Co-authored-by: Yifan Xiong <yifan.xiong@microsoft.com>
2021-07-09 16:54:42 +08:00 · 2021-07-09 16:54:42 +08:00 · 9c984c7eb0
--- a/.azure-pipelines/ansible-integration-test.yml
+++ b/.azure-pipelines/ansible-integration-test.yml
@ -3,6 +3,7 @@

 trigger:
  - main
+  - release/*

 pool:
  name: SuperBench CI
--- a/.azure-pipelines/cpu-unit-test.yml
+++ b/.azure-pipelines/cpu-unit-test.yml
@ -3,6 +3,7 @@

 trigger:
  - main
+  - release/*

 container:
  image: python:3.7
--- a/.azure-pipelines/cuda-unit-test.yml
+++ b/.azure-pipelines/cuda-unit-test.yml
@ -3,6 +3,7 @@

 trigger:
  - main
+  - release/*

 pool:
  name: SuperBench CI
--- a/.codecov.yml
+++ b/.codecov.yml
@ -18,6 +18,7 @@ coverage:
          - cuda-unit-test
        branches:
          - main
+          - release/*
    patch:
      default:
        target: 80%
@ -27,3 +28,4 @@ coverage:
          - cuda-unit-test
        branches:
          - main
+          - release/*
--- a/.github/workflows/build-image.yml
+++ b/.github/workflows/build-image.yml
@ -7,6 +7,7 @@ on:
  pull_request:
    branches:
    - main
+    - release/*

 jobs:
  docker:
--- a/.github/workflows/gh-pages.yml
+++ b/.github/workflows/gh-pages.yml
@ -7,6 +7,7 @@ on:
  pull_request:
    branches:
    - main
+    - release/*

 jobs:
  docs-build:
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@ -4,6 +4,7 @@ on:
  pull_request:
    branches:
    - main
+    - release/*

 jobs:
  spelling:
--- a/superbench/common/utils/file_handler.py
+++ b/superbench/common/utils/file_handler.py
@ -23,7 +23,7 @@ def rotate_dir(target_dir):
        if target_dir.is_dir() and any(target_dir.iterdir()):
            logger.warning('Directory %s is not empty.', str(target_dir))
            for i in itertools.count(start=1):
-                backup_dir = target_dir.with_name(f'{target_dir.name}.{i}')
+                backup_dir = target_dir.with_name(f'{target_dir.name}.bak{i}')
                if not backup_dir.is_dir():
                    target_dir.rename(backup_dir)
                    break
--- a/superbench/executor/executor.py
+++ b/superbench/executor/executor.py
@ -93,8 +93,9 @@ class SuperBenchExecutor():
        for name, val in parameters.items():
            if val is None:
                continue
-            if isinstance(val, bool) and val:
-                argv.append('--{}'.format(name))
+            if isinstance(val, bool):
+                if val:
+                    argv.append('--{}'.format(name))
            elif isinstance(val, (str, int, float)):
                argv.append('--{} {}'.format(name, val))
            elif isinstance(val, (list, ListConfig)):
@ -139,9 +140,8 @@ class SuperBenchExecutor():
        benchmark_output_dir = self._output_path / 'benchmarks' / benchmark_name
        for rank_env in ['PROC_RANK', 'LOCAL_RANK']:
            if os.getenv(rank_env):
-                benchmark_output_dir /= 'rank{}'.format(os.getenv(rank_env))
-                break
-        return benchmark_output_dir
+                return benchmark_output_dir / 'rank{}'.format(os.getenv(rank_env))
+        return benchmark_output_dir / 'rank0'

    def __create_benchmark_dir(self, benchmark_name):
        """Create output directory for benchmark.
@ -149,7 +149,7 @@ class SuperBenchExecutor():
        Args:
            benchmark_name (str): Benchmark name.
        """
-        rotate_dir(self._output_path / 'benchmarks' / benchmark_name)
+        rotate_dir(self.__get_benchmark_dir(benchmark_name))
        try:
            self.__get_benchmark_dir(benchmark_name).mkdir(mode=0o755, parents=True, exist_ok=True)
        except Exception:
--- a/tests/executor/test_executor.py
+++ b/tests/executor/test_executor.py
@ -94,7 +94,7 @@ class ExecutorTestCase(unittest.TestCase):

    def test_create_benchmark_dir(self):
        """Test __create_benchmark_dir."""
-        foo_path = Path(self.sb_output_dir, 'benchmarks', 'foo')
+        foo_path = Path(self.sb_output_dir, 'benchmarks', 'foo', 'rank0')
        self.executor._SuperBenchExecutor__create_benchmark_dir('foo')
        self.assertTrue(foo_path.is_dir())
        self.assertFalse(any(foo_path.iterdir()))
@ -104,20 +104,20 @@ class ExecutorTestCase(unittest.TestCase):
        self.assertTrue(foo_path.is_dir())
        self.assertFalse(any(foo_path.iterdir()))
        self.assertFalse((foo_path / 'bar.txt').is_file())
-        self.assertTrue(foo_path.with_name('foo.1').is_dir())
-        self.assertTrue((foo_path.with_name('foo.1') / 'bar.txt').is_file())
+        self.assertTrue(foo_path.with_name('rank0.bak1').is_dir())
+        self.assertTrue((foo_path.with_name('rank0.bak1') / 'bar.txt').is_file())

        (foo_path / 'bar.json').touch()
        self.executor._SuperBenchExecutor__create_benchmark_dir('foo')
        self.assertTrue(foo_path.is_dir())
        self.assertFalse(any(foo_path.iterdir()))
        self.assertFalse((foo_path / 'bar.json').is_file())
-        self.assertTrue(foo_path.with_name('foo.2').is_dir())
-        self.assertTrue((foo_path.with_name('foo.2') / 'bar.json').is_file())
+        self.assertTrue(foo_path.with_name('rank0.bak2').is_dir())
+        self.assertTrue((foo_path.with_name('rank0.bak2') / 'bar.json').is_file())

    def test_write_benchmark_results(self):
        """Test __write_benchmark_results."""
-        foobar_path = Path(self.sb_output_dir, 'benchmarks', 'foobar')
+        foobar_path = Path(self.sb_output_dir, 'benchmarks', 'foobar', 'rank0')
        foobar_results_path = foobar_path / 'results.json'
        self.executor._SuperBenchExecutor__create_benchmark_dir('foobar')
        foobar_results = {
@ -146,5 +146,5 @@ class ExecutorTestCase(unittest.TestCase):

        self.assertTrue(Path(self.sb_output_dir, 'benchmarks').is_dir())
        for benchmark_name in self.executor._sb_benchmarks:
-            self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name).is_dir())
-            self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name, 'results.json').is_file())
+            self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name, 'rank0').is_dir())
+            self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name, 'rank0', 'results.json').is_file())