Support `sb run` on host directly without Docker (#358)

**Description** Support `sb run` on host directly without Docker **Major Revisions** - Add `--no-docker` argument for `sb run`. - Run on host directly if `--no-docker` is specified. - Update docs and tests correspondingly.
2022-06-14 10:57:01 +08:00 · 2022-06-14 10:57:01 +08:00 · a4937e95c6
--- a/docs/cli.md
+++ b/docs/cli.md
@ -104,7 +104,7 @@ sb deploy [--docker-image]
 #### Optional arguments

 | Name                  | Default                 | Description                                                                       |
-|-----------------------|-------------------------|-------------------------------------------------------------------------------|
+|-----------------------|-------------------------|-----------------------------------------------------------------------------------|
 | `--docker-image` `-i` | `superbench/superbench` | Docker image URI, [here](./user-tutorial/container-images.mdx) listed all images. |
 | `--docker-password`   | `None`                  | Docker registry password if authentication is needed.                             |
 | `--docker-username`   | `None`                  | Docker registry username if authentication is needed.                             |
@ -281,6 +281,7 @@ sb run [--config-file]
       [--host-list]
       [--host-password]
       [--host-username]
+       [--no-docker]
       [--output-dir]
       [--private-key]
 ```
@ -298,6 +299,7 @@ sb run [--config-file]
 | `--host-list` `-l`       | `None`                  | Comma separated host list.                                                  |
 | `--host-password`        | `None`                  | Host password or key passphase if needed.                                   |
 | `--host-username`        | `None`                  | Host username if needed.                                                    |
+| `--no-docker`            | `False`                 | Run on host directly without Docker.                                        |
 | `--output-dir`           | `None`                  | Path to output directory, outputs/{datetime} will be used if not specified. |
 | `--private-key`          | `None`                  | Path to private key if needed.                                              |

@ -320,6 +322,12 @@ and default benchmarking configuration:
 sb run --docker-image superbench/cuda:11.1 --host-file ./host.ini
 ```

+Run kernel launch benchmarks on host directly without using Docker:
+```bash title="SB CLI"
+sb run --no-docker --host-list localhost --config-override \
+  superbench.enable=kernel-launch superbench.env.SB_MICRO_PATH=/path/to/superbenchmark
+```
+
 ### `sb version`

 Print the current SuperBench CLI version.
--- a/docs/getting-started/run-superbench.md
+++ b/docs/getting-started/run-superbench.md
@ -40,3 +40,10 @@ After deployment, you can start to run the SuperBench benchmarks on all managed
 ```bash
 sb run -f local.ini -c resnet.yaml
 ```
+
+:::tip TIP
+For environments that cannot start containers through `sb deploy`, e.g., a Kubernetes cluster,
+you can create a privileged container with the `superbench/superbench` image, skip `sb deploy`, and run `sb run` directly inside the container with the `--no-docker` argument:
+`sb run --no-docker -l localhost -c resnet.yaml`.
+
+:::
--- a/setup.py
+++ b/setup.py
@ -12,10 +12,17 @@ import sys
 import pathlib
 from typing import List, Tuple

+import pkg_resources
 from setuptools import setup, find_packages, Command

 import superbench

+try:
+    pkg_resources.require(['pip>=18'])
+except (pkg_resources.VersionConflict, pkg_resources.DistributionNotFound):
+    print('Try upgrade pip to latest version, for example, python3 -m pip install --upgrade pip')
+    raise
+
 here = pathlib.Path(__file__).parent.resolve()
 long_description = (here / 'README.md').read_text(encoding='utf-8')

@ -150,7 +157,6 @@ setup(
        'pyyaml>=5.3',
        'seaborn>=0.11.2',
        'tcping>=0.1.1rc1',
-        'types-Markdown>=3.3.0'
        'xlrd>=2.0.1',
        'xlsxwriter>=1.3.8',
        'xmltodict>=0.12.0',
@ -166,6 +172,8 @@ setup(
            'pytest-cov>=2.11.1',
            'pytest-subtests>=0.4.0',
            'pytest>=6.2.2',
+            'types-markdown',
+            'types-pkg_resources',
            'types-pyyaml',
            'vcrpy>=4.1.1',
            'yapf==0.31.0',
--- a/superbench/benchmarks/build.sh
+++ b/superbench/benchmarks/build.sh
@ -5,6 +5,7 @@

 set -e

+MPI_HOME="${MPI_HOME:-/usr/local/mpi}"
 SB_MICRO_PATH="${SB_MICRO_PATH:-/usr/local}"

 for dir in micro_benchmarks/*/ ; do
@ -12,7 +13,7 @@ for dir in micro_benchmarks/*/ ; do
        SOURCE_DIR=$dir
        BUILD_ROOT=$dir/build
        mkdir -p $BUILD_ROOT
-        cmake -DCMAKE_INSTALL_PREFIX=$SB_MICRO_PATH -DCMAKE_BUILD_TYPE=Release -S $SOURCE_DIR -B $BUILD_ROOT
+        cmake -DCMAKE_PREFIX_PATH=$MPI_HOME -DCMAKE_INSTALL_PREFIX=$SB_MICRO_PATH -DCMAKE_BUILD_TYPE=Release -S $SOURCE_DIR -B $BUILD_ROOT
        cmake --build $BUILD_ROOT
        cmake --install $BUILD_ROOT
    fi
--- a/superbench/cli/_commands.py
+++ b/superbench/cli/_commands.py
@ -43,6 +43,7 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
            ac.argument('docker_image', options_list=('--docker-image', '-i'), type=str, help='Docker image URI.')
            ac.argument('docker_username', type=str, help='Docker registry username if authentication is needed.')
            ac.argument('docker_password', type=str, help='Docker registry password if authentication is needed.')
+            ac.argument('no_docker', action='store_true', help='Run on host directly without Docker.')
            ac.argument(
                'host_file', options_list=('--host-file', '-f'), type=str, help='Path to Ansible inventory host file.'
            )
--- a/superbench/cli/_handler.py
+++ b/superbench/cli/_handler.py
@ -96,6 +96,7 @@ def process_runner_arguments(
    docker_image='superbench/superbench',
    docker_username=None,
    docker_password=None,
+    no_docker=False,
    host_file=None,
    host_list=None,
    host_username=None,
@ -111,6 +112,7 @@ def process_runner_arguments(
        docker_image (str, optional): Docker image URI. Defaults to superbench/superbench:latest.
        docker_username (str, optional): Docker registry username if authentication is needed. Defaults to None.
        docker_password (str, optional): Docker registry password if authentication is needed. Defaults to None.
+        no_docker (bool, optional): Run on host directly without Docker. Defaults to False.
        host_file (str, optional): Path to Ansible inventory host file. Defaults to None.
        host_list (str, optional): Comma separated host list. Defaults to None.
        host_username (str, optional): Host username if needed. Defaults to None.
@ -144,6 +146,7 @@ def process_runner_arguments(
            'username': docker_username,
            'password': docker_password,
            'registry': split_docker_domain(docker_image)[0],
+            'skip': no_docker,
        }
    )
    # Ansible config
@ -234,6 +237,7 @@ def deploy_command_handler(
        docker_image=docker_image,
        docker_username=docker_username,
        docker_password=docker_password,
+        no_docker=False,
        host_file=host_file,
        host_list=host_list,
        host_username=host_username,
@ -250,6 +254,7 @@ def run_command_handler(
    docker_image='superbench/superbench',
    docker_username=None,
    docker_password=None,
+    no_docker=False,
    host_file=None,
    host_list=None,
    host_username=None,
@ -267,6 +272,7 @@ def run_command_handler(
        docker_image (str, optional): Docker image URI. Defaults to superbench/superbench:latest.
        docker_username (str, optional): Docker registry username if authentication is needed. Defaults to None.
        docker_password (str, optional): Docker registry password if authentication is needed. Defaults to None.
+        no_docker (bool, optional): Run on host directly without Docker. Defaults to False.
        host_file (str, optional): Path to Ansible inventory host file. Defaults to None.
        host_list (str, optional): Comma separated host list. Defaults to None.
        host_username (str, optional): Host username if needed. Defaults to None.
@ -284,6 +290,7 @@ def run_command_handler(
        docker_image=docker_image,
        docker_username=docker_username,
        docker_password=docker_password,
+        no_docker=no_docker,
        host_file=host_file,
        host_list=host_list,
        host_username=host_username,
--- a/superbench/cli/_help.py
+++ b/superbench/cli/_help.py
@ -59,6 +59,10 @@ helps['run'] = """
        - name: run all benchmarks on all nodes in ./host.ini using image "superbench/cuda:11.1"
            and default benchmarking configuration
          text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini
+        - name: run kernel launch benchmarks on host directly without using Docker
+          text: >
+            {cli_name} run --no-docker --host-list localhost
+            --config-override superbench.enable=kernel-launch superbench.env.SB_MICRO_PATH=/path/to/superbenchmark
 """.format(cli_name=CLI_NAME)

 helps['benchmark'] = """
--- a/superbench/runner/playbooks/check_env.yaml
+++ b/superbench/runner/playbooks/check_env.yaml
@ -4,15 +4,17 @@
  max_fail_percentage: 0
  vars:
    container: sb-workspace
+    skip: '{{ no_docker | default(false) }}'
  tasks:
    - name: Checking container status
      shell: docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
      register: result
      ignore_errors: true
      become: yes
+      when: not skip
    - fail:
        msg: Container {{ container }} is not running.
-      when: result is failed or result.stdout != "true"
+      when: (not skip) and (result is failed or result.stdout != "true")

 - name: Runtime Environment Update
  hosts: all
@ -22,6 +24,8 @@
    container: sb-workspace
    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
    sb_env: |
+      # sb env
+      HOST_WS={{ ansible_user_dir }}/sb-workspace
      # pytorch env
      NNODES={{ sb_nodes | length }}
      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
@ -31,6 +35,11 @@
      # config env
      {{ env | default('') }}
  tasks:
+    - name: Ensure Workspace
+      file:
+        path: '{{ workspace }}'
+        state: directory
+        mode: 0755
    - name: Updating Config
      copy:
        src: '{{ output_dir }}/sb.config.yaml'
--- a/superbench/runner/runner.py
+++ b/superbench/runner/runner.py
@ -186,6 +186,7 @@ class SuperBenchRunner():
            self._ansible_client.get_playbook_config(
                'check_env.yaml',
                extravars={
+                    'no_docker': bool(self._docker_config.skip),
                    'output_dir': str(self._output_path),
                    'env': '\n'.join(f'{k}={v}' for k, v in self._sb_config.superbench.env.items()),
                }
@ -388,13 +389,18 @@ class SuperBenchRunner():

        timeout = self._sb_benchmarks[benchmark_name].timeout
        env_list = '--env-file /tmp/sb.env'
+        if self._docker_config.skip:
+            env_list = 'set -o allexport && source /tmp/sb.env && set +o allexport'
        for k, v in mode.env.items():
            if isinstance(v, str):
-                env_list += f' -e {k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}'
+                envvar = f'{k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}'
+                env_list += f' -e {envvar}' if not self._docker_config.skip else f' && export {envvar}'
+
+        fcmd = "docker exec {env_list} sb-workspace bash -c '{command}'"
+        if self._docker_config.skip:
+            fcmd = "bash -c '{env_list} && cd $HOST_WS && {command}'"
        ansible_runner_config = self._ansible_client.get_shell_config(
-            "docker exec {env_list} sb-workspace bash -c '{command}'".format(
-                env_list=env_list, command=self.__get_mode_command(benchmark_name, mode, timeout)
-            )
+            fcmd.format(env_list=env_list, command=self.__get_mode_command(benchmark_name, mode, timeout))
        )
        if mode.name == 'mpi':
            ansible_runner_config = self._ansible_client.update_mpi_config(ansible_runner_config)
--- a/tests/cli/test_sb.py
+++ b/tests/cli/test_sb.py
@ -69,6 +69,10 @@ class SuperBenchCLIScenarioTest(ScenarioTest):
        """Test sb run."""
        self.cmd('sb run --host-list localhost --config-override superbench.enable=none', checks=[NoneCheck()])

+    def test_sb_run_skipdocker(self):
+        """Test sb run without docker."""
+        self.cmd('sb run -l localhost -C superbench.enable=none --no-docker', checks=[NoneCheck()])
+
    def test_sb_run_no_docker_auth(self):
        """Test sb run, only --docker-username argument, should fail."""
        result = self.cmd('sb run --docker-username test-user', expect_failure=True)
--- a/tests/runner/test_runner.py
+++ b/tests/runner/test_runner.py
@ -25,7 +25,12 @@ class RunnerTestCase(unittest.TestCase):
            self.default_config = OmegaConf.create(yaml.load(fp, Loader=yaml.SafeLoader))
        self.sb_output_dir = tempfile.mkdtemp()

-        self.runner = SuperBenchRunner(self.default_config, None, None, self.sb_output_dir)
+        self.runner = SuperBenchRunner(
+            self.default_config,
+            OmegaConf.create({}),
+            OmegaConf.create({}),
+            self.sb_output_dir,
+        )

    def tearDown(self):
        """Hook method for deconstructing the test fixture after testing it."""