Support `sb run` on host directly without Docker (#358)

**Description**

Support `sb run` on host directly without Docker

**Major Revisions**
- Add `--no-docker` argument for `sb run`.
- Run on host directly if `--no-docker` is specified.
- Update docs and tests correspondingly.
This commit is contained in:
Yifan Xiong 2022-06-14 10:57:01 +08:00 коммит произвёл GitHub
Родитель 528d69bd13
Коммит a4937e95c6
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
11 изменённых файлов: 78 добавлений и 18 удалений

Просмотреть файл

@ -103,17 +103,17 @@ sb deploy [--docker-image]
#### Optional arguments
| Name | Default | Description |
|-----------------------|-------------------------|-------------------------------------------------------------------------------|
| Name | Default | Description |
|-----------------------|-------------------------|-----------------------------------------------------------------------------------|
| `--docker-image` `-i` | `superbench/superbench` | Docker image URI, [here](./user-tutorial/container-images.mdx) listed all images. |
| `--docker-password` | `None` | Docker registry password if authentication is needed. |
| `--docker-username` | `None` | Docker registry username if authentication is needed. |
| `--host-file` `-f` | `None` | Path to Ansible inventory host file. |
| `--host-list` `-l` | `None` | Comma separated host list. |
| `--host-password` | `None` | Host password or key passphrase if needed. |
| `--host-username` | `None` | Host username if needed. |
| `--output-dir` | `None` | Path to output directory, outputs/{datetime} will be used if not specified. |
| `--private-key` | `None` | Path to private key if needed. |
| `--docker-password` | `None` | Docker registry password if authentication is needed. |
| `--docker-username` | `None` | Docker registry username if authentication is needed. |
| `--host-file` `-f` | `None` | Path to Ansible inventory host file. |
| `--host-list` `-l` | `None` | Comma separated host list. |
| `--host-password` | `None` | Host password or key passphrase if needed. |
| `--host-username` | `None` | Host username if needed. |
| `--output-dir` | `None` | Path to output directory, outputs/{datetime} will be used if not specified. |
| `--private-key` | `None` | Path to private key if needed. |
#### Global arguments
@ -281,6 +281,7 @@ sb run [--config-file]
[--host-list]
[--host-password]
[--host-username]
[--no-docker]
[--output-dir]
[--private-key]
```
@ -298,6 +299,7 @@ sb run [--config-file]
| `--host-list` `-l` | `None` | Comma separated host list. |
| `--host-password` | `None` | Host password or key passphrase if needed. |
| `--host-username` | `None` | Host username if needed. |
| `--no-docker` | `False` | Run on host directly without Docker. |
| `--output-dir` | `None` | Path to output directory, outputs/{datetime} will be used if not specified. |
| `--private-key` | `None` | Path to private key if needed. |
@ -320,6 +322,12 @@ and default benchmarking configuration:
sb run --docker-image superbench/cuda:11.1 --host-file ./host.ini
```
Run kernel launch benchmarks on host directly without using Docker:
```bash title="SB CLI"
sb run --no-docker --host-list localhost --config-override \
superbench.enable=kernel-launch superbench.env.SB_MICRO_PATH=/path/to/superbenchmark
```
### `sb version`
Print the current SuperBench CLI version.

Просмотреть файл

@ -40,3 +40,10 @@ After deployment, you can start to run the SuperBench benchmarks on all managed
```bash
sb run -f local.ini -c resnet.yaml
```
:::tip TIP
For environments that cannot start containers through `sb deploy`, e.g., a Kubernetes cluster,
you can create a privileged container with the `superbench/superbench` image, skip `sb deploy`, and run `sb run` directly inside the container with the `--no-docker` argument:
`sb run --no-docker -l localhost -c resnet.yaml`.
:::

Просмотреть файл

@ -12,10 +12,17 @@ import sys
import pathlib
from typing import List, Tuple
import pkg_resources
from setuptools import setup, find_packages, Command
import superbench
try:
pkg_resources.require(['pip>=18'])
except (pkg_resources.VersionConflict, pkg_resources.DistributionNotFound):
print('Try upgrade pip to latest version, for example, python3 -m pip install --upgrade pip')
raise
here = pathlib.Path(__file__).parent.resolve()
long_description = (here / 'README.md').read_text(encoding='utf-8')
@ -150,7 +157,6 @@ setup(
'pyyaml>=5.3',
'seaborn>=0.11.2',
'tcping>=0.1.1rc1',
'types-Markdown>=3.3.0'
'xlrd>=2.0.1',
'xlsxwriter>=1.3.8',
'xmltodict>=0.12.0',
@ -166,6 +172,8 @@ setup(
'pytest-cov>=2.11.1',
'pytest-subtests>=0.4.0',
'pytest>=6.2.2',
'types-markdown',
'types-pkg_resources',
'types-pyyaml',
'vcrpy>=4.1.1',
'yapf==0.31.0',

Просмотреть файл

@ -5,6 +5,7 @@
set -e
MPI_HOME="${MPI_HOME:-/usr/local/mpi}"
SB_MICRO_PATH="${SB_MICRO_PATH:-/usr/local}"
for dir in micro_benchmarks/*/ ; do
@ -12,7 +13,7 @@ for dir in micro_benchmarks/*/ ; do
SOURCE_DIR=$dir
BUILD_ROOT=$dir/build
mkdir -p $BUILD_ROOT
cmake -DCMAKE_INSTALL_PREFIX=$SB_MICRO_PATH -DCMAKE_BUILD_TYPE=Release -S $SOURCE_DIR -B $BUILD_ROOT
cmake -DCMAKE_PREFIX_PATH=$MPI_HOME -DCMAKE_INSTALL_PREFIX=$SB_MICRO_PATH -DCMAKE_BUILD_TYPE=Release -S $SOURCE_DIR -B $BUILD_ROOT
cmake --build $BUILD_ROOT
cmake --install $BUILD_ROOT
fi

Просмотреть файл

@ -43,6 +43,7 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
ac.argument('docker_image', options_list=('--docker-image', '-i'), type=str, help='Docker image URI.')
ac.argument('docker_username', type=str, help='Docker registry username if authentication is needed.')
ac.argument('docker_password', type=str, help='Docker registry password if authentication is needed.')
ac.argument('no_docker', action='store_true', help='Run on host directly without Docker.')
ac.argument(
'host_file', options_list=('--host-file', '-f'), type=str, help='Path to Ansible inventory host file.'
)

Просмотреть файл

@ -96,6 +96,7 @@ def process_runner_arguments(
docker_image='superbench/superbench',
docker_username=None,
docker_password=None,
no_docker=False,
host_file=None,
host_list=None,
host_username=None,
@ -111,6 +112,7 @@ def process_runner_arguments(
docker_image (str, optional): Docker image URI. Defaults to superbench/superbench:latest.
docker_username (str, optional): Docker registry username if authentication is needed. Defaults to None.
docker_password (str, optional): Docker registry password if authentication is needed. Defaults to None.
no_docker (bool, optional): Run on host directly without Docker. Defaults to False.
host_file (str, optional): Path to Ansible inventory host file. Defaults to None.
host_list (str, optional): Comma separated host list. Defaults to None.
host_username (str, optional): Host username if needed. Defaults to None.
@ -144,6 +146,7 @@ def process_runner_arguments(
'username': docker_username,
'password': docker_password,
'registry': split_docker_domain(docker_image)[0],
'skip': no_docker,
}
)
# Ansible config
@ -234,6 +237,7 @@ def deploy_command_handler(
docker_image=docker_image,
docker_username=docker_username,
docker_password=docker_password,
no_docker=False,
host_file=host_file,
host_list=host_list,
host_username=host_username,
@ -250,6 +254,7 @@ def run_command_handler(
docker_image='superbench/superbench',
docker_username=None,
docker_password=None,
no_docker=False,
host_file=None,
host_list=None,
host_username=None,
@ -267,6 +272,7 @@ def run_command_handler(
docker_image (str, optional): Docker image URI. Defaults to superbench/superbench:latest.
docker_username (str, optional): Docker registry username if authentication is needed. Defaults to None.
docker_password (str, optional): Docker registry password if authentication is needed. Defaults to None.
no_docker (bool, optional): Run on host directly without Docker. Defaults to False.
host_file (str, optional): Path to Ansible inventory host file. Defaults to None.
host_list (str, optional): Comma separated host list. Defaults to None.
host_username (str, optional): Host username if needed. Defaults to None.
@ -284,6 +290,7 @@ def run_command_handler(
docker_image=docker_image,
docker_username=docker_username,
docker_password=docker_password,
no_docker=no_docker,
host_file=host_file,
host_list=host_list,
host_username=host_username,

Просмотреть файл

@ -59,6 +59,10 @@ helps['run'] = """
- name: run all benchmarks on all nodes in ./host.ini using image "superbench/cuda:11.1"
and default benchmarking configuration
text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini
- name: run kernel launch benchmarks on host directly without using Docker
text: >
{cli_name} run --no-docker --host-list localhost
--config-override superbench.enable=kernel-launch superbench.env.SB_MICRO_PATH=/path/to/superbenchmark
""".format(cli_name=CLI_NAME)
helps['benchmark'] = """

Просмотреть файл

@ -4,15 +4,17 @@
max_fail_percentage: 0
vars:
container: sb-workspace
skip: '{{ no_docker | default(false) }}'
tasks:
- name: Checking container status
shell: docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
register: result
ignore_errors: true
become: yes
when: not skip
- fail:
msg: Container {{ container }} is not running.
when: result is failed or result.stdout != "true"
when: (not skip) and (result is failed or result.stdout != "true")
- name: Runtime Environment Update
hosts: all
@ -22,6 +24,8 @@
container: sb-workspace
sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
sb_env: |
# sb env
HOST_WS={{ ansible_user_dir }}/sb-workspace
# pytorch env
NNODES={{ sb_nodes | length }}
NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
@ -31,6 +35,11 @@
# config env
{{ env | default('') }}
tasks:
- name: Ensure Workspace
file:
path: '{{ workspace }}'
state: directory
mode: 0755
- name: Updating Config
copy:
src: '{{ output_dir }}/sb.config.yaml'

Просмотреть файл

@ -186,6 +186,7 @@ class SuperBenchRunner():
self._ansible_client.get_playbook_config(
'check_env.yaml',
extravars={
'no_docker': bool(self._docker_config.skip),
'output_dir': str(self._output_path),
'env': '\n'.join(f'{k}={v}' for k, v in self._sb_config.superbench.env.items()),
}
@ -388,13 +389,18 @@ class SuperBenchRunner():
timeout = self._sb_benchmarks[benchmark_name].timeout
env_list = '--env-file /tmp/sb.env'
if self._docker_config.skip:
env_list = 'set -o allexport && source /tmp/sb.env && set +o allexport'
for k, v in mode.env.items():
if isinstance(v, str):
env_list += f' -e {k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}'
envvar = f'{k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}'
env_list += f' -e {envvar}' if not self._docker_config.skip else f' && export {envvar}'
fcmd = "docker exec {env_list} sb-workspace bash -c '{command}'"
if self._docker_config.skip:
fcmd = "bash -c '{env_list} && cd $HOST_WS && {command}'"
ansible_runner_config = self._ansible_client.get_shell_config(
"docker exec {env_list} sb-workspace bash -c '{command}'".format(
env_list=env_list, command=self.__get_mode_command(benchmark_name, mode, timeout)
)
fcmd.format(env_list=env_list, command=self.__get_mode_command(benchmark_name, mode, timeout))
)
if mode.name == 'mpi':
ansible_runner_config = self._ansible_client.update_mpi_config(ansible_runner_config)

Просмотреть файл

@ -69,6 +69,10 @@ class SuperBenchCLIScenarioTest(ScenarioTest):
"""Test sb run."""
self.cmd('sb run --host-list localhost --config-override superbench.enable=none', checks=[NoneCheck()])
def test_sb_run_skipdocker(self):
"""Test sb run without docker."""
self.cmd('sb run -l localhost -C superbench.enable=none --no-docker', checks=[NoneCheck()])
def test_sb_run_no_docker_auth(self):
"""Test sb run, only --docker-username argument, should fail."""
result = self.cmd('sb run --docker-username test-user', expect_failure=True)

Просмотреть файл

@ -25,7 +25,12 @@ class RunnerTestCase(unittest.TestCase):
self.default_config = OmegaConf.create(yaml.load(fp, Loader=yaml.SafeLoader))
self.sb_output_dir = tempfile.mkdtemp()
self.runner = SuperBenchRunner(self.default_config, None, None, self.sb_output_dir)
self.runner = SuperBenchRunner(
self.default_config,
OmegaConf.create({}),
OmegaConf.create({}),
self.sb_output_dir,
)
def tearDown(self):
"""Hook method for deconstructing the test fixture after testing it."""