Feature: Add custom script functionality for plugins in the CLI (deprecate custom scripts) (#517)

This commit is contained in:
Timothee Guerin 2018-04-27 10:31:24 -07:00 committed by GitHub
Parent 07ac9b7596
Commit c98df7d1df
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
16 changed files: 284 additions and 68 deletions

View file

@@ -14,12 +14,17 @@ class ConfigurationBase:
         The dict is cleaned from null values and passed expanded to the constructor
         """
         try:
-            clean = dict((k, v) for k, v in args.items() if v)
-            return cls(**clean)
-        except TypeError as e:
+            return cls._from_dict(args)
+        except (ValueError, TypeError) as e:
             pretty_args = yaml.dump(args, default_flow_style=False)
             raise AztkError("{0} {1}\n{2}".format(cls.__name__, str(e), pretty_args))
 
+    @classmethod
+    def _from_dict(cls, args: dict):
+        clean = dict((k, v) for k, v in args.items() if v)
+        return cls(**clean)
+
     def validate(self):
         raise NotImplementedError("Validate not implemented")
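
The effect of this refactor: `from_dict` keeps the `AztkError` wrapping while subclasses override the new `_from_dict` hook to coerce raw values before construction. A minimal sketch of the pattern; the `WorkerConfiguration` class and its `count` field are hypothetical, not part of this commit:

```python
from aztk.internal import ConfigurationBase

class WorkerConfiguration(ConfigurationBase):
    """Hypothetical model using the new _from_dict hook."""

    def __init__(self, count: int = None):
        self.count = count

    @classmethod
    def _from_dict(cls, args: dict):
        # Coerce raw YAML values before the base class cleans the dict
        # and expands it into the constructor.
        if "count" in args:
            args["count"] = int(args["count"])
        return super()._from_dict(args)

    def validate(self):
        pass
```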

View file

@@ -134,6 +134,9 @@ class ClusterConfiguration(ConfigurationBase):
                 "You must configure a VNET to use AZTK in mixed mode (dedicated and low priority nodes). Set the VNET's subnet_id in your cluster.yaml."
             )
 
+        if self.custom_scripts:
+            logging.warning("Custom scripts are DEPRECATED and will be removed in 0.8.0. Use plugins instead. See https://aztk.readthedocs.io/en/latest/15-plugins.html")
+
 
 class RemoteLogin:
     def __init__(self, ip_address, port):

View file

@@ -1,21 +1,71 @@
-from aztk.error import InvalidPluginConfigurationError, InvalidModelError
+import os
+
+from aztk.error import InvalidModelError
 from aztk.internal import ConfigurationBase
 from aztk.models import PluginConfiguration
+from aztk.models.plugins import PluginFile, PluginTarget, PluginTargetRole
 from .plugin_manager import plugin_manager
 
 
 class PluginReference(ConfigurationBase):
     """
     Contains the configuration to use a plugin
 
     Args:
         name (str): Name of the plugin (must be the name of one of the provided plugins if no script is provided)
+        script (str): Path to a custom script to run as the plugin
+        target (PluginTarget): Target for the plugin. Defaults to SparkContainer.
+            This can only be used if providing a script
+        target_role (PluginTargetRole): Target role. Defaults to all nodes. This can only be used if providing a script
         args: (dict): If using name this is the arguments to pass to the plugin
     """
 
-    def __init__(self, name, args: dict = None):
+    def __init__(self,
+                 name: str = None,
+                 script: str = None,
+                 target: PluginTarget = None,
+                 target_role: PluginTargetRole = None,
+                 args: dict = None):
         super().__init__()
         self.name = name
+        self.script = script
+        self.target = target
+        self.target_role = target_role
         self.args = args or dict()
 
+    @classmethod
+    def _from_dict(cls, args: dict):
+        if "target" in args:
+            args["target"] = PluginTarget(args["target"])
+
+        if "target_role" in args:
+            args["target_role"] = PluginTargetRole(args["target_role"])
+
+        return super()._from_dict(args)
+
     def get_plugin(self) -> PluginConfiguration:
+        self.validate()
+
+        if self.script:
+            return self._plugin_from_script()
+
         return plugin_manager.get_plugin(self.name, self.args)
 
     def validate(self) -> bool:
-        if not self.name:
-            raise InvalidModelError("Plugin is missing a name")
+        if not self.name and not self.script:
+            raise InvalidModelError("Plugin must either specify a name of an existing plugin or the path to a script.")
+        if self.script and not os.path.isfile(self.script):
+            raise InvalidModelError("Plugin script file doesn't exist: '{0}'".format(self.script))
+
+    def _plugin_from_script(self):
+        script_filename = os.path.basename(self.script)
+        name = self.name or os.path.splitext(script_filename)[0]
+        return PluginConfiguration(
+            name=name,
+            execute=script_filename,
+            target=self.target,
+            target_role=self.target_role or PluginTargetRole.All,
+            files=[
+                PluginFile(script_filename, self.script),
+            ],
+        )
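
A usage sketch, not part of the diff: a `plugins` entry from `cluster.yaml` is parsed with `from_dict`, which coerces the `target`/`target_role` strings into enums, and `get_plugin` resolves it to a `PluginConfiguration`. The script path below is a placeholder and must exist on disk for `validate` to pass:

```python
from aztk.models.plugins.internal import PluginReference

ref = PluginReference.from_dict({
    "script": "plugins/setup.sh",  # hypothetical path; must be a real file
    "target": "host",
    "target_role": "all-nodes",
})
plugin = ref.get_plugin()  # validates, then builds a PluginConfiguration
print(plugin.name, plugin.execute)  # -> "setup" "setup.sh"
```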

View file

@@ -8,8 +8,8 @@ class PluginTarget(Enum):
     """
     Where this plugin should run
     """
-    SparkContainer = "spark-container",
-    Host = "host",
+    SparkContainer = "spark-container"
+    Host = "host"
 
 
 class PluginTargetRole(Enum):
@@ -18,7 +18,6 @@ class PluginTargetRole(Enum):
     All = "all-nodes"
 
 
-
 class PluginPort:
     """
     Definition for a port that should be opened on node
@@ -54,17 +53,17 @@ class PluginConfiguration(ConfigurationBase):
     def __init__(self,
                  name: str,
-                 ports: List[PluginPort]=None,
-                 files: List[PluginFile]=None,
-                 execute: str=None,
+                 ports: List[PluginPort] = None,
+                 files: List[PluginFile] = None,
+                 execute: str = None,
                  args=None,
                  env=None,
-                 target_role: PluginTargetRole=PluginTargetRole.Master,
-                 target: PluginTarget=PluginTarget.SparkContainer):
+                 target_role: PluginTargetRole = None,
+                 target: PluginTarget = None):
         self.name = name
         # self.docker_image = docker_image
-        self.target = target
-        self.target_role = target_role
+        self.target = target or PluginTarget.SparkContainer
+        self.target_role = target_role or PluginTargetRole.Master
         self.ports = ports or []
         self.files = files or []
         self.args = args or []
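
Moving the defaults out of the signature lets `PluginReference` pass `None` straight through without clobbering them; the fallback now happens in the constructor. A quick sketch of the resulting behavior, assuming the import paths shown elsewhere in this diff:

```python
from aztk.models import PluginConfiguration
from aztk.models.plugins import PluginTarget, PluginTargetRole

plugin = PluginConfiguration(name="example")
assert plugin.target == PluginTarget.SparkContainer   # fallback applied
assert plugin.target_role == PluginTargetRole.Master  # fallback applied
```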

View file

@@ -1,7 +1,6 @@
 import os
 from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
 from aztk.models.plugins.plugin_file import PluginFile
-from aztk.utils import constants
 
 dir_path = os.path.dirname(os.path.realpath(__file__))

View file

@@ -9,7 +9,6 @@ from aztk.spark.models import (
     DockerConfiguration,
     ClusterConfiguration,
     UserConfiguration,
-    PluginConfiguration,
 )
 from aztk.models.plugins.internal import PluginReference
@@ -127,7 +126,7 @@ def read_cluster_config(
     Reads the config file in the .aztk/ directory (.aztk/cluster.yaml)
     """
     if not os.path.isfile(path):
-        return
+        return None
 
     with open(path, 'r', encoding='UTF-8') as stream:
         try:
@@ -137,7 +136,7 @@ def read_cluster_config(
             "Error in cluster.yaml: {0}".format(err))
 
     if config_dict is None:
-        return
+        return None
 
     return cluster_config_from_dict(config_dict)

View file

@@ -1,5 +1,4 @@
 import argparse
-import os
 import typing
 
 import aztk.spark

View file

@@ -1,4 +1,7 @@
 # Custom scripts
 
+**Custom scripts are _DEPRECATED_. Use [plugins](15-plugins.html) instead.**
+
 Custom scripts allow for additional cluster setup steps when the cluster is being provisioned. This is useful
 if you want to install additional software, or if you need to modify the default cluster configuration for things such as modifying spark.conf, adding jars, or downloading any files you need in the cluster.
@@ -18,7 +21,7 @@ custom_scripts:
 The first script, simple.sh, will run on all nodes and will be executed first. The next script, master-only.sh, will run after simple.sh and only on nodes that are Spark masters. The last script, worker-only.sh, will run last and only on nodes that are Spark workers.
 
-Directories may also be provided in the custom_scripts section of `.aztk/cluster.yaml`. 
+Directories may also be provided in the custom_scripts section of `.aztk/cluster.yaml`.
 
 ```yaml
 custom_scripts:
@@ -50,11 +53,11 @@ A custom-script to install HDFS (2.8.2) is provided at `custom-scripts/hdfs.sh`
 To enable HDFS, add this snippet to the custom_scripts section of your `.aztk/cluster.yaml` configuration file:
 
 ```yaml
-custom_scripts: 
+custom_scripts:
   - script: ./custom-scripts/hdfs.sh
     runOn: all-nodes
 ```
 
-When SSHing into the cluster, you will have access to the Namenode UI at the default port 50070. This port can be changed in the ssh.yaml file in your `.aztk/` directory, or by passing the `--namenodeui` flag to the `aztk spark cluster ssh` command. 
+When SSHing into the cluster, you will have access to the Namenode UI at the default port 50070. This port can be changed in the ssh.yaml file in your `.aztk/` directory, or by passing the `--namenodeui` flag to the `aztk spark cluster ssh` command.
 
-When enabled on the cluster, HDFS can be used to read or write data locally during program execution. 
+When enabled on the cluster, HDFS can be used to read or write data locally during program execution.
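
With custom scripts deprecated, migrating the HDFS snippet above to the plugin syntax from this PR is mostly a rename; a sketch, using the keys documented on the plugins page:

```yaml
# Before (deprecated)
custom_scripts:
  - script: ./custom-scripts/hdfs.sh
    runOn: all-nodes

# After
plugins:
  - script: ./custom-scripts/hdfs.sh
    target_role: all-nodes
  # or simply use the supported plugin:
  # - name: hdfs
```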

View file

@@ -1,5 +1,9 @@
 # Plugins
 
+Plugins are a successor to [custom scripts](11-custom-scripts.html) and are the recommended way of running custom code on the cluster.
+
+Plugins can either be one of the AZTK [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).
+
 ## Supported Plugins
 AZTK ships with a library of default plugins that enable auxiliary services to use with your Spark cluster.
@@ -22,7 +26,8 @@ plugins:
   - name: hdfs
   - name: spark_ui_proxy
   - name: rstudio_server
-    version: "1.1.383"
+    args:
+      version: "1.1.383"
 ```
 
 ### Enable a plugin using the SDK
@@ -38,3 +43,26 @@ cluster_config = ClusterConfiguration(
     ]
 )
 ```
+
+## Custom script plugin
+Custom script plugins let you run your own code on the cluster.
+
+### Run a custom script plugin with the CLI
+
+#### Example
+```yaml
+plugins:
+  - script: path/to/my/script.sh
+  - name: friendly-name
+    script: path/to/my-other/script.sh
+    target: host
+    target_role: all-nodes
+```
+
+#### Options
+* `script`: **Required** Path to the script you want to run
+* `name`: **Optional** Friendly name. Defaults to the name of the script file
+* `target`: **Optional** Where the plugin should run (default: `spark-container`). Can be `spark-container` or `host`
+* `target_role`: **Optional** Which nodes the script should run on (default: `master`). Can be `master`, `worker` or `all-nodes`
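
The YAML above maps onto the SDK roughly as follows; this is a sketch that assumes `ClusterConfiguration` accepts a `plugins` list as in the SDK example earlier on this page, with placeholder paths:

```python
from aztk.models.plugins import PluginTarget, PluginTargetRole
from aztk.models.plugins.internal import PluginReference
from aztk.spark.models import ClusterConfiguration

cluster_config = ClusterConfiguration(
    # ... other cluster settings ...
    plugins=[
        PluginReference(
            name="friendly-name",                 # optional friendly name
            script="path/to/my-other/script.sh",  # placeholder path
            target=PluginTarget.Host,
            target_role=PluginTargetRole.All,
        ),
    ],
)
```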

View file

@@ -37,34 +37,52 @@ cluster_config = ClusterConfiguration(
 ## Parameters
 
 ### `PluginConfiguration`
-| Name          | Required? | Type                            | Description |
-|---------------|-----------|---------------------------------|-------------|
-| `name`        | required  | string                          | Name of your plugin(This will be used for creating folder, it is recommended to have a simple letter, dash, underscore only name) |
-| `files`       | required  | List[PluginFile|PluginTextFile] | List of files to upload |
-| `execute`     | required  | str                             | Script to execute. This script must be defined in the files above and must match its remote path |
-| `args`        | optional  | List[str]                       | List of arguments to be passed to your execute scripts |
-| `env`         | optional  | dict                            | List of environment variables to access in the script(This can be used to pass arguments to your script instead of args) |
-| `ports`       | optional  | List[PluginPort]                | List of ports to open if the script is running in a container. A port can also be specified as public and it will then be accessible when you ssh into the master node. |
-| `target`      | optional  | PluginTarget                    | Define where the execute script should be running. Potential values are `PluginTarget.SparkContainer(Default)` and `PluginTarget.Host` |
-| `target_role` | optional  | PluginTargetRole                | If the plugin should be run only on the master, the workers or all nodes. You can use environment variables(See below) to have different master/worker config |
+#### `name` | `required` | `string`
+Name of your plugin (this is used to create a folder, so a name made only of letters, dashes and underscores is recommended)
+
+#### `files` | `required` | `List[PluginFile|PluginTextFile]`
+List of files to upload
+
+#### `execute` | `required` | `str`
+Script to execute. This script must be defined in the files above and must match its remote path
+
+#### `args` | `optional` | `List[str]`
+List of arguments to be passed to your execute script
+
+#### `env` | `optional` | `dict`
+Environment variables to access in the script (this can be used to pass arguments to your script instead of args)
+
+#### `ports` | `optional` | `List[PluginPort]`
+List of ports to open if the script is running in a container. A port can also be marked as public, and it will then be accessible when you SSH into the master node.
+
+#### `target` | `optional` | `PluginTarget`
+Defines where the execute script should run. Potential values are `PluginTarget.SparkContainer` (the default) and `PluginTarget.Host`
+
+#### `target_role` | `optional` | `PluginTargetRole`
+Whether the plugin should run only on the master, only on the workers, or on all nodes. You can use environment variables (see below) to have different master/worker behavior
 
 ### `PluginFile`
-| Name         | Required? | Type | Description |
-|--------------|-----------|------|-------------|
-| `target`     | required  | str  | Where the file should be dropped relative to the plugin working directory |
-| `local_path` | required  | str  | Path to the local file you want to upload(Could form the plugins parameters) |
+#### `target` | `required` | `str`
+Where the file should be dropped, relative to the plugin working directory
+
+#### `local_path` | `required` | `str`
+Path to the local file you want to upload (this can be computed from the plugin's parameters)
 
 ### `TextPluginFile`
-| Name      | Required? | Type                | Description |
-|-----------|-----------|---------------------|-------------|
-| `target`  | required  | str                 | Where the file should be dropped relative to the plugin working directory |
-| `content` | required  | str | io.StringIO   | Path to the local file you want to upload(Could form the plugins parameters) |
+#### `target` | `required` | `str`
+Where the file should be dropped, relative to the plugin working directory
+
+#### `content` | `required` | `str` | `io.StringIO`
+Content of the file to upload, as a string or a `StringIO` buffer
 
 ### `PluginPort`
-| Name       | Required? | Type | Description |
-|------------|-----------|------|-------------|
-| `internal` | required  | int  | Internal port to open on the docker container |
-| `public`   | optional  | bool | If the port should be open publicly(Default: `False`) |
+#### `internal` | `required` | `int`
+Internal port to open on the docker container
+
+#### `public` | `optional` | `bool`
+Whether the port should be opened publicly (default: `False`)
 
 ## Environment variables available in the plugin
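
To make the parameters concrete, here is an illustrative plugin definition in the style of the built-in plugins (the plugin name, script, port, and environment variable are invented for this example):

```python
import os
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
from aztk.models.plugins.plugin_file import PluginFile

dir_path = os.path.dirname(os.path.realpath(__file__))

def MyPlugin():
    return PluginConfiguration(
        name="my_plugin",        # folder-friendly name
        execute="my_plugin.sh",  # must match a file uploaded below
        files=[PluginFile("my_plugin.sh", os.path.join(dir_path, "my_plugin.sh"))],
        env=dict(MY_PLUGIN_MODE="fast"),  # made available to the script
        ports=[PluginPort(internal=8080, public=True)],
        target_role=PluginTargetRole.All,
    )
```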

View file

@@ -21,6 +21,8 @@ basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__))))
 sys.path.insert(0, basedir)
 
+from aztk.version import __version__
+
 # -- Project information -----------------------------------------------------
 
 project = 'aztk'
@@ -28,8 +30,7 @@ project = 'aztk'
 copyright = '2018, Microsoft'
 author = 'Microsoft'
 
-# This gets set automatically by readthedocs
-release = version = ''
+release = version = __version__
 
 # -- General configuration ---------------------------------------------------
@@ -54,7 +55,7 @@ intersphinx_mapping = {
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+# templates_path = ['_templates']
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
@@ -75,7 +76,7 @@ master_doc = 'index'
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+# language = None
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@@ -88,24 +89,25 @@ pygments_style = 'sphinx'
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
+on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+if not on_rtd:  # only import and set the theme if we're building docs locally
+    import sphinx_rtd_theme
+    html_theme = 'sphinx_rtd_theme'
+    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#
-html_theme_options = {
-    'collapse_navigation': True,
-    'sticky_navigation': True,
-}
+    # Theme options are theme-specific and customize the look and feel of a theme
+    # further. For a list of options available for each theme, see the
+    # documentation.
+    #
+    html_theme_options = {
+        'collapse_navigation': True,
+        'sticky_navigation': True,
+    }
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+# html_static_path = ['_static']
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.

View file

@@ -0,0 +1,62 @@
# Writing a model

## Getting started
In `aztk/models`, create a new file named after your model, `my_model.py`

In `aztk/models/__init__.py`, add `from .my_model import MyModel`

Create a new class `MyModel` that inherits from `ConfigurationBase`

```python
from aztk.internal import ConfigurationBase

class MyModel(ConfigurationBase):
    """
    MyModel is a sample model

    Args:
        input1 (str): This is the first input
    """

    def __init__(self, input1: str):
        self.input1 = input1

    def validate(self):
        pass
```

## Add validation
In `def validate`, perform any checks you need and raise an `InvalidModelError` if there is a problem with the values.

### Validate required
To validate required attributes, call the parent `_validate_required` method. The method takes a list of attributes which must not be None.

```python
def validate(self) -> bool:
    self._validate_required(["input1"])
```

### Custom validation
```python
def validate(self) -> bool:
    if "foo" in self.input1:
        raise InvalidModelError("foo cannot be in input1")
```

## Convert dict to model
Classes inheriting from `ConfigurationBase` gain a `from_dict` class method which converts a dict into an instance of the class.

It works well for simple cases where values are plain types (str, int, etc.). If you need to process the values first, override the `_from_dict` method instead.

**Important: do not override the `from_dict` method, as it handles errors and displays them nicely.**

```python
@classmethod
def _from_dict(cls, args: dict):
    if "input1" in args:
        args["input1"] = MyInput1Model.from_dict(args["input1"])

    return super()._from_dict(args)
```
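
A quick sketch of the resulting behavior (the `unset` key is a made-up example): `from_dict` strips null values before expanding the dict into the constructor, so extra `None` entries are harmless.

```python
model_a = MyModel(input1="hello")
model_b = MyModel.from_dict({"input1": "hello", "unset": None})
assert model_b.input1 == model_a.input1
```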

View file

@@ -39,5 +39,15 @@ This toolkit is built on top of Azure Batch but does not require any Azure Batch
    :maxdepth: 2
    :caption: Developer documentation:
 
-   docs
-   80-tests
+   dev/docs
+   dev/writing-models
+   dev/tests
 
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`

View file

@@ -0,0 +1,39 @@
import pytest

from aztk.error import AztkError
from aztk.models.plugins.internal import PluginReference, PluginTarget, PluginTargetRole


def test_from_dict():
    ref = PluginReference.from_dict(dict(
        name="my-test-script",
        script="path/to/script.sh",
        target="host",
        target_role="worker",
    ))

    assert ref.name == "my-test-script"
    assert ref.script == "path/to/script.sh"
    assert ref.target == PluginTarget.Host
    assert ref.target_role == PluginTargetRole.Worker


def test_from_dict_invalid_param():
    with pytest.raises(AztkError):
        PluginReference.from_dict(dict(
            name2="invalid",
        ))


def test_from_dict_invalid_target():
    with pytest.raises(AztkError):
        PluginReference.from_dict(dict(
            script="path/to/script.sh",
            target="host-invalid",
        ))


def test_from_dict_invalid_target_role():
    with pytest.raises(AztkError):
        PluginReference.from_dict(dict(
            script="path/to/script.sh",
            target_role="worker-invalid",
        ))
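
A natural companion test, not part of this commit, would cover the script-to-plugin conversion. A sketch using pytest's `tmp_path` fixture, assuming the `PluginReference` API shown in this diff:

```python
def test_get_plugin_from_script(tmp_path):
    # get_plugin() validates that the script exists, so create a real file.
    script = tmp_path / "my-script.sh"
    script.write_text("echo hello")

    ref = PluginReference(script=str(script))
    plugin = ref.get_plugin()

    # The name defaults to the script filename without its extension,
    # and execute points at the uploaded file.
    assert plugin.name == "my-script"
    assert plugin.execute == "my-script.sh"
```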