## Use predefined plugins

AZTK ships with a set of predefined plugins. Import the ones you want and add them to the `plugins` list of your `ClusterConfiguration`:

```python
from aztk.spark.models.plugins import JupyterPlugin, RStudioServerPlugin, HDFSPlugin
cluster_config = ClusterConfiguration(
    # ... other config ...
    plugins=[
        JupyterPlugin(),
        RStudioServerPlugin(version="1.1.383"),
        HDFSPlugin(),
    ],
)
```
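Once built, the configuration is passed to the AZTK client when the cluster is created. Below is a minimal sketch, assuming `secrets_config` is an already-populated `SecretsConfiguration`; the exact client method names can differ between AZTK versions:

```python
import aztk.spark

# Assumption: secrets_config is an aztk.spark.models.SecretsConfiguration
# filled in with your Azure Batch and storage credentials.
client = aztk.spark.Client(secrets_config)

# Provision the cluster; each plugin listed in cluster_config.plugins is
# installed on the nodes during cluster setup.
cluster = client.create_cluster(cluster_config)
```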
## Define a custom plugin

To run your own setup script on the cluster, define a `PluginConfiguration` directly:

```python
from aztk.spark.models.plugins import PluginConfiguration, PluginFile, PluginPort
cluster_config = ClusterConfiguration(
    # ... other config ...
    plugins=[
        PluginConfiguration(
            name="my-custom-plugin",
            files=[
                PluginFile("file.sh", "/my/local/path/to/file.sh"),
                PluginFile("data/one.json", "/my/local/path/to/data/one.json"),
                PluginFile("data/two.json", "/my/local/path/to/data/two.json"),
            ],
            # Must be one of the files defined in `files`, matched by its target path
            execute="file.sh",
            env=dict(
                SOME_ENV_VAR="foo",
            ),
            args=["arg1"],  # These arguments are passed to your execute script
            ports=[
                PluginPort(internal=1234),               # Internal only (e.g. for node-to-node communication)
                PluginPort(internal=2345, public=True),  # Also opened to the public (when SSHed into the node), e.g. for a web UI
            ],
        )
    ]
)
```
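If you reuse a custom plugin across clusters, one option is to package it the way the predefined plugins are packaged: as a small factory that returns a `PluginConfiguration`. The `my_custom_plugin` helper below is a hypothetical sketch, not part of AZTK:

```python
from aztk.spark.models.plugins import PluginConfiguration, PluginFile

def my_custom_plugin(some_env_var="foo"):
    """Hypothetical factory mirroring the predefined-plugin call style, e.g. JupyterPlugin()."""
    return PluginConfiguration(
        name="my-custom-plugin",
        files=[PluginFile("file.sh", "/my/local/path/to/file.sh")],
        execute="file.sh",
        env=dict(SOME_ENV_VAR=some_env_var),
    )

# Usage: plugins=[my_custom_plugin(some_env_var="bar")]
```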