First Japanese Translation PR (#74)

* Update deploy.yml

* Update deploy.yml

* Update deploy.yml

* Update deploy.yml

* Update docusaurus.config.js

* Update docusaurus.config.js

* Update deploy.yml

* Update deploy.yml

* Update deploy.yml

* Update deploy.yml

* Update docusaurus.config.js

* Update docusaurus.config.js

* add japanese

* Update deploy.yml

* Update deploy.yml

* json for japanese

* add japanese translation

* add japanese translation

* Update deploy.yml

* fix : branch to main for github actions workflow

* add devcontainer.json

* japanese markdown

* japanese markdown

* docusaurus devcontainer for vscode developer

* japanese markdown translation message

* Translated into Japanese

* Translate workspace.md

* Translated installation.md into Japanese

* Translated installation.md into Japanese

* translate to japanese

* Small fix

* Add spaces

Add spaces before/after English words in Japanese sentences

* Add spaces

Add spaces before/after English words in Japanese sentences

* Translated into Japanese

* Replace single quotes with backquotes

* Translated logging.md into Japanese

* adopt to upstream main

* adopt to upstream main

* fix link path in japanese markdown

Co-authored-by: RyO <graphgear800@gmail.com>
Co-authored-by: Keisuke Takahashi <k14i@outlook.com>
This commit is contained in:
konabuta 2021-07-14 04:46:33 +09:00 committed by GitHub
Parent 492034d811
Commit 1c472e26ed
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
43 changed files: 4255 additions and 5 deletions

@@ -0,0 +1,11 @@
// devcontainer.json from the docusaurus GitHub repo
{
  "name": "Docusaurus Dev Container",
  "image": "mcr.microsoft.com/vscode/devcontainers/typescript-node:14-buster",
  "settings": {
    "terminal.integrated.shell.linux": "/bin/bash"
  },
  "extensions": ["dbaeumer.vscode-eslint", "orta.vscode-jest"],
  "forwardPorts": [3000],
  "postCreateCommand": "cd website && yarn install"
}

@@ -1,6 +1,10 @@
const path = require('path');
module.exports = {
i18n: {
defaultLocale: 'en',
locales: ['en', 'ja'],
},
title: 'Azure Machine Learning',
tagline: 'Open source cheat sheets for Azure ML',
url: 'https://github.com/Azure/',
@@ -21,6 +25,10 @@ module.exports = {
},
items: [
{to: '/docs/cheatsheets/python/v1/cheatsheet', label: 'Python SDK', position: 'left'},
{
type: 'localeDropdown',
position: 'left',
},
],
},
footer: {

website/i18n/ja/code.json Normal file

@@ -0,0 +1,218 @@
{
"theme.NotFound.title": {
"message": "ページが見つかりません",
"description": "The title of the 404 page"
},
"theme.NotFound.p1": {
"message": "お探しのページが見つかりませんでした。",
"description": "The first paragraph of the 404 page"
},
"theme.NotFound.p2": {
"message": "このページにリンクしているサイトの所有者に連絡をしてリンクが壊れていることを伝えてください。",
"description": "The 2nd paragraph of the 404 page"
},
"theme.AnnouncementBar.closeButtonAriaLabel": {
"message": "閉じる",
"description": "The ARIA label for close button of announcement bar"
},
"theme.blog.paginator.navAriaLabel": {
"message": "ブログ記事一覧のナビゲーション",
"description": "The ARIA label for the blog pagination"
},
"theme.blog.paginator.newerEntries": {
"message": "新しい記事",
"description": "The label used to navigate to the newer blog posts page (previous page)"
},
"theme.blog.paginator.olderEntries": {
"message": "過去の記事",
"description": "The label used to navigate to the older blog posts page (next page)"
},
"theme.blog.post.readingTime.plurals": {
"message": "約{readingTime}分",
"description": "Pluralized label for \"{readingTime} min read\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)"
},
"theme.tags.tagsListLabel": {
"message": "タグ:",
"description": "The label alongside a tag list"
},
"theme.blog.post.readMore": {
"message": "もっと見る",
"description": "The label used in blog post item excerpts to link to full blog posts"
},
"theme.blog.post.paginator.navAriaLabel": {
"message": "ブログ記事のナビゲーション",
"description": "The ARIA label for the blog posts pagination"
},
"theme.blog.post.paginator.newerPost": {
"message": "新しい記事",
"description": "The blog post button label to navigate to the newer/previous post"
},
"theme.blog.post.paginator.olderPost": {
"message": "過去の記事",
"description": "The blog post button label to navigate to the older/next post"
},
"theme.tags.tagsPageTitle": {
"message": "タグ",
"description": "The title of the tag list page"
},
"theme.blog.post.plurals": {
"message": "{count}件",
"description": "Pluralized label for \"{count} posts\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)"
},
"theme.blog.tagTitle": {
"message": "「{tagName}」タグの記事が{nPosts}あります",
"description": "The title of the page for a blog tag"
},
"theme.tags.tagsPageLink": {
"message": "全てのタグを見る",
"description": "The label of the link targeting the tag list page"
},
"theme.CodeBlock.copyButtonAriaLabel": {
"message": "クリップボードにコードをコピー",
"description": "The ARIA label for copy code blocks button"
},
"theme.CodeBlock.copied": {
"message": "コピーしました",
"description": "The copied button label on code blocks"
},
"theme.CodeBlock.copy": {
"message": "コピー",
"description": "The copy button label on code blocks"
},
"theme.docs.sidebar.expandButtonTitle": {
"message": "サイドバーを開く",
"description": "The ARIA label and title attribute for expand button of doc sidebar"
},
"theme.docs.sidebar.expandButtonAriaLabel": {
"message": "サイドバーを開く",
"description": "The ARIA label and title attribute for expand button of doc sidebar"
},
"theme.docs.sidebar.collapseButtonTitle": {
"message": "サイドバーを隠す",
"description": "The title attribute for collapse button of doc sidebar"
},
"theme.docs.sidebar.collapseButtonAriaLabel": {
"message": "サイドバーを隠す",
"description": "The title attribute for collapse button of doc sidebar"
},
"theme.docs.sidebar.responsiveCloseButtonLabel": {
"message": "メニューを閉じる",
"description": "The ARIA label for close button of mobile doc sidebar"
},
"theme.docs.sidebar.responsiveOpenButtonLabel": {
"message": "メニューを開く",
"description": "The ARIA label for open button of mobile doc sidebar"
},
"theme.docs.paginator.navAriaLabel": {
"message": "ドキュメントのナビゲーション",
"description": "The ARIA label for the docs pagination"
},
"theme.docs.paginator.previous": {
"message": "前へ",
"description": "The label used to navigate to the previous doc"
},
"theme.docs.paginator.next": {
"message": "次へ",
"description": "The label used to navigate to the next doc"
},
"theme.docs.versions.unreleasedVersionLabel": {
"message": "これはリリース前の{siteTitle} {versionLabel}のドキュメントです。",
"description": "The label used to tell the user that he's browsing an unreleased doc version"
},
"theme.docs.versions.unmaintainedVersionLabel": {
"message": "これは{siteTitle} {versionLabel}のドキュメントで現在はアクティブにメンテナンスされていません。",
"description": "The label used to tell the user that he's browsing an unmaintained doc version"
},
"theme.docs.versions.latestVersionSuggestionLabel": {
"message": "最新のドキュメントは{latestVersionLink} ({versionLabel}) を見てください。",
"description": "The label used to tell the user that he's browsing an unmaintained doc version"
},
"theme.docs.versions.latestVersionLinkLabel": {
"message": "最新バージョン",
"description": "The label used for the latest version suggestion link label"
},
"theme.common.editThisPage": {
"message": "このページを編集",
"description": "The link label to edit the current page"
},
"theme.common.headingLinkTitle": {
"message": "見出しへの直接リンク",
"description": "Title for link to heading"
},
"theme.lastUpdated.atDate": {
"message": "{date}に",
"description": "The words used to describe on which date a page has been last updated"
},
"theme.lastUpdated.byUser": {
"message": "{user}が",
"description": "The words used to describe by who the page has been last updated"
},
"theme.lastUpdated.lastUpdatedAtBy": {
"message": "{atDate}{byUser}最終更新",
"description": "The sentence used to display when a page has been last updated, and by who"
},
"theme.common.skipToMainContent": {
"message": "メインコンテンツまでスキップ",
"description": "The skip to content label used for accessibility, allowing to rapidly navigate to main content with keyboard tab/enter navigation"
},
"theme.SearchPage.documentsFound.plurals": {
"message": "{count}件のドキュメントが見つかりました",
"description": "Pluralized label for \"{count} documents found\". Use as much plural forms (separated by \"|\") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)"
},
"theme.SearchPage.existingResultsTitle": {
"message": "『{query}』の検索結果",
"description": "The search page title for non-empty query"
},
"theme.SearchPage.emptyResultsTitle": {
"message": "ドキュメントを検索",
"description": "The search page title for empty query"
},
"theme.SearchPage.inputPlaceholder": {
"message": "ここに検索するキーワードを入力してください",
"description": "The placeholder for search page input"
},
"theme.SearchPage.inputLabel": {
"message": "検索",
"description": "The ARIA label for search page input"
},
"theme.SearchPage.algoliaLabel": {
"message": "Algoliaで検索",
"description": "The ARIA label for Algolia mention"
},
"theme.SearchPage.noResultsText": {
"message": "検索結果が見つかりませんでした",
"description": "The paragraph for empty search result"
},
"theme.SearchPage.fetchingNewResults": {
"message": "新しい検索結果を取得しています...",
"description": "The paragraph for fetching new search results"
},
"theme.SearchBar.label": {
"message": "検索",
"description": "The ARIA label and placeholder for search button"
},
"index.title": {
"message": "Azure Machine Learning 日本語版"
},
"index.tagline": {
"message": "オープンソースの Azure Machine Learning チートシート"
},
"section1": {
"message": "GPU 分散学習"
},
"section0": {
"message": "チートシート"
},
"section2": {
"message": "環境"
},
"section0.desc": {
"message": "Azure ML で頻出するコードに関するチートシートです。"
},
"section1.desc": {
"message": "Azure ML で分散学習をするためのガイドです。"
},
"section2.desc": {
"message": "Azure ML で Python パッケージと Docker イメージを構築・管理します。"
}
}

@@ -0,0 +1,26 @@
{
"version.label": {
"message": "Next",
"description": "The label for version current"
},
"sidebar.pythonSidebar.category.Python": {
"message": "Python",
"description": "The label for category Python in sidebar pythonSidebar"
},
"sidebar.pythonSidebar.category.Getting Started": {
"message": "Getting Started",
"description": "The label for category Getting Started in sidebar pythonSidebar"
},
"sidebar.pythonSidebar.category.Azure ML Resources": {
"message": "Azure ML Resources",
"description": "The label for category Azure ML Resources in sidebar pythonSidebar"
},
"sidebar.pythonSidebar.category.Guides": {
"message": "Guides",
"description": "The label for category Guides in sidebar pythonSidebar"
},
"sidebar.cliSidebar.category.CLI (preview)": {
"message": "CLI (preview)",
"description": "The label for category CLI (preview) in sidebar cliSidebar"
}
}

@@ -0,0 +1 @@

@@ -0,0 +1,225 @@
---
title: チートシート
id: cheatsheet
description: A cheat sheet for Azure ML.
keywords:
- azure machine learning
- aml
- cheatsheet
- overview
---
## 基本セットアップ
### ワークスペースへの接続
```python
from azureml.core import Workspace
ws = Workspace.from_config()
```
この Workspace オブジェクトは Azure ML 操作における基本的なオブジェクトで、一連のコードを通して共有されます。(`ws`という変数名で参照されることが多いです。)
ワークスペースの詳細: [Workspaces](./workspace.md)
### コンピューティングターゲットへの接続
```python
compute_target = ws.compute_targets['<compute-target-name>']
```
**使用例**
```python
compute_target = ws.compute_targets['powerful-gpu']
config = ScriptRunConfig(
    compute_target=compute_target,  # train.py スクリプトを実行するために使用されるコンピューティングターゲット
    source_directory='.',
    script='train.py',
)
```
コンピューティングターゲットの詳細: [コンピューティングターゲット](./compute-targets.md)
### Python 環境の準備
pip の`requirements.txt`ファイルや Conda の`env.yml`ファイルを使い、コンピューティング環境の Python 環境を Environment オブジェクトとして定義することができます。
```python
from azureml.core import Environment
# 選択肢 1: pip
environment = Environment.from_pip_requirements('<env-name>', '<path/to/requirements.txt>')
# 選択肢 2: Conda
environment = Environment.from_conda_specification('<env-name>', '<path/to/env.yml>')
```
docker イメージを使って環境を準備することもできます。
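例えば、ベースイメージを直接指定して環境を定義する場合の最小限のスケッチです（イメージ名は本ページ後半の分散学習の例でも使用している Azure ML 提供のものを一例として挙げています）:
```python
from azureml.core import Environment

# ベースイメージを直接指定して環境を定義する（スケッチ）
environment = Environment('<env-name>')
environment.docker.enabled = True
environment.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'
```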
**使用例**
```python
environment = Environment.from_pip_requirements('<env-name>', '<path/to/requirements.txt>')

config = ScriptRunConfig(
    environment=environment,  # Python 環境を設定する
    source_directory='.',
    script='train.py',
)
```
環境の詳細: [環境](./environment.md)
## コードをサブミットする
Azure ML 上でコードを実行するためには:
1. エントリーポイントとなるコードのパス、コードを実行するコンピューティングターゲット、そしてコードを実行する Python 環境の**設定情報を作成**します。
2. Azure ML の実験を新規作成または再利用して**サブミット**します。
### ScriptRunConfig
典型的なディレクトリ構成例:
```bash
source_directory/
script.py # エントリーポイントとなるコード
module1.py # script.py により呼ばれるモジュール
...
```
リモートコンピューティングクラスター`target: ComputeTarget`上の、Python 環境`env: Environment`で、`$ (env) python <path/to/code>/script.py [arguments]`を実行するには、 `ScriptRunConfig`クラスを使用します。
```python
from azureml.core import ScriptRunConfig

config = ScriptRunConfig(
    source_directory='<path/to/code>',  # 相対パスでも OK
    script='script.py',
    compute_target=compute_target,
    environment=environment,
    arguments=arguments,
)
```
ScriptRunConfig の引数の詳細: [Command line arguments](./script-run-config.md#command-line-arguments)
:::info
- `compute_target`: もし引数が与えられなかった場合は、スクリプトはローカルマシン上で実行されます。
- `environment`: もし引数が与えられなかった場合、Azure ML のデフォルトの Python 環境が使用されます。環境の詳細: [Environment](./environment.md)
:::
#### コマンド
明示的なコマンドを与えることもできます:
```python
command = 'echo cool && python script.py'.split()

config = ScriptRunConfig(
    source_directory='<path/to/code>',  # 相対パスも OK
    command=command,
    compute_target=compute_target,
    environment=environment,
    arguments=arguments,
)
```
コマンドの詳細: [コマンドライン引数](./script-run-config.md#コマンドライン引数)
### 実験
コードをサブミットするには`実験`を作成します。実験は、サブミットされた一連のコードをグルーピングしてコードの実行履歴を追跡する軽量のコンテナです。 (参照: [Run History](./run-history.md)).
```python
from azureml.core import Experiment

exp = Experiment(ws, '<experiment-name>')
run = exp.submit(config)
print(run.get_portal_url())
```
上記コードで返される Azure ML Studio へのリンクにより、実験の実行をモニタリングすることができます。
詳細: [ScriptRunConfig](./script-run-config.md)
### 使用例
以下はコマンドラインから Conda 環境を使ってトレーニングスクリプト`train.py`をローカルマシン上で実行する典型的な例です。
```bash
$ conda env create -f env.yml # pytorch という名前の conda env を作成
$ conda activate pytorch
(pytorch) $ cd <path/to/code>
(pytorch) $ python train.py --learning_rate 0.001 --momentum 0.9
```
このスクリプトを Azure 上の GPU を使って実行したいと仮定します。
```python
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig

ws = Workspace.from_config()
compute_target = ws.compute_targets['powerful-gpu']
environment = Environment.from_conda_specification('pytorch', 'env.yml')

config = ScriptRunConfig(
    source_directory='<path/to/code>',
    script='train.py',
    compute_target=compute_target,  # 上で取得したコンピューティングターゲットを指定
    environment=environment,
    arguments=['--learning_rate', 0.001, '--momentum', 0.9],
)

run = Experiment(ws, 'PyTorch model training').submit(config)
```
## 分散 GPU 学習
分散 GPU 学習を有効にするために`ScriptRunConfig`を変更します。
```python {3,8-9,12,20}
from azureml.core import Workspace, Experiment, ScriptRunConfig
from azureml.core import Environment
from azureml.core.runconfig import MpiConfiguration

ws = Workspace.from_config()
compute_target = ws.compute_targets['powerful-gpu']
environment = Environment.from_conda_specification('pytorch', 'env.yml')
environment.docker.enabled = True
environment.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'

# それぞれ 4 つの GPU を搭載した 2 つのノード上でトレーニングを行う
mpiconfig = MpiConfiguration(process_count_per_node=4, node_count=2)

config = ScriptRunConfig(
    source_directory='<path/to/code>',  # train.py が含まれるディレクトリ
    script='train.py',
    compute_target=compute_target,
    environment=environment,
    arguments=['--learning_rate', 0.001, '--momentum', 0.9],
    distributed_job_config=mpiconfig,  # 分散学習のための設定を追加
)

run = Experiment(ws, 'PyTorch model training').submit(config)
```
:::info
- `mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04`は OpenMPI の docker イメージです。このイメージは Azure ML 上で分散学習を実行する際に必要となります。
- `MpiConfiguration`はトレーニングを行うノード数とノードあたりの GPU 数を指定するために使います。
:::
詳細: [Distributed GPU Training](./distributed-training.md)
## データへの接続
ワークスペース`ws`のデフォルトデータストアにあるデータをトレーニングスクリプトから扱うためには:
```python
from azureml.core import Dataset

datastore = ws.get_default_datastore()
dataset = Dataset.File.from_files(path=(datastore, '<path/on/datastore>'))
```
詳細: [Data](./data.md)
コマンドライン引数に以下を渡すことで上記の`dataset`を使用できます。
```python
arguments=['--data', dataset.as_mount()]
```
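スクリプト側では、`--data` で渡されたマウントポイントを通常のディレクトリとして参照できます。以下は仮の `train.py` の例です:
```python
# train.py（仮の例）: --data 引数で渡されたマウントポイントを読む
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('--data', type=str)
args = parser.parse_args()

print(os.listdir(args.data))  # データストア上のファイルを一覧表示
```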

@@ -0,0 +1,182 @@
---
title: Developing on Azure ML
description: Guide to developing your code on Azure ML.
keywords:
- ssh
- development
- compute
---
:::note
このコンテンツはお使いの言語では利用できません。
:::
This guide gives some pointers for developing your code on Azure ML. A typical
scenario might be testing your distributed training code, or some other aspect
of your code that isn't well represented on your local devbox.
A common pain-point in these scenarios is that iteration on Azure ML can feel
slow - especially when compared to developing on a VM.
**Learning objective.** To improve the development experience on Azure ML
to match - or even exceed - that of a "bare" VM.
## 🚧 The hurdles
Two main reasons developing on Azure ML can feel slow as compared to a VM are:
- Any changes to my Python environment force Docker image rebuild which can
take >5 minutes.
- Compute resources are _released_ between iterations, forcing me to wait for
new compute to warm up (e.g. pulling Docker images).
Below we provide some techniques to address these issues, as well as some advantages
of working with Azure ML compute directly. We also provide an [example](#example) applying these
techniques.
## 🕰️ Prepare compute for development
When creating your _compute instance / cluster_ there are a few things you can
do to prepare for development:
1. **Enable SSH on compute.**
Supported on both _compute instance_ and _compute targets_. This will allow you to
use your compute just like you would a VM.
:::tip VS Code Remote Extension.
VS Code's [remote extension](https://code.visualstudio.com/docs/remote/ssh)
allows you to connect to your Azure ML compute resources via SSH.
This way you can develop directly in the cloud.
:::
2. **Increase "Idle seconds before scale down".**
For compute targets you can increase this parameter, e.g. to 30 minutes. This means
the cluster won't be released between runs while you iterate (a sketch using the SDK follows below).
:::warning
Don't forget to roll this back when you're done iterating.
:::
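For example, assuming an existing cluster named `cpucluster`, a minimal sketch that raises the idle timeout from the SDK (`AmlCompute.update` is part of azureml-core):
```python
from azureml.core import Workspace

ws = Workspace.from_config()
cluster = ws.compute_targets['cpucluster']  # assumed cluster name

# keep nodes warm for 30 minutes between iterations...
cluster.update(idle_seconds_before_scaledown=1800)

# ...and roll it back when you're done:
# cluster.update(idle_seconds_before_scaledown=120)
```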
## 🏃‍♀️ Commands
Typically you will submit your code to Azure ML via a `ScriptRunConfig` a little like this:
```python
config = ScriptRunConfig(
    source_directory='<path/to/source_directory>',
    script='script.py',
    compute_target=target,
    environment=env,
    ...
)
```
:::info
For more details on using `ScriptRunConfig` to submit your code see
[Running Code in the cloud](script-run-config).
:::
By using the [`command`](script-run-config#commands) argument you can improve your agility.
Commands allow you to chain together several steps in one e.g.:
```python
command = "pip install torch && python script.py --learning_rate 2e-5".split()
```
Another example would be to include a setup script:
```bash title="setup.sh"
echo "Running setup script"
pip install torch
pip install -r requirements.txt
export PYTHONPATH=$PWD
```
and then calling it in your command
```python
command = "bash setup.sh && python script.py --learning_rate 2e-5".split()
```
This way Azure ML doesn't have to rebuild the docker image with incremental changes.
## Advantages
In addition to matching the development experience on a VM, there are certain benefits to
developing on Azure ML compute directly.
- **Production-ready.** By developing directly in Azure ML you avoid the additional step of porting your
VM-developed code to Azure ML later. This is particularly relevant if you intend to
run your production code on Azure ML.
- **Data access.** If your training script makes use of data in Azure you can use the Azure ML
Python SDK to read it (see [Data](data) for examples). The alternative is that you might have to
find some way of getting your data onto the VM you are developing on.
- **Notebooks.** Azure ML's _compute instances_ come with Jupyter notebooks which can help with quick
debugging. Moreover, these notebooks can easily be run against different compute infrastructure
and can be a great way to collaborate.
## Example
We provide a simple example demonstrating the mechanics of the above steps. Consider the following
setup:
```bash
src/
.azureml/
config.json # workspace connection config
train.py # python script we are developing
setup.sh # to run on compute before train.py
azureml_run.py # submit job to azure
```
```bash title="setup.sh"
echo "Running setup script"
pip install numpy
```
```python title="train.py"
import numpy as np
print(np.random.rand())
```
Now from your local machine you can use the Azure ML Python SDK
to execute your command in the cloud:
```python title="azureml_run.py"
from azureml.core import Workspace, Experiment, ScriptRunConfig
# get workspace
ws = Workspace.from_config()
target = ws.compute_targets['cpucluster']
exp = Experiment(ws, 'dev-example')
command = "bash setup.sh && python script.py".split()
# set up script run configuration
config = ScriptRunConfig(
source_directory='.',
command=command,
compute_target=target,
)
# submit script to AML
run = exp.submit(config)
print(run.get_portal_url()) # link to ml.azure.com
run.wait_for_completion(show_output=True)
```
Now if you need to update your Python environment, for example, you can simply
add commands to `setup.sh`:
```bash title="setup.sh"
echo "Running setup script"
pip install numpy
pip install pandas # add additional libraries
export CUDA_VISIBLE_DEVICES="0,1" # set environment variables
nvidia-smi # run helpful command-line tools
```
without having to rebuild any Docker images.

@@ -0,0 +1,95 @@
---
title: コンピューティングターゲット
description: Guide to setting up and using Azure compute resources in Azure ML.
keywords:
- compute
- cpu
- gpu
---
Compute Target (コンピューティングターゲット) は AML の計算環境の概念を抽象化したものです。対象はローカルマシンから Azure VM で構成されるクラスターまで様々です。
### Compute Target の取得
ワークスペース `ws` にある既存の Compute Target の取得:
```python
from azureml.core import ComputeTarget
target = ComputeTarget(ws, '<compute_target_name>')
```
### 既存 Compute Target のリスト
ワークスペース `ws` にある Compute Target のリストの取得:
```python
ComputeTarget.list(ws): List[ComputeTarget]
```
### 空き状況の確認
ワークスペースをチームで共有するときには、ジョブを実行する前にワークスペース `ws` の計算環境が利用可能か確認することがよくあります。
[studio](https://ml.azure.com) から簡単に確認することができます。
![](img/compute-target.png)
## Compute Target の作成
[studio](https://ml.azure.com) から簡単に新しい Compute Target が作成できます。
"コンピューティング" のメニュー選択> "コンピューティングクラスタ" のタブを選択 > "+ 新規作成" ボタンを選択:
![](img/create-compute.png)
作成時に次の情報を入力します:
- **コンピューティング名**: 後に studio や Python SDK から参照するのに利用されます。入力必須です。名前の長さは 2 から 16 文字の間でなければなりません。有効な文字は英字、数字、- 文字です。
- **仮想マシンの種類**: 「CPU」または「GPU」
- **仮想マシンの優先度**: "専用" もしくは "低優先度"
> 低優先度の仮想マシンは安く使えますが、計算環境の確保を保証していません。ジョブが途中で中断される場合があります。
- **仮想マシンのサイズ**: ドロップダウンリストから選択します。利用可能な仮想マシンのサイズの一覧は[こちら](https://azure.microsoft.com/global-infrastructure/services/?products=virtual-machines)です。
- **最小 / 最大ノード数**: Compute Target は実行されたジョブの数に応じて最小ノード数と最大ノード数の間でオートスケールします。最小ノード数を 0 に設定すると、計算環境上のジョブが完了したときに自動で 0 台に縮小されるためコストを節約できます。
- **スケールダウンする前のアイドル時間 (秒)**: 計算環境をスケールダウンする前のアイドル時間を指定します。
備考: 計算環境は常に Azure Machine Learning Workspace と同じリージョンに作成されます。
### SSH の利用
管理者ユーザ名とパスワード・SSH キーを設定することで、Compute Target に対して SSH で接続できます。
![](img/create-compute-ssh.png)
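SDK から SSH を有効にして作成する場合のスケッチです（`admin_username` と `admin_user_ssh_key` は `AmlCompute.provisioning_configuration` の引数で、値は仮のものです）:
```python
from azureml.core.compute import AmlCompute

# SSH を有効にしたプロビジョニング設定（スケッチ）
compute_config = AmlCompute.provisioning_configuration(
    vm_size='STANDARD_D2_V2',
    max_nodes=4,
    admin_username='azureuser',            # 管理者ユーザ名（例）
    admin_user_ssh_key='ssh-rsa AAAA...',  # SSH 公開鍵（仮の値）
)
```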
### 低優先度 の Compute Target
低優先度の仮想マシンは安く使えますが、計算環境の確保を保証していません。ジョブが途中で中断される場合があります。
![](img/create-compute-lp.png)
### SDK 経由での作成
SDK 経由での Compute Target の作成:
```python
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
ws = Workspace.from_config() # .azureml フォルダのファイルから接続情報を参照
# CPU クラスターの名前を選択
cpu_cluster_name = "cpu-cluster"
# 既存のクラスターが無いことを確認
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
        idle_seconds_before_scaledown=2400,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
    cpu_cluster.wait_for_completion(show_output=True)
```

@@ -0,0 +1,319 @@
---
title: Data
description: Guide to working with data in Azure ML.
keywords:
- data
- dataset
- datastore
---
:::note
このコンテンツはお使いの言語では利用できません。
:::
## Concepts
AzureML provides two basic assets for working with data:
- Datastore
- Dataset
### Datastore
Provides an interface for numerous Azure Machine Learning storage accounts.
Each Azure ML workspace comes with a default datastore:
```python
from azureml.core import Workspace
ws = Workspace.from_config()
datastore = ws.get_default_datastore()
```
which can also be accessed directly from the [Azure Portal](https://portal.azure.com) (under the same
resource group as your Azure ML Workspace).
Datastores are attached to workspaces and are used to store connection information to Azure storage services so you can refer to them by name and don't need to remember the connection information and secret used to connect to the storage services.
Use this class to perform management operations, including register, list, get, and remove datastores.
### Dataset
A dataset is a reference to data - either in a datastore or behind a public URL.
Datasets provide enhanced capabilities including data lineage (with the notion of versioned datasets).
## Get Datastore
### Default datastore
Each workspace comes with a default datastore.
```python
datastore = ws.get_default_datastore()
```
### Register datastore
Connect to, or create, a datastore backed by one of the multiple data-storage options
that Azure provides. For example:
- Azure Blob Container
- Azure Data Lake (Gen1 or Gen2)
- Azure File Share
- Azure MySQL
- Azure PostgreSQL
- Azure SQL
- Azure Databricks File System
See the SDK for a comprehensive list of datastore types and authentication options:
[Datastores (SDK)](https://docs.microsoft.com/python/api/azureml-core/azureml.core.datastore(class)?view=azure-ml-py).
#### Register a new datastore
- To register a store via an **account key**:
```python
datastores = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='<datastore-name>',
    container_name='<container-name>',
    account_name='<account-name>',
    account_key='<account-key>',
)
```
- To register a store via a **SAS token**:
```python
datastores = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='<datastore-name>',
    container_name='<container-name>',
    account_name='<account-name>',
    sas_token='<sas-token>',
)
```
### Connect to datastore
The workspace object `ws` has access to its datastores via
```python
ws.datastores: Dict[str, Datastore]
```
Any datastore that is registered to the workspace can thus be accessed by name.
```python
datastore = ws.datastores['<name-of-registered-datastore>']
```
### Link datastore to Azure Storage Explorer
The workspace object `ws` is a very powerful handle when it comes to managing assets the
workspace has access to. For example, we can use the workspace to connect to a datastore
in Azure Storage Explorer.
```python
from azureml.core import Workspace
ws = Workspace.from_config()
datastore = ws.datastores['<name-of-datastore>']
```
- For a datastore that was created using an **account key** we can use:
```python
account_name, account_key = datastore.account_name, datastore.account_key
```
- For a datastore that was created using a **SAS token** we can use:
```python
sas_token = datastore.sas_token
```
The `account_name` and `account_key` can then be used directly in Azure Storage Explorer to
connect to the Datastore.
## Blob Datastore
Move data to and from your [AzureBlobDatastore](https://docs.microsoft.com/python/api/azureml-core/azureml.data.azure_storage_datastore.azureblobdatastore?view=azure-ml-py) object `datastore`.
### Upload to Blob Datastore
The AzureBlobDatastore provides APIs for data upload:
```python
datastore.upload(
    src_dir='./data',
    target_path='<path/on/datastore>',
    overwrite=True,
)
```
Alternatively, if you are working with multiple files in different locations you can use
```python
datastore.upload_files(
    files,  # List[str] of absolute paths of files to upload
    target_path='<path/on/datastore>',
    overwrite=False,
)
```
### Download from Blob Datastore
Download the data from the blob container to the local file system.
```python
datastore.download(
    target_path,  # str: local directory to download to
    prefix='<path/on/datastore>',
    overwrite=False,
)
```
### Via Storage Explorer
Azure Storage Explorer is a free tool to easily manage your Azure cloud storage
resources from Windows, macOS, or Linux. Download it from [here](https://azure.microsoft.com/features/storage-explorer/).
Azure Storage Explorer gives you a (graphical) file explorer, so you can literally drag and drop
files into and out of your datastores.
See ["Link datastore to Azure Storage Explorer"](#link-datastore-to-azure-storage-explorer)
above for more details.
## Read from Datastore
Reference data in a `Datastore` in your code, for example to use in a remote setting.
### DataReference
First, connect to your basic assets: `Workspace`, `ComputeTarget` and `Datastore`.
```python
from azureml.core import ComputeTarget, Datastore, Workspace

ws: Workspace = Workspace.from_config()
compute_target: ComputeTarget = ws.compute_targets['<compute-target-name>']
ds: Datastore = ws.get_default_datastore()
```
Create a `DataReference`, either as mount:
```python
data_ref = ds.path('<path/on/datastore>').as_mount()
```
or as download:
```python
data_ref = ds.path('<path/on/datastore>').as_download()
```
:::info
To mount a datastore, the workspace needs to have read and write access to the underlying storage. For a read-only datastore, `as_download` is the only option.
:::
#### Consume DataReference in ScriptRunConfig
Add this DataReference to a ScriptRunConfig as follows.
```python
config = ScriptRunConfig(
    source_directory='.',
    script='script.py',
    arguments=[str(data_ref)],  # returns environment variable $AZUREML_DATAREFERENCE_example_data
    compute_target=compute_target,
)

config.run_config.data_references[data_ref.data_reference_name] = data_ref.to_config()
```
The command-line argument `str(data_ref)` returns the environment variable `$AZUREML_DATAREFERENCE_example_data`.
Finally, `data_ref.to_config()` instructs the run to mount the data to the compute target and to assign the
above environment variable appropriately.
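On the compute target, a hypothetical `script.py` can then read the resolved mount path from the argument passed above:
```python
# script.py (sketch): the first argument is the resolved value of
# $AZUREML_DATAREFERENCE_example_data, i.e. the mount path
import os
import sys

data_path = sys.argv[1]
print(os.listdir(data_path))  # list the files made available by the data reference
```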
#### Without specifying argument
Specify a `path_on_compute` to reference your data without the need for command-line arguments.
```python
data_ref = ds.path('<path/on/datastore>').as_mount()
data_ref.path_on_compute = '/tmp/data'

config = ScriptRunConfig(
    source_directory='.',
    script='script.py',
    compute_target=compute_target,
)

config.run_config.data_references[data_ref.data_reference_name] = data_ref.to_config()
```
## Create Dataset
### From local data
#### Upload to datastore
To upload a local directory `./data/`:
```python
datastore = ws.get_default_datastore()
datastore.upload(src_dir='./data', target_path='<path/on/datastore>', overwrite=True)
```
This will upload the entire directory `./data` from local to the default datastore associated
with your workspace `ws`.
#### Create dataset from files in datastore
To create a dataset from a directory on a datastore at `<path/on/datastore>`:
```python
datastore = ws.get_default_datastore()
dataset = Dataset.File.from_files(path=(datastore, '<path/on/datastore>'))
```
## Use Dataset
### ScriptRunConfig
To reference data from a dataset in a ScriptRunConfig you can either mount or download the
dataset using:
- `dataset.as_mount(path_on_compute)` : mount dataset to a remote run
- `dataset.as_download(path_on_compute)` : download the dataset to a remote run
**Path on compute** Both `as_mount` and `as_download` accept an (optional) parameter `path_on_compute`.
This defines the path on the compute target where the data is made available.
- If `None`, the data will be downloaded into a temporary directory.
- If `path_on_compute` starts with a `/` it will be treated as an **absolute path**. (If you have
specified an absolute path, please make sure that the job has permission to write to that directory.)
- Otherwise it will be treated as relative to the working directory.
Reference this data in a remote run, for example in mount-mode:
```python title="run.py"
arguments=[dataset.as_mount()]
config = ScriptRunConfig(source_directory='.', script='train.py', arguments=arguments)
experiment.submit(config)
```
and consumed in `train.py`:
```python title="train.py"
import os
import sys

data_dir = sys.argv[1]
print("===== DATA =====")
print("DATA PATH: " + data_dir)
print("LIST FILES IN DATA DIR...")
print(os.listdir(data_dir))
print("================")
```
For more details: [ScriptRunConfig](script-run-config)

@@ -0,0 +1,75 @@
---
title: Debugging
description: Guide to debugging in Azure ML.
keywords:
- debug
- log files
---
:::note
このコンテンツはお使いの言語では利用できません。
:::
## Azure ML Log Files
Azure ML's log files are an essential resource for debugging your Azure ML workloads.
| Log file | Description |
| - | - |
| `20_image_build_log*.txt` | Docker build logs. Only applicable when updating your Environment. Otherwise Azure ML will reuse the cached image. <br/><br/> If successful, contains image registry details for the corresponding image.|
| `55_azureml-execution*.txt` | Pulls image to compute target. Note, this log only appears once you have secured compute resources.|
| `65_job_prep*.txt` | Job preparation: Download your code to compute target and datastores (if requested). |
| **`70_driver_log.txt`** | **The standard output from your script. This is where your code's logs (e.g. print statements) show up.** <br/><br/> In the majority of cases you will monitor the logs here. |
| `75_job_post*.txt` | Job release: Send logs, release the compute resources back to Azure. |
:::info
You will not necessarily see every file for every run. For example, the `20_image_build_log*.txt` only appears when a new image is built (e.g. when you change your environment).
:::
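You can also pull all of a run's log files to your local machine with the SDK; a sketch, assuming an existing experiment name (`Run.get_all_logs` is part of azureml-core):
```python
from azureml.core import Experiment, Workspace

ws = Workspace.from_config()
run = next(Experiment(ws, '<experiment-name>').get_runs())  # most recent run
run.get_all_logs(destination='./logs')  # downloads e.g. 70_driver_log.txt
```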
### Find logs in the Studio
These log files are available via the Studio UI at https://ml.azure.com under Workspace > Experiment >
Run > "Outputs and logs".
![](img/log-files.png)
### Streaming logs
It is also possible to stream these logs directly to your local terminal using a `Run` object,
for example:
```python
from azureml.core import Workspace, Experiment, ScriptRunConfig
ws = Workspace.from_config()
config = ScriptRunConfig(...)
run = Experiment(ws, 'my-amazing-experiment').submit(config)
run.wait_for_completion(show_output=True)
```
## SSH
It can be useful to SSH into your compute for a variety of reasons - including to assist in debugging.
:::warning Enable SSH at compute creation
SSH needs to be enabled when you create the compute instance / target - see [Compute Targets](compute-targets#with-ssh) for details.
:::
1. Get **public ip** and **port number** for your compute.
Visit [ml.azure.com](https://ml.azure.com/) > select "Compute" tab > Locate the desired compute instance / target.
**Note.** The compute needs to be running in order to connect.
- In the case of compute instance this just requires turning it on.
- For compute targets there should be something running on the cluster. In this case you can select the "Nodes" tab of the cluster ([ml.azure.com](https://ml.azure.com/) > Compute > _your compute target_ > Nodes) to get Public IP & port number for each node.
2. Open your favorite shell and run:
```bash
ssh azureuser@<public-ip> -p <port-number>
```
:::info SSH key pair using RSA
We recommend setting up SSH public-private key pair: see [here](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/mac-create-ssh-keys) for more details.
:::

@@ -0,0 +1,349 @@
---
title: Distributed GPU Training
id: distributed-training
description: Guide to distributed training in Azure ML.
keywords:
- distributed training
- mpi
- process group
- pytorch
- horovod
- tensorflow
---
:::note
このコンテンツはお使いの言語では利用できません。
:::
## Basic Concepts
We assume readers already understand the basic concepts of distributed GPU training, such as _data parallelism, distributed data parallelism, and model parallelism_. This guide aims to help readers run their existing distributed training code on Azure ML.
:::info
If you don't know which type of parallelism to use, for >90% of the time you should use __Distributed Data Parallelism__.
:::
## MPI
Azure ML offers an MPI job to launch a given number of processes in each node. Users can adopt this approach to run distributed training using either per-process-launcher or per-node-launcher, depending on whether `process_count_per_node` is set to 1 (the default) for per-node-launcher, or equal to the number of devices/GPUs for per-process-launcher. Azure ML handles constructing the full MPI launch command (`mpirun`) behind the scenes.
:::note
Azure ML currently does not allow users to provide the full head-node-launcher command like `mpirun` or the DeepSpeed launcher. This functionality may be added in a future release.
:::
:::caution
To use the Azure ML MPI job, the base Docker image used by the job needs to have an MPI library installed. [Open MPI](https://www.open-mpi.org/) is included in all the [AzureML GPU base images](https://github.com/Azure/AzureML-Containers). If you are using a custom Docker image, you are responsible for making sure the image includes an MPI library. Open MPI is recommended, but you can also use a different MPI implementation such as Intel MPI. Azure ML also provides [curated environments](https://docs.microsoft.com/en-us/azure/machine-learning/resource-curated-environments) for popular frameworks.
:::
To run distributed training using MPI, follow these steps:
1. Use an Azure ML environment with the preferred deep learning framework and MPI. Azure ML provides [curated environments](https://docs.microsoft.com/en-us/azure/machine-learning/resource-curated-environments) for popular frameworks.
2. Define `MpiConfiguration` with the desired `process_count_per_node` and `node_count`. `process_count_per_node` should be equal to the number of GPUs per node for per-process-launch, or set to 1 (the default) for per-node-launch if the user script will be responsible for launching the processes per node.
3. Pass the `MpiConfiguration` object to the `distributed_job_config` parameter of `ScriptRunConfig`.
```python
from azureml.core import Workspace, ScriptRunConfig, Environment, Experiment
from azureml.core.runconfig import MpiConfiguration
curated_env_name = 'AzureML-PyTorch-1.6-GPU'
pytorch_env = Environment.get(workspace=ws, name=curated_env_name)
distr_config = MpiConfiguration(process_count_per_node=4, node_count=2)
run_config = ScriptRunConfig(
    source_directory='./src',
    script='train.py',
    compute_target=compute_target,
    environment=pytorch_env,
    distributed_job_config=distr_config,
)
# submit the run configuration to start the job
run = Experiment(ws, "experiment_name").submit(run_config)
```
### Horovod
If you are using [Horovod](https://horovod.readthedocs.io/en/stable/index.html) for distributed training with the deep learning framework of your choice, you can run distributed training on Azure ML using the MPI job configuration.
Simply ensure that you have taken care of the following:
* The training code is instrumented correctly with Horovod.
* Your Azure ML environment contains Horovod and MPI. The PyTorch and TensorFlow curated GPU environments come pre-configured with Horovod and its dependencies.
* Create an `MpiConfiguration` with your desired distribution.
#### Example
* [azureml-examples: TensorFlow distributed training using Horovod](https://github.com/Azure/azureml-examples/tree/main/workflows/train/tensorflow/mnist-distributed-horovod)
### DeepSpeed
To run distributed training with the [DeepSpeed](https://www.deepspeed.ai/) library on Azure ML, do not use DeepSpeed's custom launcher. Instead, configure an MPI job to launch the training job [with MPI](https://www.deepspeed.ai/getting-started/#mpi-and-azureml-compatibility).
Ensure that you have taken care of the following:
* Your Azure ML environment contains DeepSpeed and its dependencies, Open MPI, and mpi4py.
* Create an `MpiConfiguration` with your desired distribution.
#### Example
* [azureml-examples: Distributed training with DeepSpeed on CIFAR-10](https://github.com/Azure/azureml-examples/tree/main/workflows/train/deepspeed/cifar)
### Environment variables from Open MPI
When running MPI jobs with Open MPI images, the following environment variables are set for each launched process:
1. OMPI_COMM_WORLD_RANK - the rank of the process
2. OMPI_COMM_WORLD_SIZE - the world size
3. AZ_BATCH_MASTER_NODE - master address with port, MASTER_ADDR:MASTER_PORT
4. OMPI_COMM_WORLD_LOCAL_RANK - the local rank of the process on the node
5. OMPI_COMM_WORLD_LOCAL_SIZE - number of processes on the node
:::caution
Despite the name, the environment variable OMPI_COMM_WORLD_NODE_RANK does not correspond to the NODE_RANK. To use per-node-launcher, simply set `process_count_per_node=1` and use OMPI_COMM_WORLD_RANK as the NODE_RANK.
:::
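For example, a common (hypothetical) pattern when reusing `torch.distributed`'s `env://` initialization under an MPI job is to map the Open MPI / Azure Batch variables to the names PyTorch expects:
```python
# sketch: translate Open MPI / Azure Batch variables into the env:// names
# expected by torch.distributed; an illustrative pattern, not an Azure ML API
import os

os.environ['RANK'] = os.environ['OMPI_COMM_WORLD_RANK']
os.environ['LOCAL_RANK'] = os.environ['OMPI_COMM_WORLD_LOCAL_RANK']
os.environ['WORLD_SIZE'] = os.environ['OMPI_COMM_WORLD_SIZE']
master_addr, master_port = os.environ['AZ_BATCH_MASTER_NODE'].split(':')
os.environ['MASTER_ADDR'], os.environ['MASTER_PORT'] = master_addr, master_port
```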
## PyTorch
Azure ML also supports running distributed jobs using PyTorch's native distributed training capabilities (`torch.distributed`).
:::tip torch.nn.parallel.DistributedDataParallel vs torch.nn.DataParallel and torch.multiprocessing
For data parallelism, the [official PyTorch guidance](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html#comparison-between-dataparallel-and-distributeddataparallel) is to use DistributedDataParallel (DDP) over DataParallel for both single-node and multi-node distributed training. PyTorch also [recommends using DistributedDataParallel over the multiprocessing package](https://pytorch.org/docs/stable/notes/cuda.html#use-nn-parallel-distributeddataparallel-instead-of-multiprocessing-or-nn-dataparallel). Azure ML documentation and examples will therefore focus on DistributedDataParallel training.
:::
### Process group initialization
The backbone of any distributed training is based on a group of processes that know each other and can communicate with each other using a backend. For PyTorch, the process group is created by calling [torch.distributed.init_process_group](https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group) in __all distributed processes__ to collectively form a process group.
```
torch.distributed.init_process_group(backend='nccl', init_method='env://', ...)
```
The most common communication backends used are __mpi__, __nccl__ and __gloo__. For GPU-based training __nccl__ is strongly recommended for best performance and should be used whenever possible.
`init_method` specifies how the processes discover each other, and how the process group is initialized and verified using the communication backend. By default, if `init_method` is not specified, PyTorch will use the environment variable initialization method (`env://`). This is also the recommended initialization method to use in your training code to run distributed PyTorch on Azure ML. For environment variable initialization, PyTorch will look for the following environment variables:
- **MASTER_ADDR** - IP address of the machine that will host the process with rank 0.
- **MASTER_PORT** - A free port on the machine that will host the process with rank 0.
- **WORLD_SIZE** - The total number of processes. This should be equal to the total number of devices (GPU) used for distributed training.
- **RANK** - The (global) rank of the current process. The possible values are 0 to (world size - 1).
For more information on process group initialization, see the [PyTorch documentation](https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group).
Beyond these, many applications will also need the following environment variables:
- **LOCAL_RANK** - The local (relative) rank of the process within the node. The possible values are 0 to (# of processes on the node - 1). This information is useful because many operations, such as data preparation, should be performed only once per node, usually on local_rank = 0 (see the sketch after this list).
- **NODE_RANK** - The rank of the node for multi-node training. The possible values are 0 to (total # of nodes - 1).
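A minimal sketch of how a training script typically consumes these variables (standard PyTorch usage, not an Azure ML-specific API):
```python
# sketch: typical DistributedDataParallel setup inside train.py, assuming
# RANK, WORLD_SIZE, MASTER_ADDR, MASTER_PORT and LOCAL_RANK are already set
import os
import torch
import torch.distributed as dist

dist.init_process_group(backend='nccl', init_method='env://')
local_rank = int(os.environ['LOCAL_RANK'])
torch.cuda.set_device(local_rank)  # pin this process to its GPU
if local_rank == 0:
    print('running per-node work (e.g. data preparation) once')
```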
### Launch options
The Azure ML PyTorch job supports two types of options for launching distributed training:
1. __Per-process-launcher__: The system will launch all distributed processes for the user, with all the relevant information (e.g. environment variables) to set up the process group.
2. __Per-node-launcher__: The user provides Azure ML with the utility launcher that will get run on each node. The utility launcher will handle launching each of the processes on a given node. Locally within each node, RANK and LOCAL_RANK are set up by the launcher. The **torch.distributed.launch** utility and PyTorch Lightning both belong in this category.
There are no fundamental differences between these launch options; it is largely up to the user's preference or the conventions of the frameworks/libraries built on top of vanilla PyTorch (such as Lightning or Hugging Face).
The following sections go into more detail on how to configure Azure ML PyTorch jobs for each of the launch options.
### DistributedDataParallel (per-process-launch)
Azure ML supports launching each process for the user without the user needing to use a launcher utility like `torch.distributed.launch`.
To run a distributed PyTorch job, you will just need to do the following:
1. Specify the training script and arguments
2. Create a `PyTorchConfiguration` and specify the `process_count` as well as the `node_count`. The `process_count` corresponds to the total number of processes you want to run for your job. This should typically equal `# GPUs per node x # nodes`. If `process_count` is not specified, Azure ML will by default launch one process per node.
Azure ML will set the MASTER_ADDR, MASTER_PORT, WORLD_SIZE, and NODE_RANK environment variables on each node, in addition to setting the process-level RANK and LOCAL_RANK environment variables.
:::caution
In order to use this option for multi-process-per-node training, you will need to use Azure ML Python SDK `>= 1.22.0`, as `process_count` was introduced in 1.22.0.
:::
```python
from azureml.core import ScriptRunConfig, Environment, Experiment
from azureml.core.runconfig import PyTorchConfiguration
curated_env_name = 'AzureML-PyTorch-1.6-GPU'
pytorch_env = Environment.get(workspace=ws, name=curated_env_name)
distr_config = PyTorchConfiguration(process_count=8, node_count=2)
run_config = ScriptRunConfig(
    source_directory='./src',
    script='train.py',
    arguments=['--epochs', 50],
    compute_target=compute_target,
    environment=pytorch_env,
    distributed_job_config=distr_config,
)
run = Experiment(ws, 'experiment_name').submit(run_config)
```
:::tip
If your training script passes information like local rank or rank as script arguments, you can reference the environment variable(s) in the arguments:
`arguments=['--epochs', 50, '--local_rank', $LOCAL_RANK]`.
:::
#### Example
- [azureml-examples: Distributed training with PyTorch on CIFAR-10](https://github.com/Azure/azureml-examples/tree/main/workflows/train/pytorch/cifar-distributed)
### Using `torch.distributed.launch` (per-node-launch)
PyTorch provides a launch utility in [torch.distributed.launch](https://pytorch.org/docs/stable/distributed.html#launch-utility) that users can use to launch multiple processes per node. The `torch.distributed.launch` module will spawn multiple training processes on each of the nodes.
The following steps will demonstrate how to configure a PyTorch job with a per-node-launcher on Azure ML that will achieve the equivalent of running the following command:
```bash
python -m torch.distributed.launch --nproc_per_node <num processes per node> \
  --nnodes <num nodes> --node_rank $NODE_RANK --master_addr $MASTER_ADDR \
  --master_port $MASTER_PORT --use_env \
  <your training script> <your script arguments>
```
1. Provide the `torch.distributed.launch` command to the `command` parameter of the `ScriptRunConfig` constructor. Azure ML will run this command on each node of your training cluster. `--nproc_per_node` should be less than or equal to the number of GPUs available on each node. MASTER_ADDR, MASTER_PORT, and NODE_RANK are all set by Azure ML, so you can just reference the environment variables in the command. Azure ML sets MASTER_PORT to `6105`, but you can pass a different value to the `--master_port` argument of torch.distributed.launch command if you wish. (The launch utility will reset the environment variables.)
2. Create a `PyTorchConfiguration` and specify the `node_count`.
```python
from azureml.core import ScriptRunConfig, Environment, Experiment
from azureml.core.runconfig import PyTorchConfiguration
curated_env_name = 'AzureML-PyTorch-1.6-GPU'
pytorch_env = Environment.get(workspace=ws, name=curated_env_name)
distr_config = PyTorchConfiguration(node_count=2)
launch_cmd = "python -m torch.distributed.launch --nproc_per_node 4 --nnodes 2 --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT --use_env train.py --epochs 50".split()
run_config = ScriptRunConfig(
    source_directory='./src',
    command=launch_cmd,
    compute_target=compute_target,
    environment=pytorch_env,
    distributed_job_config=distr_config,
)
run = Experiment(ws, 'experiment_name').submit(run_config)
```
:::tip Single-node multi-GPU training
If you are using the launch utility to run single-node multi-GPU PyTorch training, you do not need to specify the `distributed_job_config` parameter of ScriptRunConfig.
```python
launch_cmd = "python -m torch.distributed.launch --nproc_per_node 4 --use_env train.py --epochs 50".split()
run_config = ScriptRunConfig(
    source_directory='./src',
    command=launch_cmd,
    compute_target=compute_target,
    environment=pytorch_env,
)
```
:::
#### Example
- [azureml-examples: Distributed training with PyTorch on CIFAR-10](https://github.com/Azure/azureml-examples/tree/main/workflows/train/pytorch/cifar-distributed)
### PyTorch Lightning
[PyTorch Lightning](https://pytorch-lightning.readthedocs.io/en/stable/) is a lightweight open-source library that provides a high-level interface for PyTorch. Lightning abstracts away much of the lower-level distributed training configuration required for vanilla PyTorch, and allows users to run their training scripts in single GPU, single-node multi-GPU, and multi-node multi-GPU settings. Behind the scenes, it launches multiple processes for the user, similar to `torch.distributed.launch`.
For single-node training (including single-node multi-GPU), you can run your code on Azure ML without needing to specify a `distributed_job_config`. For multi-node training, Lightning requires the following environment variables to be set on each node of your training cluster:
- MASTER_ADDR
- MASTER_PORT
- NODE_RANK
To run multi-node Lightning training on Azure ML, you can largely follow the [per-node-launch guide](#using-torchdistributedlaunch-per-node-launch):
- Define the `PyTorchConfiguration` and specify the desired `node_count`. Do not specify `process_count` as Lightning internally handles launching the worker processes for each node.
- For PyTorch jobs, Azure ML handles setting the MASTER_ADDR, MASTER_PORT, and NODE_RANK environment variables required by Lightning.
- Lightning will handle computing the world size from the Trainer flags `--gpus` and `--num_nodes` and manage rank and local rank internally.
```python
from azureml.core import ScriptRunConfig, Experiment
from azureml.core.runconfig import PyTorchConfiguration
nnodes = 2
args = ['--max_epochs', 50, '--gpus', 2, '--accelerator', 'ddp', '--num_nodes', nnodes]
distr_config = PyTorchConfiguration(node_count=nnodes)
run_config = ScriptRunConfig(
    source_directory='./src',
    script='train.py',
    arguments=args,
    compute_target=compute_target,
    environment=pytorch_env,
    distributed_job_config=distr_config,
)
run = Experiment(ws, 'experiment_name').submit(run_config)
```
#### Example
* [azureml-examples: Multi-node training with PyTorch Lightning](https://github.com/Azure/azureml-examples/blob/main/tutorials/using-pytorch-lightning/4.train-multi-node-ddp.ipynb)
### Hugging Face Transformers
Hugging Face provides many [examples](https://github.com/huggingface/transformers/tree/master/examples) for using its Transformers library with `torch.distributed.launch` to run distributed training. To run these examples and your own custom training scripts using the Transformers Trainer API, follow the [Using `torch.distributed.launch`](#using-torchdistributedlaunch-per-node-launch) section.
Sample job configuration code to fine-tune the BERT large model on the text classification MNLI task using the `run_glue.py` script on one node with 8 GPUs:
```python
from azureml.core import ScriptRunConfig
from azureml.core.runconfig import PyTorchConfiguration
distr_config = PyTorchConfiguration() # node_count defaults to 1
launch_cmd = "python -m torch.distributed.launch --nproc_per_node 8 text-classification/run_glue.py --model_name_or_path bert-large-uncased-whole-word-masking --task_name mnli --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 8 --learning_rate 2e-5 --num_train_epochs 3.0 --output_dir /tmp/mnli_output".split()
run_config = ScriptRunConfig(
    source_directory='./src',
    command=launch_cmd,
    compute_target=compute_target,
    environment=pytorch_env,
    distributed_job_config=distr_config,
)
```
You can also use the [per-process-launch](#distributeddataparallel-per-process-launch) option to run distributed training without using `torch.distributed.launch`. One thing to keep in mind if using this method is that the transformers [TrainingArguments](https://huggingface.co/transformers/main_classes/trainer.html?highlight=launch#trainingarguments) expects the local rank to be passed in as an argument (`--local_rank`). `torch.distributed.launch` takes care of this when `--use_env=False`, but if you are using per-process-launch you will need to explicitly pass this in as an argument to the training script `--local_rank=$LOCAL_RANK` as Azure ML only sets the LOCAL_RANK environment variable.
## TensorFlow
If you are using [native distributed TensorFlow](https://www.tensorflow.org/guide/distributed_training) in your training code, such as TensorFlow 2.x's `tf.distribute.Strategy` API, you can launch the distributed job via Azure ML using the `TensorflowConfiguration`.
To do so, specify a `TensorflowConfiguration` object to the `distributed_job_config` parameter of the `ScriptRunConfig` constructor. If you are using `tf.distribute.experimental.MultiWorkerMirroredStrategy`, specify the `worker_count` in the `TensorflowConfiguration` corresponding to the number of nodes for your training job.
```python
from azureml.core import ScriptRunConfig, Environment, Experiment
from azureml.core.runconfig import TensorflowConfiguration
curated_env_name = 'AzureML-TensorFlow-2.3-GPU'
tf_env = Environment.get(workspace=ws, name=curated_env_name)
distr_config = TensorflowConfiguration(worker_count=2, parameter_server_count=0)
run_config = ScriptRunConfig(
    source_directory='./src',
    script='train.py',
    compute_target=compute_target,
    environment=tf_env,
    distributed_job_config=distr_config,
)
# submit the run configuration to start the job
run = Experiment(ws, "experiment_name").submit(run_config)
```
If your training script uses the parameter server strategy for distributed training, i.e. for legacy TensorFlow 1.x, you will also need to specify the number of parameter servers to use in the job, e.g. `tf_config = TensorflowConfiguration(worker_count=2, parameter_server_count=1)`.
### TF_CONFIG
In TensorFlow, the **TF_CONFIG** environment variable is required for training on multiple machines. For TensorFlow jobs, Azure ML will configure and set the TF_CONFIG variable appropriately for each worker before executing your training script.
You can access TF_CONFIG from your training script if you need to: `os.environ['TF_CONFIG']`.
Example TF_CONFIG set on a chief worker node:
```json
TF_CONFIG='{
    "cluster": {
        "worker": ["host0:2222", "host1:2222"]
    },
    "task": {"type": "worker", "index": 0},
    "environment": "cloud"
}'
```
#### Example
- [azureml-examples: Distributed TensorFlow training with MultiWorkerMirroredStrategy](https://github.com/Azure/azureml-examples/tree/main/workflows/train/tensorflow/mnist-distributed)
## Accelerating GPU training with InfiniBand
Certain Azure VM series, specifically the NC, ND, and H-series, now have RDMA-capable VMs with SR-IOV and InfiniBand support. These VMs communicate over the low-latency, high-bandwidth InfiniBand network, which is much more performant than Ethernet-based connectivity. SR-IOV for InfiniBand enables near bare-metal performance for any MPI library (MPI is leveraged by many distributed training frameworks and tooling, including NVIDIA's NCCL software). These SKUs are intended to meet the needs of computationally intensive, GPU-accelerated machine learning workloads. For more information, see [Accelerating Distributed Training in Azure Machine Learning with SR-IOV](https://techcommunity.microsoft.com/t5/azure-ai/accelerating-distributed-training-in-azure-machine-learning/ba-p/1059050).
If you create an `AmlCompute` cluster of one of these RDMA-capable, InfiniBand-enabled sizes, such as `Standard_ND40rs_v2`, the OS image will come with the Mellanox OFED driver required to enable InfiniBand preinstalled and preconfigured.
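For example, a minimal sketch provisioning such a cluster (this assumes your subscription has quota for the `Standard_ND40rs_v2` SKU in the workspace region):

```python
from azureml.core.compute import AmlCompute, ComputeTarget

# InfiniBand-enabled, RDMA-capable SKU; the OS image ships with the
# Mellanox OFED driver preinstalled
config = AmlCompute.provisioning_configuration(
    vm_size='Standard_ND40rs_v2',
    min_nodes=0,
    max_nodes=2,
)
ib_cluster = ComputeTarget.create(ws, 'ib-cluster', config)
ib_cluster.wait_for_completion(show_output=True)
```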
---
title: 'Azure ML Containers'
description: Guide to containers in Azure ML.
keywords:
- containers
- dockerfile
- docker
- environment
---
:::note
This content is not available in your language.
:::
In this post we explain how Azure ML builds the containers used to run your code.
## Dockerfile
Each job in Azure ML runs with an associated `Environment`. In practice, each environment
corresponds to a Docker image.
There are numerous ways to define an environment - from specifying a set of required Python packages
through to directly providing a custom Docker image. In each case the contents of the associated
dockerfile are available directly from the environment object.
For more background: [Environment](environment)
#### Example
Suppose you create an environment - in this example we will work with Conda:
```yml title="env.yml"
name: pytorch
channels:
- defaults
- pytorch
dependencies:
- python=3.7
- pytorch
- torchvision
```
We can create and register this as an `Environment` in our workspace `ws` as follows:
```python
from azureml.core import Environment
env = Environment.from_conda_specification('pytorch', 'env.yml')
env.register(ws)
```
In order to consume this environment in a remote run, Azure ML builds a docker image
that creates the corresponding python environment.
The dockerfile used to build this image is available directly from the environment object.
```python
details = env.get_image_details(ws)
print(details['ingredients']['dockerfile'])
```
Let's take a look:
```docker title="Dockerfile" {1,7-12}
FROM mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20200821.v1@sha256:8cee6f674276dddb23068d2710da7f7f95b119412cc482675ac79ba45a4acf99
USER root
RUN mkdir -p $HOME/.cache
WORKDIR /
COPY azureml-environment-setup/99brokenproxy /etc/apt/apt.conf.d/
RUN if dpkg --compare-versions `conda --version | grep -oE '[^ ]+$'` lt 4.4.11; then conda install conda==4.4.11; fi
COPY azureml-environment-setup/mutated_conda_dependencies.yml azureml-environment-setup/mutated_conda_dependencies.yml
RUN ldconfig /usr/local/cuda/lib64/stubs && conda env create -p /azureml-envs/azureml_7459a71437df47401c6a369f49fbbdb6 \
    -f azureml-environment-setup/mutated_conda_dependencies.yml && rm -rf "$HOME/.cache/pip" && conda clean -aqy && \
    CONDA_ROOT_DIR=$(conda info --root) && rm -rf "$CONDA_ROOT_DIR/pkgs" && \
    find "$CONDA_ROOT_DIR" -type d -name __pycache__ -exec rm -rf {} + && ldconfig
# AzureML Conda environment name: azureml_7459a71437df47401c6a369f49fbbdb6
ENV PATH /azureml-envs/azureml_7459a71437df47401c6a369f49fbbdb6/bin:$PATH
ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml_7459a71437df47401c6a369f49fbbdb6
ENV LD_LIBRARY_PATH /azureml-envs/azureml_7459a71437df47401c6a369f49fbbdb6/lib:$LD_LIBRARY_PATH
COPY azureml-environment-setup/spark_cache.py azureml-environment-setup/log4j.properties /azureml-environment-setup/
RUN if [ $SPARK_HOME ]; then /bin/bash -c '$SPARK_HOME/bin/spark-submit /azureml-environment-setup/spark_cache.py'; fi
ENV AZUREML_ENVIRONMENT_IMAGE True
CMD ["bash"]
```
Notice:
- The base image here is a standard image maintained by Azure ML. Dockerfiles for all base images are available on
github: https://github.com/Azure/AzureML-Containers
- The dockerfile references `mutated_conda_dependencies.yml` to build the Python environment via Conda.
Get the contents of `mutated_conda_dependencies.yml` from the environment:
```python
print(env.python.conda_dependencies.serialize_to_string())
```
This looks like:
```bash title="mutated_conda_dependencies.yml"
channels:
- defaults
- pytorch
dependencies:
- python=3.7
- pytorch
- torchvision
name: azureml_7459a71437df47401c6a369f49fbbdb6
```
---
title: Environment
description: Guide to working with Python environments in Azure ML.
keywords:
- environment
- python
- conda
- pip
- docker
- environment variables
---
:::note
This content is not available in your language.
:::
Azure ML Environments are used to define the containers where your code will run. In the simplest case you can add custom Python libraries using pip, Conda or directly via the Azure ML Python SDK. If more customization is necessary you can use custom docker images.
This page provides examples of creating environments:
- From pip `requirements.txt` file
- From Conda `env.yml` file
- Directly via the Azure ML Python SDK
- From custom Docker image
## Azure ML Managed Python Environments
### From pip
Create an Environment from a pip `requirements.txt` file:
```python
from azureml.core import Environment
env = Environment.from_pip_requirements('<env-name>', '<path/to/requirements.txt>')
```
### From Conda
Create an Environment from a Conda `env.yml` file:
```python
from azureml.core import Environment
env = Environment.from_conda_specification('<env-name>', '<path/to/env.yml>')
```
### In Azure ML SDK
Use the `CondaDependencies` class to create a Python environment directly with the Azure ML
Python SDK:
```python
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
conda = CondaDependencies()
# add channels
conda.add_channel('pytorch')
# add conda packages
conda.add_conda_package('python=3.7')
conda.add_conda_package('pytorch')
conda.add_conda_package('torchvision')
# add pip packages
conda.add_pip_package('pyyaml')
conda.add_pip_package('mpi4py')
conda.add_pip_package('deepspeed')
# create environment
env = Environment('pytorch')
env.python.conda_dependencies = conda
```
## Custom docker image / dockerfile
To create an `Environment` from a custom docker image:
```python
import os

from azureml.core import Environment

env = Environment('<env-name>')
env.docker.base_image = '<image-name>'
env.docker.base_image_registry.address = '<container-registry-address>'
env.docker.base_image_registry.username = '<acr-username>'
env.docker.base_image_registry.password = os.environ.get("CONTAINER_PASSWORD")
# optional
env.python.user_managed_dependencies = True
env.python.interpreter_path = '/opt/miniconda/envs/example/bin/python'
```
For example, Azure Container Registry addresses are of the form `"<acr-name>.azurecr.io"`.
**Never check in passwords**. In this example we provide the password via an environment variable.
To create an `Environment` from a dockerfile:
```python
env = Environment('<env-name>')
env.docker.base_dockerfile = './Dockerfile' # path to your dockerfile
# optional
env.python.user_managed_dependencies = True
env.python.interpreter_path = '/opt/miniconda/envs/example/bin/python'
```
**Remarks.**
- `user_managed_dependencies = True`: You are responsible for installing all necessary Python
libraries, typically in your docker image.
- `interpreter_path`: Only used when `user_managed_dependencies=True` and sets the Python interpreter
path (e.g. `which python`).
It is possible to have Azure ML manage your Python installation when providing a custom base image. For example, using pip `requirements.txt`:
```python
env = Environment.from_pip_requirements('<env-name>', '<path/to/requirements.txt>')
env.docker.base_dockerfile = './Dockerfile'
```
**Note.** In this case Python libraries installed in `Dockerfile` will **not** be available.
### Build custom docker image for Azure ML
We **strongly** recommend building your docker image from one of the Azure ML base images available
here: [AzureML-Containers GitHub Repo](https://github.com/Azure/AzureML-Containers) - like this:
```dockerfile title="Dockerfile"
FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
...
```
These images come configured with all the requirements to run on Azure ML.
If you want to build your image from scratch, here is a list of requirements and recommendations to keep in mind (a minimal sketch follows the list):
- **Conda**: Azure ML uses Conda to manage python environments by default. If you intend to allow Azure ML to manage the Python environment, Conda is required.
- **libfuse**: Required when using `Dataset`
- **OpenMPI**: Required for distributed runs
- **nvidia/cuda**: (Recommended) For GPU-based training, build your image from [nvidia/cuda](https://hub.docker.com/r/nvidia/cuda)
- **Mellanox OFED user space drivers**: (Recommended) For SKUs with InfiniBand
We suggest looking at the [dockerfiles of Azure ML base images](https://github.com/Azure/AzureML-Containers) as references.
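For illustration, a minimal from-scratch sketch meeting these requirements (the base image tag and package choices are hypothetical; prefer the Azure ML base images above whenever possible):

```dockerfile title="Dockerfile"
# GPU training: build from nvidia/cuda
FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04

# OpenMPI for distributed runs, libfuse for Dataset mounting
RUN apt-get update && apt-get install -y --no-install-recommends \
        wget openmpi-bin libopenmpi-dev libfuse-dev && \
    rm -rf /var/lib/apt/lists/*

# Conda so that Azure ML can manage the Python environment
RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash /tmp/miniconda.sh -b -p /opt/miniconda && rm /tmp/miniconda.sh
ENV PATH=/opt/miniconda/bin:$PATH
```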
### Use custom image from a private registry
Azure ML can use a custom image from a private registry as long as login information is provided.
```python
env = Environment('<env-name>')
env.docker.base_image = "/my/private/img:tag"  # image repository path
env.docker.base_image_registry.address = "myprivateacr.azurecr.io" # private registry
# Retrieve username and password from the workspace key vault
env.docker.base_image_registry.username = ws.get_default_keyvault().get_secret("username")
env.docker.base_image_registry.password = ws.get_default_keyvault().get_secret("password")
```
## Environment Management
### Registered Environments
Register an environment `env: Environment` to your workspace `ws` to reuse/share with your team.
```python
env.register(ws)
```
Registered environments can be obtained directly from the workspace handle `ws`:
```python
envs: Dict[str, Environment] = ws.environments
```
This dictionary contains custom environments that have been registered to the workspace as well as a
collection of _curated environments_ maintained by Azure ML.
#### Example.
```python
# create / update, register environment
env = Environment.from_pip_requirements('my-env', 'requirements.txt')
env.register(ws)
# use later
env = ws.environments['my-env']
# get a specific version
env = Environment.get(ws, 'my-env', version=6)
```
### Save / Load Environments
Save an environment to a local directory:
```python
env.save_to_directory('<path/to/local/directory>', overwrite=True)
```
This will generate a directory with two (human-understandable and editable) files:
- `azureml_environment.json` : Metadata including name, version, environment variables and Python and Docker configuration
- `conda_dependencies.yml` : Standard conda dependencies YAML (for more details see [Conda docs](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file)).
Load this environment later with
```python
env = Environment.load_from_directory('<path/to/local/directory>')
```
### Environment Variables
To set environment variables use the `environment_variables: Dict[str, str]` attribute. Environment variables
are set on the process where the user script is executed.
```python
env = Environment('example')
env.environment_variables['EXAMPLE_ENV_VAR'] = 'EXAMPLE_VALUE'
```
## Hints and tips
When the conda dependencies are managed by Azure ML (`user_managed_dependencies=False`, the default), Azure ML checks whether the same environment has already been materialized into a docker image in the Azure Container Registry associated with the Azure ML workspace. If it is a new environment, Azure ML runs a job preparation stage to build a new docker image for it. You will see an image build log file in the logs, which you can use to monitor the image build progress. The job won't start until the image is built and pushed to the container registry.
This image building process can take some time and delay your job start. To avoid unnecessary image building, consider:
1. Registering an environment that contains most packages you need, and reusing it when possible.
2. If you only need a few extra packages on top of an existing environment:
    1. If the existing environment is a docker image, use a dockerfile based on this image, so you only need to add one layer to install the extra packages.
    2. Installing extra Python packages in your user script, so the package installation happens in the script run as part of your code, instead of asking Azure ML to treat them as part of a new environment. Consider using a [setup script](#advanced-shell-initialization-script).
Due to the intricacy of Python package dependencies and potential version conflicts, we recommend using custom docker images and dockerfiles (based on Azure ML base images) to manage your own Python environment. This practice not only gives you full transparency over the environment, but also saves image building time during agile development.
### Build docker images locally and push to Azure Container Registry
If you have docker installed locally, you can build the docker image from an Azure ML environment locally, with the option to push the image directly to the workspace ACR. This is recommended when iterating on a dockerfile, since a local build can utilize cached layers.
```python
from azureml.core import Environment
myenv = Environment(name='<env-name>')
registered_env = myenv.register(ws)
registered_env.build_local(ws, useDocker=True, pushImageToWorkspaceAcr=True)
```
### Bootstrap Script
It can be useful to invoke a `bootstrap.sh` script for faster development. One typical example
would be to modify the Python installation _at runtime_ to avoid frequent image rebuilding.
This can be done quite simply with _commands_. First set up your `bootstrap.sh` script.
```bash title="bootstrap.sh"
echo "Running bootstrap.sh"
pip install torch==1.8.0+cu111
...
```
To have this run ahead of your training script `train.py`, make use of the command:
```python
command = "bash bootstrap.sh && python train.py --learning_rate 1e-5".split()
config = ScriptRunConfig(
source_directory='<path/to/code>',
command=command,
compute_target=compute_target,
environment=environment,
)
```
See [Running Code in the Cloud](script-run-config) for more details on `ScriptRunConfig`.
### Distributed bootstrapping
In some cases you may wish to run certain parts of your `bootstrap.sh` script
on certain ranks in a distributed setup. This can be achieved with a little care
as follows:
```bash title="bootstrap.sh"
MARKER="/tmp/.azureml_bootstrap_complete"
if [[ $AZ_BATCHAI_TASK_INDEX = 0 ]] ; then
echo "Running bootstrap.sh"
echo "Installing transformers from source"
pip install git+https://github.com/huggingface/transformers
python -c "from transformers import pipeline; print(pipeline('sentiment-analysis')('we love you'))"
pip install datasets
pip install tensorflow
echo "Installation complete"
touch $MARKER
fi
echo "Barrier..."
while [[ ! -f $MARKER ]]
do
sleep 1
done
echo "Bootstrap complete!"
```
This script will wait for local rank 0 (`$AZ_BATCHAI_TASK_INDEX`) to create its `MARKER` file
before the other processes continue.
### Use Keyvault to pass secrets
#### Workspace Default Keyvault
Each Azure ML workspace comes with a keyvault (you can find it in the Azure Portal under the same resource
group as your workspace).
```python
from azureml.core import Workspace
ws = Workspace.from_config()
kv = ws.get_default_keyvault()
```
This can be used both to get and set secrets:
```python
import os
from azureml.core import Keyvault
# add a secret to keyvault
kv.set_secret(name="<my-secret>", value=os.environ.get("MY_SECRET"))
# get a secret from the keyvault
secret = kv.get_secret(name="<my-secret>")
# equivalently, from within a submitted run
secret = run.get_secret(name="<my-secret>")
```
#### Generic Azure Keyvault
Of course you can also make use of other keyvaults you might have in Azure.
```python
from azure.identity import DefaultAzureCredential
from azure.keyvault.secrets import SecretClient
credential = DefaultAzureCredential()

# placeholders: your keyvault URL and the name of the secret to fetch
kv_url = "https://<my-keyvault>.vault.azure.net"
secret_name = "<my-secret>"

client = SecretClient(vault_url=kv_url, credential=credential)
my_secret = client.get_secret(secret_name).value
env = Environment('example')
env.environment_variables['POWERFUL_SECRET'] = my_secret
```
Be sure to add `azure-identity` and `azure-keyvault` to your project's requirements in this case.
```bash
pip install azure-identity azure-keyvault
```
---
title: Installation
description: Guide to installing Azure ML Python SDK and setting up key resources.
keywords:
- azureml-sdk
- installation
- workspace
- compute
- cpu
- gpu
---
Install the Azure ML Python SDK:
```console
pip install azureml-sdk
```
### Create a Workspace
```python
from azureml.core import Workspace
ws = Workspace.create(name='<my_workspace_name>',                # your workspace name
                      subscription_id='<azure-subscription-id>', # your subscription ID
                      resource_group='<myresourcegroup>',        # your resource group name
                      create_resource_group=True,
                      location='<NAME_OF_REGION>')               # region to create resources in, e.g. 'japaneast'
# write the workspace details to a config file: .azureml/config.json
ws.write_config(path='.azureml')
```
:::info
From now on you can access the workspace simply with:
```python
from azureml.core import Workspace
ws = Workspace.from_config()
```
:::
### Create a Compute Target
The following example creates a compute target in your workspace with:
- VM type: CPU
- VM size: STANDARD_D2_V2
- Maximum number of nodes in the cluster: 4
- Idle time before the cluster nodes automatically scale down: 2400 seconds
To use GPUs or change the VM size, modify the code below.
```python
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
ws = Workspace.from_config() # automatically looks in the .azureml/ directory
# choose a name for your cluster
cpu_cluster_name = "cpu-cluster"
try:
    # check whether the cluster already exists
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # if not, create it
compute_config = AmlCompute.provisioning_configuration(
vm_size='STANDARD_D2_V2',
max_nodes=4,
idle_seconds_before_scaledown=2400,)
cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
cpu_cluster.wait_for_completion(show_output=True)
```
:::info
From now on you can access the compute target simply with:
```python
from azureml.core import ComputeTarget
cpu_cluster = ComputeTarget(ws, 'cpu-cluster')
```
:::
---
title: Metrics
description: Guide to metric logging in Azure ML.
keywords:
- metric
- logging
---
## Logging Metrics
Metrics are logged in Azure ML against the run, and multiple runs are recorded against an experiment.
Azure ML stores the metric history and visualizes it for you.
### `log`
Log a single value of a metric to a run:
```python
from azureml.core import Run
run = Run.get_context()
run.log('metric-name', metric_value)
```
You can also log the same metric multiple times within a run, in which case the logged values are displayed as a chart.
### `log_row`
Log a metric with multiple columns:
```python
from azureml.core import Run
run = Run.get_context()
run.log_row("Y over X", x=1, y=0.4)
```
:::info More logging options
These are the most common APIs used for logging metrics, but see [here](https://docs.microsoft.com/azure/machine-learning/how-to-log-view-metrics#data-types) for a complete list.
:::
## Viewing Metrics
Metrics are automatically viewable in Azure ML Studio. You can follow the link [here](https://ml.azure.com), or look it up from the SDK:
```python
run.get_workspace_url()
```
Select the "Metrics" tab and pick the metric you want to view.
![](/img/view-metrics.png)
### Viewing Metrics with the SDK
View the metrics logged to a run (for details see [Experiment and Run](run)):
```python
metrics = run.get_metrics()
# metrics is of type Dict[str, List[float]], mapping the metric names
# to a list of the values logged against the run
metrics.get('metric-name')
# list of metric values in the order they were logged
```
To view all records of the metric `my-metric` in the experiment `my-experiment`:
```python
experiments = ws.experiments
# mapping of experiment names to experiment objects
exp = experiments['my-experiment']
for run in exp.get_runs():
metrics = run.get_metrics()
my_metric = metrics.get('my-metric')
if my_metric:
print(my_metric)
```
## Examples
### Logging with MLFlow
Log metrics to Azure ML with [MLFlow](https://mlflow.org/).
```python
from azureml.core import Run
# connect to the workspace containing the running experiment and run
run = Run.get_context()
ws = run.experiment.workspace
# get the MLFlow tracking URI associated with the workspace
mlflow_url = ws.get_mlflow_tracking_uri()
```
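The snippet above only retrieves the tracking URI. To actually log through MLFlow, point it at that URI, as in this sketch (assuming the `mlflow` and `azureml-mlflow` packages are installed; the metric is hypothetical):

```python
import mlflow

# associate MLFlow with the Azure ML workspace
mlflow.set_tracking_uri(mlflow_url)
mlflow.set_experiment(run.experiment.name)

with mlflow.start_run():
    mlflow.log_metric('validation_loss', 0.42)  # hypothetical metric
```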
### Logging with PyTorch Lightning
This example:
- includes Lightning's `TensorBoardLogger`
- sets up Lightning's `MLFlowLogger` using Azure ML's `Run.get_context()`
- adds this logger only when running as part of an Azure ML run
```python
import pytorch_lightning as pl
run = None
try:
from azureml.core.run import Run, _OfflineRun
run = Run.get_context()
if isinstance(run, _OfflineRun):
run = None
except ImportError:
print("Couldn't import azureml.core.run.Run")
def get_logger():
tb_logger = pl.loggers.TensorBoardLogger('logs/')
logger = [tb_logger]
if run is not None:
mlflow_url = run.experiment.workspace.get_mlflow_tracking_uri()
mlf_logger = pl.loggers.MLFlowLogger(
experiment_name=run.experiment.name,
tracking_uri=mlflow_url,
)
mlf_logger._run_id = run.id
logger.append(mlf_logger)
return logger
```
Now include these loggers in Lightning's `Trainer` class:
```python
logger = get_logger()
trainer = pl.Trainer.from_argparse_args(
args=args,
logger=logger,
)
trainer.fit(model)
```
---
title: Run History
---
:::note
This content is not available in your language.
:::
Azure ML can supercharge your ML workloads in (at least!) two ways:
- AML Compute: Providing powerful compute resources to train larger models
- Run history: Best-in-class lineage and reproducibility
In this article we focus on Run History - and why you need it in your life!
As teams progress to running dozens, and eventually hundreds, of experiments, having
some way to organize them is essential. Run History is a service that provides a number
of features that quickly become essential to your ML model builders:
### Experiments and Runs
When you are running dozens of experiments in multiple different projects, having a clear
way to organize and search through the results is key. Azure ML provides two concepts to help
with this: `Run`s and `Experiment`s.
#### Runs
A run is a single execution of your code - usually a training script. The run has a life cycle:
once the code is submitted to Azure ML (for example, via a `ScriptRunConfig`) a `Run` object is
created; the compute target is prepared (nodes are provisioned, containers hosting your Python
environment are fired up), the entry point script is called (`$ python run.py [args]`) and logs
start being generated:
start being generated:
```console
Files already downloaded and verified
epoch=1, batch= 2000: loss 2.19
epoch=1, batch= 4000: loss 1.82
epoch=1, batch= 6000: loss 1.66
...
```
You may log metrics to Azure ML with `run.log('<metric_name>', metric_value)` and monitor them in the studio:
![](img/logging-metrics.png)
The training concludes, usually some model files are saved, and the nodes are
released.
But the story doesn't end there. The run persists even after the nodes are returned
to Azure. You can always return, either in code or via the studio, to see a history
of your runs, all their outputs and metrics, and the exact code that was used to generate them.
#### Experiments
An Experiment is a collection of runs. All runs belong to an Experiment. Usually
an Experiment is tied to a specific work item, for example, "Finetune Bert-Large",
and will possess a number of runs as you iterate toward this goal.
### Snapshot
When you submit your code to run in Azure ML, a _snapshot_ is taken. This is a copy of the exact
code that ran. Think of this as version control for your experiments. Want to reproduce the
results from that experiment 2-months ago even though you've iterated on the model and the
training script in the meantime? No problem, snapshot has you covered!
You have total control of what goes into the snapshot with the `.amlignore` file. This plays
the same role as a `.gitignore` so you can efficiently manage what to include in the snapshot.
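For illustration, a minimal sketch of an `.amlignore` (the entries are hypothetical; adapt them to your project):

```text title=".amlignore"
# exclude local outputs and data from the snapshot
outputs/
data/
.git/
*.ipynb_checkpoints
```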
### Metrics
As you run experiments, you track metrics - from validation loss through to GPU load. Analysing these metrics is essential to determining your best model. With Run History, these metrics are stored for all your runs.
---
title: Experiment and Run
description: Guide to running code with Azure ML
keywords:
- run
- experiment
- submit
- remote
- ScriptRunConfig
---
:::note
This content is not available in your language.
:::
## Concepts
### Run
A run represents a single execution of your code.
Azure ML is a machine-learning service that facilitates running your code in
the cloud. A `Run` is an abstraction layer around each such submission, and is used to
monitor the job in real time as well as keep a history of your results.
### Experiments
An experiment is a light-weight container for `Run`s. Use experiments to submit
and track runs.
Create an experiment in your workspace `ws`.
```python
from azureml.core import Experiment
exp = Experiment(ws, '<experiment-name>')
```
## Create Run
### Via ScriptRunConfig
Usually a run is created by submitting a ScriptRunConfig.
```python
from azureml.core import Workspace, Experiment, ScriptRunConfig
ws = Workspace.from_config()
exp = Experiment(ws, '<experiment-name>')
config = ScriptRunConfig(source_directory='<path/to/source_directory>', script='train.py', ...)
run = exp.submit(config)
```
For more details: [ScriptRunConfig](script-run-config)
### Get Context
Code that is running within Azure ML is associated to a `Run`. The submitted code
can access its own run.
```py
from azureml.core import Run
run = Run.get_context()
```
#### Example: Logging metrics to current run context
A common use-case is logging metrics in a training script.
```py title="train.py"
from azureml.core import Run
run = Run.get_context()
# training code
for epoch in range(n_epochs):
model.train()
...
val = model.evaluate()
run.log('validation', val)
```
When this code is submitted to Azure ML (e.g. via ScriptRunConfig) it will log metrics to its associated run.
For more details: [Logging Metrics](logging)
### Interactive
In an interactive setting, e.g. a Jupyter notebook:
```python
run = exp.start_logging()
```
#### Example: Jupyter notebook
A common use case for interactive logging is to train a model in a notebook.
```py
from azureml.core import Workspace
from azureml.core import Experiment
ws = Workspace.from_config()
exp = Experiment(ws, 'example')
run = exp.start_logging() # start interactive run
print(run.get_portal_url()) # get link to studio
# toy example in place of e.g. model
# training or exploratory data analysis
import numpy as np
for x in np.linspace(0, 10):
y = np.sin(x)
run.log_row('sine', x=x, y=y) # log metrics
run.complete() # stop interactive run
```
Follow the link to the run to see the metric logging in real time.
![](img/run-ex-sine.png)
---
title: Running Code in the Cloud
description: Guide to running code with Azure ML
keywords:
- run
- experiment
- submit
- remote
- ScriptRunConfig
---
## Experiments and Runs
Azure ML is a service that helps you run your machine-learning code in the cloud. A `Run` is an abstraction layer around each job submitted to Azure ML: it not only keeps a history of submitted jobs, but also lets you monitor them in real time.
- Run: represents a single execution of your code. Details: [Run](#run)
- Experiment: a light-weight container for `Run`s, used to submit runs to Azure ML and to track them.
Create an experiment in your workspace `ws`:
```python
from azureml.core import Experiment
exp = Experiment(ws, '<experiment-name>')
```
## ScriptRunConfig
In Azure ML you typically use a `ScriptRunConfig` to package the code you want to run together with the configuration for running it, and submit that to run in the cloud.
Suppose the code you want to run has the following directory structure:
```bash
source_directory/
    script.py    # entry point of your code
    module1.py   # module called by script.py
...
```
Configuration for running `script.py` in the cloud using `ScriptRunConfig`:
```python
config = ScriptRunConfig(
source_directory='<path/to/source_directory>',
script='script.py',
compute_target=target,
environment=env,
arguments = [
'--learning_rate', 0.001,
'--momentum', 0.9,
]
)
```
Where:
- `source_directory='source_directory'`: local directory containing your code.
- `script='script.py'`: the Python script to run. It does not need to be at the root of `source_directory`.
- `compute_target=target`: see [Compute Target](compute-targets)
- `environment`: see [Environment](environment)
- `arguments`: see [Command Line Arguments](#command-line-arguments)
Submit this code to Azure ML:
```python
exp = Experiment(ws, '<exp-name>')
run = exp.submit(config)
print(run)
run.wait_for_completion(show_output=True)
```
In addition to streaming the logs to your terminal, this prints a link for monitoring the submitted run on the web (https://ml.azure.com).
## Command Line Arguments
To pass command line arguments to your script, use the `arguments` parameter of `ScriptRunConfig`.
Arguments are specified as a list:
```python
arguments = [first, second, third, ...]
```
The arguments are then passed to the script as if on the command line:
```console
$ python script.py first second third ...
```
Named arguments are also supported:
```python
arguments = ['--first_arg', first_val, '--second_arg', second_val, ...]
```
In addition to data types such as `int`, `float` and `str`, arguments can also be used to reference data.
More details on command line arguments: [Use dataset in a remote run](dataset#use-dataset-in-a-remote-run)
### Argument Example 1: `sys.argv`
In this example we pass two arguments to the script. Running from the console:
```console title="console"
$ python script.py 0.001 0.9
```
The equivalent using `arguments` in `ScriptRunConfig`:
```python title="run.py"
arguments = [0.001, 0.9]
config = ScriptRunConfig(
source_directory='.',
script='script.py',
arguments=arguments,
)
```
These arguments are available inside the script just like regular command line arguments:
```python title="script.py"
import sys
learning_rate = sys.argv[1]     # gets 0.001
momentum = sys.argv[2]          # gets 0.9
```
### Argument Example 2: `argparse`
In this example we pass two named arguments to the script. Running from the console:
```console title="console"
$ python script.py --learning_rate 0.001 --momentum 0.9
```
The equivalent using `arguments` in `ScriptRunConfig`:
```python title="run.py"
arguments = [
'--learning_rate', 0.001,
'--momentum', 0.9,
]
config = ScriptRunConfig(
source_directory='.',
script='script.py',
arguments=arguments,
)
```
These arguments are available inside the script just like regular command line arguments:
```python title="script.py"
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float)
parser.add_argument('--momentum', type=float)
args = parser.parse_args()
learning_rate = args.learning_rate      # gets 0.001
momentum = args.momentum                # gets 0.9
```
## Commands
You can also provide the exact command to run:
```python
command = 'python script.py'.split()
config = ScriptRunConfig(
source_directory='<path/to/code>',
command=command,
compute_target=compute_target,
environment=environment,
)
```
This example is equivalent to passing `script='script.py'` instead of the `command` argument.
The command option gives you a great deal of flexibility. For example:
- **Set environment variables**: some common examples:
```python
command = 'export PYTHONPATH=$PWD && python script.py'.split()
```
```python
command = f'export RANK={rank} && python script.py'.split()
```
- **Run a setup script**: run a setup script that, for example, downloads data or sets environment variables.
```python
command = 'python setup.py && python script.py'.split()
```
## Using Datasets
### Via Arguments
Pass a dataset to the `ScriptRunConfig` as an argument:
```py
# create a dataset
datastore = ws.get_default_datastore()
dataset = Dataset.File.from_files(path=(datastore, '<path/on/datastore>'))
arguments = ['--dataset', dataset.as_mount()]
config = ScriptRunConfig(
source_directory='.',
script='script.py',
arguments=arguments,
)
```
In this example the dataset is mounted to the run, where `script.py` can access it.
## Run
### Interactive
In an interactive setting, e.g. a Jupyter notebook:
```python
run = exp.start_logging()
```
#### Example: Jupyter Notebook
A common use case is to interactively log metrics for a model being trained in a notebook.
```py
from azureml.core import Workspace
from azureml.core import Experiment
ws = Workspace.from_config()
exp = Experiment(ws, 'example')
run = exp.start_logging()     # start interactive run
print(run.get_portal_url())   # get link to Azure ML Studio

# toy example in place of
# actual training or EDA
import numpy as np
for x in np.linspace(0, 10):
    y = np.sin(x)
    run.log_row('sine', x=x, y=y)   # log metrics

run.complete()                # stop interactive run
```
Follow the link to Azure ML Studio to view the run and its metric logs in real time.
![](img/run-ex-sine.png)
### Get Context
Code running within Azure ML is associated with a `Run`. The submitted code can access its own run:
```py
from azureml.core import Run
run = Run.get_context()
```
#### Example: Logging metrics to the current run context
A common use case is logging metrics from within a training script.
```py title="train.py"
from azureml.core import Run
run = Run.get_context()
# training code
for epoch in range(n_epochs):
model.train()
...
val = model.evaluate()
run.log('validation', val)
```
When this code is submitted to Azure ML (e.g. via `ScriptRunConfig`), it logs metrics to its associated `Run`.
For more details: [Metrics](logging)
# Templates
## Introduction
Cookiecutter is a simple command-line tool that allows you to quickly create
new projects from pre-defined templates. Let's see it in action!
First go ahead and get cookiecutter using your environment manager of choice,
for example:
```bash
pip install cookiecutter
```
Then give this repo a home
```bash
cd ~/repos # or wherever your repos call home :-)
git clone <this-repo>
```
For example, to create a new project from the `ScriptRunConfig` template, simply
run
```bash
cookiecutter path/to/cheatsheet/repo/templates/ScriptRunConfig
```
See [ScriptRunConfig](#ScriptRunConfig) for more details on this template.
## Project Templates
- ScriptRunConfig: Create a project to run a script in AML making use of the
ScriptRunConfig class. This template is well suited for smaller projects and
is especially handy for testing.
### ScriptRunConfig
[Cookiecutter](https://cookiecutter.readthedocs.io/en/1.7.2/README.html)
template for setting up an AML
[ScriptRunConfig](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.scriptrunconfig?view=azure-ml-py)
used to run your script in Azure.
#### Usage
Run the cookiecutter command
```bash
cookiecutter <path/to/cookiecutter/templates>/ScriptRunConfig
```
to create a new `ScriptRunConfig` project.
**Note.** Install with `pip install cookiecutter` (see
[cookiecutter docs](https://cookiecutter.readthedocs.io/en/1.7.2/installation.html)
for more installation options)
You will be prompted for the following:
- `directory_name`: The desired name of the directory (default:
"aml-src-script")
- `script_name`: The name of the python script to be run in Azure (default:
"script")
- `subscription_id`: Your Azure Subscription ID
- `resource_group`: Your Azure resource group name
- `workspace_name`: Your Azure ML workspace name
- `compute_target_name`: The name of the Azure ML compute target to run the
script on (default: "local", will run on your box)
Cookiecutter creates a new project with the following layout.
```bash
{directory_name}/
{script_name}.py # the script you want to run in the cloud
run.py # wraps your script in ScriptRunConfig to send to Azure
config.json # your Azure ML metadata
readme.md # this readme file!
```
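For orientation, here is a minimal sketch of the kind of wrapper `run.py` contains (hypothetical; the actual template fills in the values from your prompts):

```python
from azureml.core import Experiment, ScriptRunConfig, Workspace

ws = Workspace.from_config()        # reads the generated config.json
config = ScriptRunConfig(
    source_directory='.',
    script='script.py',             # from the script_name prompt
    compute_target='local',         # from the compute_target_name prompt
)
run = Experiment(ws, 'template-example').submit(config)
run.wait_for_completion(show_output=True)
```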
See
[ScriptRunConfig](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.scriptrunconfig?view=azure-ml-py)
for more options and details on configuring runs.
---
title: Troubleshooting
id: troubleshooting
description: A cheat sheet for Azure ML.
keywords:
- azure machine learning
- aml
- troubleshooting
---
:::note
This content is not available in your language.
:::
### Error: az acr login- APIVersion 2020-11-01-preview is not available.
**Description**
NotImplementedError occurred when building image using az acr.
```bash
az acr build --image $image_name --subscription $ws.subscription_id --registry $cr --file docker/Dockerfile docker/
```
The error:
```text
NotImplementedError: APIVersion 2020-11-01-preview is not available.
```
**Solution** This is a problem related to the version of the az CLI. Update the az CLI by running
```bash
az upgrade --yes
```
---
title: Workspace
description: Overview of the Azure ML workspace.
keywords:
- workspace
---
The workspace is the fundamental object used in Azure ML and appears in the constructors of many other classes.
Throughout this documentation we frequently omit instantiating the workspace object and simply refer to `ws`.
If you need instructions for creating a new workspace, see [Installation](installation).
## Get a Workspace
Instantiate a `Workspace` object, which is used to connect to your AML assets:
```python title="run.py"
from azureml.core import Workspace
ws = Workspace(
subscription_id="<subscription_id>",
resource_group="<resource_group>",
workspace_name="<workspace_name>",
)
```
For convenience, store your workspace metadata in a `config.json`:
```json title=".azureml/config.json"
{
"subscription_id": <subscription-id>,
"resource_group": <resource-group>,
"workspace_name": <workspace-name>
}
```
### Helpful Methods
- `ws.write_config(path, file_name)`: writes the `config.json` for you. `path` defaults to `.azureml/` in the current working directory, and `file_name` defaults to `config.json`.
- `Workspace.from_config(path, _file_name)`: reads the workspace configuration from config. The search starts in the current directory by default.
:::info
Storing these in the `.azureml/` directory is recommended, since the `Workspace.from_config` method searches this path _by default_.
:::
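A minimal sketch of this round trip:

```python
# write .azureml/config.json for later use
ws.write_config(path='.azureml')

# later: reconnect (searches .azureml/ by default)
from azureml.core import Workspace
ws = Workspace.from_config()
```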
## Get Workspace Assets
The workspace provides handles to the following Azure ML assets:
### Compute Targets
Get all compute targets attached to the workspace:
```python
ws.compute_targets: Dict[str, ComputeTarget]
```
### Datastores
Get all datastores registered to the workspace:
```python
ws.datastores: Dict[str, Datastore]
```
Get the workspace's default datastore:
```python
ws.get_default_datastore(): Datastore
```
### Keyvault
Get the workspace's default keyvault:
```python
ws.get_default_keyvault(): Keyvault
```
### Environments
Get the environments registered to the workspace:
```python
ws.environments: Dict[str, Environment]
```
### MLFlow
Get the MLFlow tracking URI:
```python
ws.get_mlflow_tracking_uri(): str
```
---
title: Contributing
description: Guide to contributing.
---
## Issues
All forms of feedback are welcome through [issues](https://github.com/Azure/azureml-cheatsheets/issues) - please follow the pre-defined templates where applicable.
## Pull requests
Pull requests (PRs) to this repo require review and approval by the Azure Machine Learning team to merge. Please follow the pre-defined template and read all relevant sections below.
Make PRs against the `main` branch.
```bash
git clone git@github.com:Azure/azureml-cheatsheets.git
cd azureml-cheatsheets
git checkout -b user/contrib
...
gh pr create
```
- When a PR arrives against `main`, GitHub Actions (deploy) will test that the build is successful
- When the PR is merged, the change will be automatically deployed to the `gh-pages` branch (and the webpage will be updated)
99% of contributions should only need the following:
- Add markdown files to the `website/docs/cheatsheets` folder
- Update the `sidebar.js` file to add a page to the sidebar
- Put any images in `website/docs/cheatsheets/<path-to-cheat-sheet-directory>/img/` and refer to them like this: `![](img/<image-name>.png)`
If you need to do anything more than adding a new page to the sidebar (e.g.
modify the nav bar) then please refer to the [Docusaurus 2 documentation](https://v2.docusaurus.io/).
## Previewing changes locally
- Install [npm](https://nodejs.org/en/download/) and [yarn](https://classic.yarnpkg.com/en/docs/install#windows-stable).
- Initial Docusaurus installation: (**First time only**)
```bash
cd website
npm install
```
- Run local server while developing:
```bash
cd website
yarn start
```
See Docusaurus instructions [here](https://v2.docusaurus.io/docs/installation) for more details.
---
title: Deployment
description: One-time website deployment setup.
---
## Deployment
This article describes the one-time process for deploying the cheat sheets as a GitHub Pages website.
This repo has GitHub actions in place that automate deployment by watching the `main` branch.
If you are interested in how deployment works then read on!
### GitHub Actions
We use GitHub Actions to automate deployment. Setup was as follows:
- Generated new SSH key
- NB. Since there was an existing SSH key tied to the repo, a new key was generated (in a different location): `/tmp/.ssh/id_rsa`
- Add public key to repo's [deploy key](https://developer.github.com/v3/guides/managing-deploy-keys/)
- NB. Allow write access
- Add private key as [GitHub secret](https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets)
- We use repo-level (not org level) secret
- Secret is named `GH_PAGES_DEPLOY`
- `xclip -sel clip < /tmp/.ssh/id_rsa`
### Manual
It is possible to make manual deployments without use of the GitHub action above.
```console
GIT_USER=<Your GitHub username> USE_SSH=true yarn deploy
```
If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
---
title: Issue Triage Process
id: issues
description: GitHub issue triage process for Azure Machine Learning.
keywords:
- azure machine learning
- aml
- azure
---
## Overview
This page defines the triage process for Azure Machine Learning (AML) repositories.
## Repositories
AML examples:
- https://github.com/Azure/MachineLearningNotebooks
- https://github.com/Azure/azureml-examples
Azure core:
- https://github.com/Azure/azure-cli
- https://github.com/Azure/azure-cli-extensions
- https://github.com/Azure/azure-powershell
- https://github.com/Azure/azure-rest-api-specs
- https://github.com/Azure/azure-sdk-for-js
- https://github.com/Azure/azure-sdk-for-python
> To request a repository to be added, [open an issue](https://github.com/Azure/azureml-web/issues)
## Code of Conduct
All interactions on GitHub must follow the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
## Priority
GitHub user experience.
## Metrics
- FQR: first quality response
- TTC: time to close
## Goals
- triage issue area and type in <3 hrs
- FQR <8 hrs
- TTC for questions <5 days
- TTC for bugs <30 days
## SLA
- triage <1 day
- FQR <3 days
## Labels
### Areas
#### Foundations
- `Foundations/Data`
- `Foundations/Compute`
- `Foundations/Infrastructure`
- `Foundations/Admin`
#### Experiences
- `Experiences/UI`
- `Experiences/Lifecycle`
- `Experiences/Intelligence`
- `Experiences/Inference`
#### Pipelines
- `Pipelines/UI`
- `Pipelines/Aether`
### Issue types
- `bug`
- `question`
- `feature-request`
### Other
- `needs-details`: additional details needed from author
- `v2`: planned for AMLv2
## Process
### Triage
Initial triage will be performed by the GitHub v-team. On initial triage, assign the correct area label and issue type.
If the issue needs obvious clarification before this can be done, kindly ask the user. If the issue has no path to closing without user response, mark it as `needs-details`.
After initial triage, it is up to each area (Experiences, Foundations, Pipelines) to further triage as necessary to the correct engineering team members.
One type of issue may be changed to another, e.g. an issue like “can I do X” could end up as a feature request for X. Simply change the issue labels as appropriate. In some cases, it might make sense to open a new issue and close the original instead of changing the label.
Once the issue is understood, it is up to each area to appropriately route through internal tools such as ADO, maintaining the GitHub issue as the point of communication with the user. Major developments should be communicated back to the user.
### Closing
Issues may be closed by their creator at any time, which is preferred, **especially for questions**.
Additionally, issues may be closed once:
- `needs-details`: user/author has not responded for 5+ days with no other path to closure
- `question`: the question has been thoroughly answered with relevant links, documentation, and examples and has no follow-up questions from user(s) in 48 hrs
- `bug`: the bug fix has been released, tested, and the user confirms the solution or does not respond for 48 hrs after being made aware of the fix
- `feature-request`: the feature has been released, tested, and the user confirms the solution or does not respond for 48 hrs after being made aware of the release
{
"Imports Group: Basic": {
"prefix": ["import-basic"],
"body": [
"from azureml.core import Workspace # connect to workspace",
"from azureml.core import Experiment # connect/create experiments",
"from azureml.core import ComputeTarget # connect to compute",
"from azureml.core import Environment # manage e.g. Python environments",
"from azureml.core import Datastore, Dataset # work with data",
"$0"
],
"description": "Import collection of basic Azure ML classes"
},
"Import Workspace": {
"prefix": ["import-workspace"],
"body": [
"from azureml.core import Workspace",
"$0"
],
"description": "Import Workspace class"
},
"Import Compute Target": {
"prefix": ["import-compute-target"],
"body": [
"from azureml.core import ComputeTarget",
"$0"
],
"description": "Import ComputeTarget class"
},
"Import Environment": {
"prefix": ["import-environment"],
"body": [
"from azureml.core import Environment",
"$0"
],
"description": "Import Environment class"
},
"Import ScriptRunConfig": {
"prefix": ["import-script-run-config", "import-src"],
"body": [
"from azureml.core import ScriptRunConfig",
"$0"
],
"description": "Import ScriptRunConfig class"
},
"Import Dataset": {
"prefix": ["import-dataset"],
"body": [
"from azureml.core import Dataset",
"$0"
],
"description": "Import Dataset class"
},
"Import Datastore": {
"prefix": ["import-datastore"],
"body": [
"from azureml.core import Datastore",
"$0"
],
"description": "Import Datastore class"
},
"Import Run": {
"prefix": ["import-run"],
"body": [
"from azureml.core import Run",
"$0"
],
"description": "Import Run class"
},
"Import Conda Dependencies": {
"prefix": ["import-conda-dependencies"],
"body": [
"from azureml.core.conda_dependencies import CondaDependencies",
"$0"
],
"description": "Import CondaDependencies class"
},
"Get Workspace From Config": {
"prefix": ["get-workspace-config", "ws-config"],
"body": [
"from azureml.core import Workspace",
"ws = Workspace.from_config()",
"$0"
],
"description": "Get Azure ML Workspace from config"
},
"Get Workspace": {
"prefix": ["get-workspace", "get-ws"],
"body": [
"from azureml.core import Workspace",
"ws = Workspace.get(",
" name='${1:name}',",
" subscription_id='${2:subscription_id}',",
" resource_group='${3:resource_group}',",
")",
"$0"
],
"description": "Get Azure ML Workspace"
},
"Get Compute": {
"prefix": ["get-compute"],
"body": [
"from azureml.core import ComputeTarget",
"target = ComputeTarget(${2:ws}, '${1:<compute_target_name>}')",
"$0"
],
"description": "Get Azure ML Compute Target"
},
"Get Compute with SSH": {
"prefix": ["get-compute-ssh"],
"body": [
"from azureml.core.compute import AmlCompute",
"from azureml.core.compute_target import ComputeTargetException",
"",
"ssh_public_key = 'public-key-here'",
"compute_config = AmlCompute.provisioning_configuration(",
" vm_size='$2',",
" min_nodes=$3,",
" max_nodes=$4,",
" admin_username='$5',",
" admin_user_ssh_key=ssh_public_key,",
" vm_priority='${6|lowpriority,dedicated|}',",
" remote_login_port_public_access='Enabled',",
" )",
"",
"cluster = ComputeTarget.create(",
" workspace=${7:workspace_name},",
" name='${8:target_name}',",
" compute_config=compute_config,",
")",
"$0"
],
"description": "Get Azure ML Compute Target with SSH"
},
"Get Environment": {
"prefix": ["get-environment"],
"body": [
"from azureml.core import Environment",
"${2:env} = Environment('${1:<env-name>}')",
"$0"
],
"description": "Get Azure ML Environment"
},
"Get Environment From Pip": {
"prefix": ["get-environment-pip", "env-pip"],
"body": [
"from azureml.core import Environment",
"env = Environment.from_pip_requirements(",
" name='${1:env_name}',",
" file_path='${2:requirements.txt}',",
")",
"$0"
],
"description": "Create environment from pip requirements.txt"
},
"Get Environment From Conda": {
"prefix": ["get-environment-conda", "env-conda"],
"body": [
"from azureml.core import Environment",
"env = Environment.from_conda_specification(",
" name='${1:env_name}',",
" file_path='${2:env.yml}',",
")",
"$0"
],
"description": "Create environment from Conda env.yml file"
},
"Get Environment From SDK": {
"prefix": ["get-environment-sdk", "env-sdk"],
"body": [
"from azureml.core import Environment",
"from azureml.core.conda_dependencies import CondaDependencies",
"env = Environment('${1:my-env}')",
"",
"conda = CondaDependencies()",
"",
"# add channels",
"conda.add_channel('$2')",
"",
"# add conda packages",
"conda.add_conda_package('$3')",
"",
"# add pip packages",
"conda.add_pip_package('$4')",
"",
"# add conda dependencies to environment",
"env.python.conda_dependencies = conda",
"$0"
],
"description": "Create environment using CondaDependencies class"
},
"Get Environment From Custom image": {
"prefix": ["get-environment-custom-image", "env-image"],
"body": [
"from azureml.core import Environment",
"env = Environment('${1:my-env}')",
"",
"env.docker.enabled = True",
"",
"# base image for DockerHub",
"env.docker.base_image = '${2}'",
"",
"# if you are using base image from a Dockerfile",
"# env.docker.base_image = None",
"# env.docker.base_dockerfile = './Dockerfile'",
"",
"# The user_managed_dependencies flag to True will use your custom image's built-in Python environment. ",
"env.python.user_managed_dependencies = True",
"",
"$0"
],
"description": "Create environment using Custom image"
},
"Workspace Compute Targets": {
"prefix": ["ws-compute-target"],
"body": [
"target = ws.compute_targets['${1:target-name}']",
"$0"
],
"description": "Get compute target from workspace"
},
"Workspace Environments": {
"prefix": ["ws-environment"],
"body": [
"env = ws.environments['${1:env-name}']",
"$0"
],
"description": "Get environment from workspace"
},
"Workspace Datastores": {
"prefix": ["ws-datastore"],
"body": [
"datastore = ws.datastores['${1:datastore-name}']",
"$0"
],
"description": "Get datastore from workspace"
},
"Workspace Datasets": {
"prefix": ["ws-dataset"],
"body": [
"dataset = ws.datasets['${1:dataset-name}']",
"$0"
],
"description": "Get dataset from workspace"
},
"Workspace Experiment": {
"prefix": ["ws-experiment"],
"body": [
"exp = ws.experiments['${1:experiment-name}']",
"$0"
],
"description": "Get (existing) experiment from workspace"
},
"Workspace Models": {
"prefix": ["ws-model"],
"body": [
"model = ws.models['${1:model-name}']",
"$0"
],
"description": "Get model from workspace"
},
"Script Run Config": {
"prefix": ["script-run-config", "src"],
"body": [
"from azureml.core import Workspace, Experiment, ScriptRunConfig",
"",
"# get workspace",
"ws = Workspace.from_config()",
"",
"# get compute target",
"target = ws.compute_targets['${1:target-name}']",
"",
"# get registered environment",
"env = ws.environments['${2:env-name}']",
"",
"# get/create experiment",
"exp = Experiment(ws, '${3:experiment_name}')",
"",
"# set up script run configuration",
"config = ScriptRunConfig(",
" source_directory='${4:.}',",
" script='${5:script.py}',",
" compute_target=target,",
" environment=env,",
" arguments=[${6:'--meaning', 42}],",
")",
"",
"# submit script to AML",
"run = exp.submit(config)",
"print(run.get_portal_url()) # link to ml.azure.com",
"run.wait_for_completion(show_output=True)",
"$0"
],
"description": "Set up ScriptRunConfig including compute target, environment and experiment"
},
"Script Run Config with Command": {
"prefix": ["script-run-config-command", "command-src", "src-command"],
"body": [
"from azureml.core import Workspace, Experiment, ScriptRunConfig",
"",
"# get workspace",
"ws = Workspace.from_config()",
"",
"# get compute target",
"target = ws.compute_targets['${1:target-name}']",
"",
"# get registered environment",
"env = ws.environments['${2:env-name}']",
"",
"# get/create experiment",
"exp = Experiment(ws, '${3:experiment_name}')",
"",
"# create command",
"command = 'python ${4:script.py} ${5:--argument value}'.split()",
"",
"# set up script run configuration",
"config = ScriptRunConfig(",
" source_directory='${6:.}',",
" command=command,",
" compute_target=target,",
" environment=env,",
")",
"",
"# submit script to AML",
"run = exp.submit(config)",
"print(run.get_portal_url()) # link to ml.azure.com",
"run.wait_for_completion(show_output=True)",
"$0"
],
"description": "Set up ScriptRunConfig using command argument"
},
"Script Run Config with Distributed Config": {
"prefix": ["script-run-config-distributed", "distributed-src", "src-distributed"],
"body": [
"from azureml.core import Workspace, ScriptRunConfig, Environment, Experiment",
"from azureml.core.runconfig import MpiConfiguration",
"",
"# get workspace",
"ws = Workspace.from_config()",
"",
"# get compute target",
"target = ws.compute_targets['${1:target-name}']",
"",
"# get curated environment",
"curated_env_name = '${2:AzureML-PyTorch-1.6-GPU}'",
"env = Environment.get(workspace=ws, name=curated_env_name)",
"",
"# get/create experiment",
"exp = Experiment(ws, '${3:experiment_name}')",
"",
"# distributed job configuration",
"distributed_job_config = MpiConfiguration(process_count_per_node=4, node_count=2)",
"",
"# set up script run configuration",
"config = ScriptRunConfig(",
" source_directory='${4:.}',",
" script='${5:script.py}',",
" compute_target=target,",
" environment=env,",
" distributed_job_config=distributed_job_config,",
")",
"",
"# submit script to AML",
"run = exp.submit(config)",
"print(run.get_portal_url()) # link to ml.azure.com",
"run.wait_for_completion(show_output=True)",
"$0"
],
"description": "Set up ScriptRunConfig for distributed training."
},
"Run Details Widget": {
"prefix": ["run-details-widget"],
"body": [
"from azureml.core import Workspace,Experiment,Run",
"from azureml.widgets import RunDetails",
"",
"# get workspace",
"ws = Workspace.from_config()",
"",
"# get/create experiment",
"exp = Experiment(ws, '${1:experiment_name}')",
"",
"# get run",
"run = Run(exp,'${2:run_id}')",
"",
"# submit script to AML",
"RunDetails(run).show()",
"$0"
],
"description": "Represents a Jupyter notebook widget used to view the progress of model training."
},
"Consume Dataset": {
"prefix": ["consume-dataset"],
"body": [
"#azureml-core of version 1.0.72 or higher is required",
"from azureml.core import Workspace, Dataset",
"",
"# get/create experiment",
"ws = Workspace.from_config()",
"",
"# get dataset",
"dataset = Dataset.get_by_name(ws, name='${1:dataset_name}')",
"dataset.download(target_path='.', overwrite=False)",
"$0"
],
"description": "Download Azure ML dataset to current working directory"
},
"Create Tabular Dataset": {
"prefix": ["create-tabular-dataset"],
"body": [
"from azureml.core import Workspace, Datastore, Dataset",
"",
"datastore_name = '${1:datastore_name}'",
"",
"# get workspace",
"ws = Workspace.from_config()",
"",
"# retrieve an existing datastore in the workspace by name",
"datastore = Datastore.get(ws, datastore_name)",
"",
"# create a TabularDataset from 1 file paths in datastore",
"datastore_paths = [(datastore, ${2:file_path})]",
"",
"custom_ds = Dataset.Tabular.from_delimited_files(path=datastore_paths)",
"$0"
],
"description": "Create Azure ML tabular dataset."
},
"Create File Dataset": {
"prefix": ["create-file-dataset"],
"body": [
"# create a FileDataset pointing to files in 'animals' folder and its subfolders recursively",
"from azureml.core import Workspace, Datastore, Dataset",
"",
"datastore_name = '${1:datastore_name}'",
"",
"# get workspace",
"ws = Workspace.from_config()",
"",
"# retrieve an existing datastore in the workspace by name",
"datastore = Datastore.get(ws, datastore_name)",
"",
"# create a FileDataset pointing to files in your folder and its subfolders recursively, you can also use public web urls paths",
"datastore_paths = [(datastore, ${2:file_path})]",
"",
"custom_ds = Dataset.File.from_files(path=datastore_paths)",
"$0"
],
"description": "Create Azure ML file dataset."
}
}
# VS Code Snippets
Notes for contributing Azure ML Snippets.
For using snippets see https://azure.github.io/azureml-web/docs/vs-code-snippets/snippets.
1. Add snippets to `python.json`. For more details on VS Code snippets: [vs-code-docs](https://code.visualstudio.com/docs/editor/userdefinedsnippets)
2. Run `python snippets-parser.py` to automatically update the `snippets.md` (which will document your changes)
3. Make a PR to the `main` branch and request a review.
import json
from typing import List
class Snippet:
"""Handle json snippets
Parse json (VS Code) snippets file and generate markdown summary.
"""
def __init__(self, name, snippet_json):
self.name = name
self.description = snippet_json.get("description")
self.prefix = self._read_prefix(snippet_json.get("prefix"))
self.body = snippet_json.get("body")
def __repr__(self):
return f"Snippet({self.name})"
@staticmethod
def _read_prefix(prefix):
"""Guarentee prefix is of type List."""
if type(prefix) == list:
return prefix
else:
assert type(prefix) == str
return [prefix]
def to_markdown(self) -> List[str]:
"""Convert snippet to markdown (as list of lines)."""
lines = []
# add heading
heading = f"### {self.name}"
lines.append(heading)
lines.append("")
# add description
description = f"Description: {self.description}"
lines.append(description)
lines.append("")
# add prefix(es)
if len(self.prefix) > 1:
prefix = f"Prefixes: "
else:
prefix = f"Prefix: "
for p in self.prefix:
prefix += f"`{p}`, "
prefix = prefix[:-2] # remove trailing comma and whitespace
lines.append(prefix)
lines.append("")
# add python snippet
lines.append("```python")
for line in self.body:
if line == "$0":
continue
lines.append(line)
lines.append("```")
return lines
@staticmethod
def _convert_to_json(body):
json_body = []
for line in body[:-1]:
line = '"' + line + '",'
json_body.append(line)
line = '"' + body[-1] + '"'
json_body.append(line)
return json_body
frontmatter = """---
title: VS Code Snippets
description: A collection of VS Code Snippets for working with Azure ML.
---
We have compiled a collection of useful templates in the form of
[VS code snippets](https://code.visualstudio.com/docs/editor/userdefinedsnippets).
![VS Code Snippets](vs-code-snippets-demo.gif)
To add these snippets to your VS Code: `ctrl+shift+p` > Type 'Configure user
snippets' > Select `python.json`. All of these snippets are available here:
[python.json](https://github.com/Azure/azureml-web/blob/main/website/docs/vs-code-snippets/python.json)
"""
if __name__ == "__main__":
    # parse snippets
    with open("python.json") as f:
        snippets_file = json.load(f)

    snippets = []
    for name, snippet_json in snippets_file.items():
        snippet = Snippet(name, snippet_json)
        snippets.append(snippet)

    # create file and write frontmatter
    md_filename = "snippets.md"
    with open(md_filename, "w") as f:
        # write frontmatter
        f.write(frontmatter)

        # write each snippet
        for snippet in snippets:
            lines = snippet.to_markdown()
            for line in lines:
                f.write(line + "\n")

View file

@@ -0,0 +1,494 @@
---
title: VS Code Snippets
description: A collection of VS Code Snippets for working with Azure ML.
---
We have compiled a collection of useful templates in the form of
[VS code snippets](https://code.visualstudio.com/docs/editor/userdefinedsnippets).
![VS Code Snippets](vs-code-snippets-demo.gif)
To add these snippets to your VS Code: `ctrl+shift+p` > Type 'Configure user
snippets' > Select `python.json`. All of these snippets are available here:
[python.json](https://github.com/Azure/azureml-web/blob/main/website/docs/vs-code-snippets/python.json)
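For reference, each entry in `python.json` maps a snippet name to a prefix, a body, and a description. Below is a minimal sketch of that shape, written as the Python dict that `json.load` would return; the "Import Model" entry is purely illustrative and not part of the shipped collection:

```python
import json

# Hypothetical entry: the name, prefix and body are illustrative only.
# "$0" marks the final cursor position, per VS Code snippet syntax.
entry = {
    "Import Model": {
        "prefix": ["import-model"],
        "body": ["from azureml.core import Model", "$0"],
        "description": "Import Model class",
    }
}
print(json.dumps(entry, indent=4))  # paste the inner object into python.json
```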
### Imports Group: Basic
Description: Import collection of basic Azure ML classes
Prefix: `import-basic`
```python
from azureml.core import Workspace # connect to workspace
from azureml.core import Experiment # connect/create experiments
from azureml.core import ComputeTarget # connect to compute
from azureml.core import Environment # manage e.g. Python environments
from azureml.core import Datastore, Dataset # work with data
```
### Import Workspace
Description: Import Workspace class
Prefix: `import-workspace`
```python
from azureml.core import Workspace
```
### Import Compute Target
Description: Import ComputeTarget class
Prefix: `import-compute-target`
```python
from azureml.core import ComputeTarget
```
### Import Environment
Description: Import Environment class
Prefix: `import-environment`
```python
from azureml.core import Environment
```
### Import ScriptRunConfig
Description: Import ScriptRunConfig class
Prefixes: `import-script-run-config`, `import-src`
```python
from azureml.core import ScriptRunConfig
```
### Import Dataset
Description: Import Dataset class
Prefix: `import-dataset`
```python
from azureml.core import Dataset
```
### Import Datastore
Description: Import Datastore class
Prefix: `import-datastore`
```python
from azureml.core import Datastore
```
### Import Run
Description: Import Run class
Prefix: `import-run`
```python
from azureml.core import Run
```
### Import Conda Dependencies
Description: Import CondaDependencies class
Prefix: `import-conda-dependencies`
```python
from azureml.core.conda_dependencies import CondaDependencies
```
### Get Workspace From Config
Description: Get Azure ML Workspace from config
Prefixes: `get-workspace-config`, `ws-config`
```python
from azureml.core import Workspace
ws = Workspace.from_config()
```
### Get Workspace
Description: Get Azure ML Workspace
Prefixes: `get-workspace`, `get-ws`
```python
from azureml.core import Workspace
ws = Workspace.get(
name='${1:name}',
subscription_id='${2:subscription_id}',
resource_group='${3:resource_group}',
)
```
### Get Compute
Description: Get Azure ML Compute Target
Prefix: `get-compute`
```python
from azureml.core import ComputeTarget
target = ComputeTarget(${2:ws}, '${1:<compute_target_name>}')
```
### Get Compute with SSH
Description: Get Azure ML Compute Target with SSH
Prefix: `get-compute-ssh`
```python
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException
ssh_public_key = 'public-key-here'
compute_config = AmlCompute.provisioning_configuration(
vm_size='$2',
min_nodes=$3,
max_nodes=$4,
admin_username='$5',
admin_user_ssh_key=ssh_public_key,
vm_priority='${6|lowpriority,dedicated|}',
remote_login_port_public_access='Enabled',
)
cluster = ComputeTarget.create(
workspace=${7:workspace_name},
name='${8:target_name}',
compute_config=compute_config,
)
```
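Provisioning runs asynchronously. Continuing from the `cluster` object above, you would typically block until the nodes are ready; `wait_for_completion` is part of the azureml-core compute API:

```python
# block until provisioning finishes (continues from `cluster` above)
cluster.wait_for_completion(show_output=True)
```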
### Get Environment
Description: Get Azure ML Environment
Prefix: `get-environment`
```python
from azureml.core import Environment
${2:env} = Environment('${1:<env-name>}')
```
### Get Environment From Pip
Description: Create environment from pip requirements.txt
Prefixes: `get-environment-pip`, `env-pip`
```python
from azureml.core import Environment
env = Environment.from_pip_requirements(
name='${1:env_name}',
file_path='${2:requirements.txt}',
)
```
### Get Environment From Conda
Description: Create environment from Conda env.yml file
Prefixes: `get-environment-conda`, `env-conda`
```python
from azureml.core import Environment
env = Environment.from_conda_specification(
name='${1:env_name}',
file_path='${2:env.yml}',
)
```
### Get Environment From SDK
Description: Create environment using CondaDependencies class
Prefixes: `get-environment-sdk`, `env-sdk`
```python
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
env = Environment('${1:my-env}')
conda = CondaDependencies()
# add channels
conda.add_channel('$2')
# add conda packages
conda.add_conda_package('$3')
# add pip packages
conda.add_pip_package('$4')
# add conda dependencies to environment
env.python.conda_dependencies = conda
```
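A common follow-up, so that the `ws.environments['...']` lookups above can find it later, is to register the environment; a short sketch assuming `ws` is an existing `Workspace` object:

```python
# register the environment in the workspace so it can be fetched by name later;
# assumes `ws` is a Workspace object (e.g. from Workspace.from_config())
env.register(workspace=ws)
```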
### Get Environment From Custom image
Description: Create environment using Custom image
Prefixes: `get-environment-custom-image`, `env-image`
```python
from azureml.core import Environment
env = Environment('${1:my-env}')
env.docker.enabled = True
# base image for DockerHub
env.docker.base_image = '${2}'
# if you are using base image from a Dockerfile
# env.docker.base_image = None
# env.docker.base_dockerfile = './Dockerfile'
# Setting user_managed_dependencies to True uses the Python environment built into your custom image.
env.python.user_managed_dependencies = True
```
### Workspace Compute Targets
Description: Get compute target from workspace
Prefix: `ws-compute-target`
```python
target = ws.compute_targets['${1:target-name}']
```
### Workspace Environments
Description: Get environment from workspace
Prefix: `ws-environment`
```python
env = ws.environments['${1:env-name}']
```
### Workspace Datastores
Description: Get datastore from workspace
Prefix: `ws-datastore`
```python
datastore = ws.datastores['${1:datastore-name}']
```
### Workspace Datasets
Description: Get dataset from workspace
Prefix: `ws-dataset`
```python
dataset = ws.datasets['${1:dataset-name}']
```
### Workspace Experiment
Description: Get (existing) experiment from workspace
Prefix: `ws-experiment`
```python
exp = ws.experiments['${1:experiment-name}']
```
### Workspace Models
Description: Get model from workspace
Prefix: `ws-model`
```python
model = ws.models['${1:model-name}']
```
### Script Run Config
Description: Set up ScriptRunConfig including compute target, environment and experiment
Prefixes: `script-run-config`, `src`
```python
from azureml.core import Workspace, Experiment, ScriptRunConfig
# get workspace
ws = Workspace.from_config()
# get compute target
target = ws.compute_targets['${1:target-name}']
# get registered environment
env = ws.environments['${2:env-name}']
# get/create experiment
exp = Experiment(ws, '${3:experiment_name}')
# set up script run configuration
config = ScriptRunConfig(
source_directory='${4:.}',
script='${5:script.py}',
compute_target=target,
environment=env,
arguments=[${6:'--meaning', 42}],
)
# submit script to AML
run = exp.submit(config)
print(run.get_portal_url()) # link to ml.azure.com
run.wait_for_completion(show_output=True)
```
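Once the run completes, metrics logged from inside the training script can be read back from the same `run` object; `get_metrics` is part of the azureml-core `Run` API:

```python
# retrieve metrics logged with run.log(...) in the training script
# (continues from the `run` returned by exp.submit(config) above)
metrics = run.get_metrics()
print(metrics)
```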
### Script Run Config with Command
Description: Set up ScriptRunConfig using command argument
Prefixes: `script-run-config-command`, `command-src`, `src-command`
```python
from azureml.core import Workspace, Experiment, ScriptRunConfig
# get workspace
ws = Workspace.from_config()
# get compute target
target = ws.compute_targets['${1:target-name}']
# get registered environment
env = ws.environments['${2:env-name}']
# get/create experiment
exp = Experiment(ws, '${3:experiment_name}')
# create command
command = 'python ${4:script.py} ${5:--argument value}'.split()
# set up script run configuration
config = ScriptRunConfig(
source_directory='${6:.}',
command=command,
compute_target=target,
environment=env,
)
# submit script to AML
run = exp.submit(config)
print(run.get_portal_url()) # link to ml.azure.com
run.wait_for_completion(show_output=True)
```
### Script Run Config with Distributed Config
Description: Set up ScriptRunConfig for distributed training.
Prefixes: `script-run-config-distributed`, `distributed-src`, `src-distributed`
```python
from azureml.core import Workspace, ScriptRunConfig, Environment, Experiment
from azureml.core.runconfig import MpiConfiguration
# get workspace
ws = Workspace.from_config()
# get compute target
target = ws.compute_targets['${1:target-name}']
# get curated environment
curated_env_name = '${2:AzureML-PyTorch-1.6-GPU}'
env = Environment.get(workspace=ws, name=curated_env_name)
# get/create experiment
exp = Experiment(ws, '${3:experiment_name}')
# distributed job configuration
distributed_job_config = MpiConfiguration(process_count_per_node=4, node_count=2)
# set up script run configuration
config = ScriptRunConfig(
source_directory='${4:.}',
script='${5:script.py}',
compute_target=target,
environment=env,
distributed_job_config=distributed_job_config,
)
# submit script to AML
run = exp.submit(config)
print(run.get_portal_url()) # link to ml.azure.com
run.wait_for_completion(show_output=True)
```
### Run Details Widget
Description: Represents a Jupyter notebook widget used to view the progress of model training.
Prefix: `run-details-widget`
```python
from azureml.core import Workspace, Experiment, Run
from azureml.widgets import RunDetails
# get workspace
ws = Workspace.from_config()
# get/create experiment
exp = Experiment(ws, '${1:experiment_name}')
# get run
run = Run(exp, '${2:run_id}')
# show the run details widget
RunDetails(run).show()
```
### Consume Dataset
Description: Download Azure ML dataset to current working directory
Prefix: `consume-dataset`
```python
# azureml-core version 1.0.72 or higher is required
from azureml.core import Workspace, Dataset
# get workspace
ws = Workspace.from_config()
# get dataset
dataset = Dataset.get_by_name(ws, name='${1:dataset_name}')
dataset.download(target_path='.', overwrite=False)
```
### Create Tabular Dataset
Description: Create Azure ML tabular dataset.
Prefix: `create-tabular-dataset`
```python
from azureml.core import Workspace, Datastore, Dataset
datastore_name = '${1:datastore_name}'
# get workspace
ws = Workspace.from_config()
# retrieve an existing datastore in the workspace by name
datastore = Datastore.get(ws, datastore_name)
# create a TabularDataset from a file path in the datastore
datastore_paths = [(datastore, ${2:file_path})]
custom_ds = Dataset.Tabular.from_delimited_files(path=datastore_paths)
```
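To reuse the dataset later and materialize it locally, you can register it and load it into pandas; a sketch continuing from `custom_ds` above (the dataset name is illustrative):

```python
# register for reuse (the name is illustrative), then load into a DataFrame
custom_ds = custom_ds.register(workspace=ws, name='my-tabular-dataset')
df = custom_ds.to_pandas_dataframe()
```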
### Create File Dataset
Description: Create Azure ML file dataset.
Prefix: `create-file-dataset`
```python
# create a FileDataset pointing to files in a folder and all of its subfolders
from azureml.core import Workspace, Datastore, Dataset
datastore_name = '${1:datastore_name}'
# get workspace
ws = Workspace.from_config()
# retrieve an existing datastore in the workspace by name
datastore = Datastore.get(ws, datastore_name)
# build the datastore paths; public web URLs are also supported
datastore_paths = [(datastore, ${2:file_path})]
custom_ds = Dataset.File.from_files(path=datastore_paths)
```
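Continuing from `custom_ds` above, the referenced files can be listed or downloaded locally; both methods are part of the azureml-core `FileDataset` API:

```python
# list the file paths behind the dataset, then download them locally
print(custom_ds.to_path())
custom_ds.download(target_path='.', overwrite=False)
```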

Binary file not shown.

After

Width:  |  Height:  |  Size: 416 KiB

View file

@@ -0,0 +1,46 @@
{
"link.title.Resources": {
"message": "Resources",
"description": "The title of the footer links column with title=Resources in the footer"
},
"link.title.Support": {
"message": "Support",
"description": "The title of the footer links column with title=Support in the footer"
},
"link.title.GitHub": {
"message": "GitHub",
"description": "The title of the footer links column with title=GitHub in the footer"
},
"link.item.label.Azure ML - Microsoft Docs": {
"message": "Azure ML - Microsoft Docs",
"description": "The label of footer link with label=Azure ML - Microsoft Docs linking to https://docs.microsoft.com/azure/machine-learning"
},
"link.item.label.Azure ML - Python API": {
"message": "Azure ML - Python API",
"description": "The label of footer link with label=Azure ML - Python API linking to https://docs.microsoft.com/python/api/overview/azure/ml/?view=azure-ml-py"
},
"link.item.label.GitHub issues": {
"message": "GitHub issues",
"description": "The label of footer link with label=GitHub issues linking to https://github.com/Azure/azureml-cheatsheets/issues"
},
"link.item.label.Stack Overflow": {
"message": "Stack Overflow",
"description": "The label of footer link with label=Stack Overflow linking to https://stackoverflow.microsoft.com/questions/tagged/10888"
},
"link.item.label.Cheat sheets": {
"message": "Cheat sheets",
"description": "The label of footer link with label=Cheat sheets linking to https://github.com/Azure/azureml-cheatsheets"
},
"link.item.label.Azure ML Examples": {
"message": "Azure ML Examples",
"description": "The label of footer link with label=Azure ML Examples linking to https://github.com/Azure/azureml-examples"
},
"link.item.label.Contribution": {
"message": "Contribution",
"description": "The label of footer link with label=Contribution linking to /docs/misc/contributing"
},
"copyright": {
"message": "Copyright © 2021 Microsoft Corporation",
"description": "The footer copyright"
}
}

View file

@@ -0,0 +1,10 @@
{
"title": {
"message": "Azure Machine Learning",
"description": "The title in the navbar"
},
"item.label.Python SDK": {
"message": "Python SDK",
"description": "Navbar item with label Python SDK"
}
}

View file

@@ -5,37 +5,44 @@ import Link from '@docusaurus/Link';
import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
import useBaseUrl from '@docusaurus/useBaseUrl';
import styles from './styles.module.css';
import Translate, {translate} from '@docusaurus/Translate';
const features = [
{
title: 'Cheat Sheet',
title: <Translate id="section0">Cheat Sheet</Translate>,
pageUrl: 'docs/cheatsheets/python/v1/cheatsheet',
imageUrl: 'img/undraw_docusaurus_mountain.svg',
description: (
<>
<Translate id="section0.desc">
A cheat sheet for common use cases with AML.
Get 80% of what you need in 20% of the documentation.
</Translate>
</>
),
},
{
title: 'Distributed GPU Training',
title: <Translate id="section1">Distributed GPU Training</Translate>,
pageUrl: 'docs/cheatsheets/python/v1/distributed-training',
imageUrl: 'img/undraw_docusaurus_react.svg',
description: (
<>
<Translate id="section1.desc">
Guide to getting your distributed training code running in Azure ML.
</Translate>
</>
),
},
{
title: 'Environments',
title: <Translate id="section2">Environments</Translate>,
pageUrl: 'docs/cheatsheets/python/v1/environment',
imageUrl: 'img/undraw_docusaurus_tree.svg',
description: (
<>
<Translate id="section2.desc">
Set up and manage your Python environments and docker images
in Azure ML.
</Translate>
</>
),
},
@@ -65,8 +72,8 @@ function Home() {
description="A user guide to Azure ML <head />">
<header className={clsx('hero hero--primary', styles.heroBanner)}>
<div className="container">
<h1 className="hero__title">{siteConfig.title}</h1>
<p className="hero__subtitle">{siteConfig.tagline}</p>
<h1 className="hero__title"><Translate id="index.title">{siteConfig.title}</Translate></h1>
<p className="hero__subtitle"><Translate id="index.tagline">{siteConfig.tagline}</Translate></p>
<div className={styles.buttons}>
<Link
className={clsx(