# azureml-examples/readme.py

# imports
import os
import json
import glob
import argparse

# issue #146: refuse to run unless os.name reports a posix platform
is_posix = "posix" in os.name
if not is_posix:
    unsupported_msg = "windows is not supported, see issue #146 (https://github.com/Azure/azureml-examples/issues/146)"
    print(unsupported_msg)
    exit(1)

# setup argparse
def _str_to_bool(value):
    """Convert a command-line string to a bool.

    `type=bool` would treat every non-empty string (including "False") as
    True, so the accepted spellings are listed explicitly instead.

    Raises:
        argparse.ArgumentTypeError: if `value` is not a recognised spelling.
    """
    text = value.strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    # the empty string stays falsy, as it was with `type=bool`
    if text in ("0", "false", "f", "no", "n", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser()
# `nargs="?"` with `const=True` lets a bare `--check-readme` enable the check,
# while `--check-readme true` / `--check-readme false` keep working.
parser.add_argument(
    "--check-readme", type=_str_to_bool, nargs="?", const=True, default=False
)
args = parser.parse_args()

# constants, variables, parameters, etc.
# text that is written before and after the generated tables in README.md
with open("prefix.md", "r") as prefix_file, open("suffix.md", "r") as suffix_file:
    prefix = prefix_file.read()
    suffix = suffix_file.read()

def _table_header(title, columns):
    """Return the opening of a markdown table: bold title, column row, divider row."""
    column_row = "|".join(columns)
    divider_row = "|".join("-" for _ in columns)
    return f"\n**{title}**\n\n{column_row}\n{divider_row}\n"


# each table starts as its header; rows are appended further down the script
tutorial_table = _table_header(
    "Tutorials", ("path", "status", "notebooks", "description")
)

notebook_table = _table_header("Notebooks", ("path", "description"))

train_table = _table_header(
    "Train", ("path", "compute", "environment", "description")
)

deploy_table = _table_header("Deploy", ("path", "compute", "description"))

# values passed to `az ml folder attach -w ... -g ...` in the generated workflows
ws, rg = "default", "azureml-examples"

# GitHub Actions expressions; presumably held in variables so their literal
# braces can be interpolated into the workflow f-strings without escaping
mn, mw, cr = (
    "${{matrix.notebook}}",
    "${{matrix.workflow}}",
    "${{secrets.AZ_AE_CREDS}}",
)

# kernelspec metadata stamped onto every notebook
kernelspec = dict(display_name="Python 3.8", language="python", name="python3.8")

def read_tutorial_description(readme_path):
    """Return the description declared in a tutorial README.

    The file is scanned line by line; the text following the first
    "description: " marker is returned with surrounding whitespace removed.
    Returns "*no description*" when the file cannot be read or contains no
    such line.
    """
    try:
        with open(readme_path, "r") as readme:
            for line in readme:
                if "description: " in line:
                    # split once on the marker itself so a description that
                    # contains ": " is kept whole
                    return line.split("description: ", 1)[-1].strip()
    except (OSError, UnicodeDecodeError):
        # best effort: a missing or unreadable README just means no description
        pass
    return "*no description*"


# process tutorials/*
tutorials = sorted(glob.glob("tutorials/*"))

for tutorial in tutorials:

    # get list of notebooks, rendered as markdown links separated by <br>
    nbs = sorted(
        [nb.split("/")[-1] for nb in glob.glob(f"{tutorial}/*.ipynb")]
    )  # TODO: fix for Windows
    nbs = [f"[{nb}]({tutorial}/{nb})" for nb in nbs]  # TODO: fix for Windows
    nbs = "<br>".join(nbs)

    # get the tutorial name and initials; empty segments (e.g. from "a--b")
    # are skipped so they cannot raise IndexError
    name = tutorial.split("/")[-1]  # TODO: fix for Windows
    initials = "".join([word[0] for word in name.split("-") if word])

    # build entries for tutorial table
    status = f"[![{name}](https://github.com/Azure/azureml-examples/workflows/run-tutorial-{initials}/badge.svg)](https://github.com/Azure/azureml-examples/actions?query=workflow%3Arun-tutorial-{initials})"
    desc = read_tutorial_description(f"{tutorial}/README.md")

    # add row to tutorial table
    tutorial_table += f"[{name}]({tutorial})|{status}|{nbs}|{desc}\n"

# process notebooks/*
# sorted list of the .ipynb files directly under notebooks/ (non-recursive glob)
notebooks = sorted(glob.glob("notebooks/*.ipynb"))

# create `run-notebooks` workflow yaml file
# The template is an f-string: `{notebooks}` interpolates the Python list as-is
# (its repr) as the job matrix, and `{cr}`, `{ws}`, `{rg}`, `{mn}` are the
# module-level constants defined near the top of this file. `{mn}` expands to
# the `${{matrix.notebook}}` expression, so each matrix job runs papermill on
# its own notebook. The YAML is whitespace-sensitive; keep the literal intact.
workflow = f"""name: run-notebooks
on:
  schedule:
      - cron: "0 0/2 * * *"
  push:
    branches:
      - main
    paths:
      - "notebooks/**"
  pull_request:
    branches:
      - main
    paths:
      - "notebooks/**"
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        notebook: {notebooks}
    steps:
    - name: check out repo
      uses: actions/checkout@v2
    - name: setup python
      uses: actions/setup-python@v2
      with:
        python-version: "3.8"
    - name: pip install
      run: pip install -r requirements.txt
    - name: azure login
      uses: azure/login@v1
      with:
        creds: {cr}
    - name: install azmlcli
      run: az extension add -s https://azurecliext.blob.core.windows.net/release/azure_cli_ml-1.15.0-py3-none-any.whl -y
    - name: attach to workspace
      run: az ml folder attach -w {ws} -g {rg}
    - name: run notebook
      run: papermill {mn} out.ipynb -k python
"""

# persist the rendered `run-notebooks` workflow under .github/workflows
print("writing workflow file...")
with open(".github/workflows/run-notebooks.yml", "w") as workflow_file:
    workflow_file.write(workflow)

def read_notebook_description(notebook):
    """Return the description declared in the first cell of a parsed notebook.

    `notebook` is the object loaded from an .ipynb file. The first cell's
    source (a single string or a list of line strings) is scanned for a
    "description: " marker and the remainder of that line is returned,
    stripped. Returns "*no description*" when there is no first cell, no
    marker, or the notebook does not have the expected structure.
    """
    try:
        source = notebook["cells"][0]["source"]
        lines = source.splitlines() if isinstance(source, str) else source
        for line in lines:
            if "description: " in line:
                # work on the line itself rather than on the repr of the list,
                # so quoting and following lines cannot leak into the result
                return line.split("description: ", 1)[-1].strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        # malformed or empty notebook: fall through to the default
        pass
    return "*no description*"


# create notebook_table
for nb in notebooks:

    # read in notebook
    with open(nb, "r") as f:
        data = json.load(f)

    # read in the description; computed fresh for every notebook so a value
    # left over from a previous iteration can never end up in this row
    desc = read_notebook_description(data)

    # build tables
    notebook_table += f"[{nb}]({nb})|{desc}\n"

# process workflows/*
# sorted list of every *job*.py file located at least one directory below
# workflows/ (recursive glob)
workflows = sorted(glob.glob("workflows/**/*/*job*.py", recursive=True))

# create `run-workflows` workflow yaml file
# The template is an f-string: `{workflows}` interpolates the Python list as-is
# (its repr) as the job matrix, and `{cr}`, `{ws}`, `{rg}`, `{mw}` are the
# module-level constants defined near the top of this file. `{mw}` expands to
# the `${{matrix.workflow}}` expression, so each matrix job runs its own
# script with `python`. The YAML is whitespace-sensitive; keep the literal intact.
workflow = f"""name: run-workflows
on:
  schedule:
      - cron: "0 0/2 * * *"
  push:
    branches:
      - main
    paths:
      - "workflows/**"
  pull_request:
    branches:
      - main
    paths:
      - "workflows/**"
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        workflow: {workflows}
    steps:
    - name: check out repo
      uses: actions/checkout@v2
    - name: setup python
      uses: actions/setup-python@v2
      with:
        python-version: "3.8"
    - name: pip install
      run: pip install -r requirements.txt
    - name: azure login
      uses: azure/login@v1
      with:
        creds: {cr}
    - name: install azmlcli
      run: az extension add -s https://azurecliext.blob.core.windows.net/release/azure_cli_ml-1.15.0-py3-none-any.whl -y
    - name: attach to workspace
      run: az ml folder attach -w {ws} -g {rg}
    - name: run workflow
      run: python {mw}
"""

# persist the rendered `run-workflows` workflow under .github/workflows
print("writing workflow file...")
with open(".github/workflows/run-workflows.yml", "w") as workflow_file:
    workflow_file.write(workflow)

# create example tables
for wf in workflows:

    # read in example; the file is closed again before any parsing happens
    with open(wf, "r") as f:
        data = f.read()

    # read in the description: the text after the last ": " on the first line.
    # str.split never raises, so the fallback is applied explicitly when that
    # text is empty instead of relying on an except clause that cannot fire.
    desc = data.split("\n")[0].split(": ")[-1].strip() or "*no description*"

    # build tables; the kind of example is decided by its path
    if "train" in wf:
        # parse the script text for compute target
        if "cpu-cluster" in data:
            compute = "AML - CPU"
        elif "gpu-cluster" in data or "gpu-K80" in data or "gpu-V100" in data:
            compute = "AML - GPU"
        else:
            compute = "unknown"
        # parse for environment type; the first three checks look at the
        # script text, the mlproject check looks at the path
        if "Environment.from_pip_requirements" in data:
            environment = "pip"
        elif "Environment.from_conda_specification" in data:
            environment = "conda"
        elif "env.docker.base_dockerfile" in data:
            environment = "docker"
        elif "mlproject" in wf:
            environment = "mlproject"
        else:
            environment = "unknown"
        train_table += f"[{wf}]({wf})|{compute}|{environment}|{desc}\n"
    elif "deploy" in wf:
        # for deployments the compute target is read from the path
        if "aci-cpu" in wf:
            compute = "ACI - CPU"
        elif "aks-cpu" in wf:
            compute = "AKS - CPU"
        elif "aks-gpu" in wf:
            compute = "AKS - GPU"
        elif "local" in wf:
            compute = "local"
        else:
            compute = "unknown"
        deploy_table += f"[{wf}]({wf})|{compute}|{desc}\n"

# collect every notebook matched by the recursive pattern, in sorted order
notebooks = glob.glob("**/**/*.ipynb", recursive=True)
notebooks.sort()

# stamp the shared kernelspec onto each notebook and write it back in place
for nb in notebooks:

    # load the notebook JSON
    with open(nb, "r") as nb_in:
        data = json.load(nb_in)

    # overwrite the kernelspec entry of the notebook metadata
    nb_metadata = data["metadata"]
    nb_metadata["kernelspec"] = kernelspec

    # save the notebook with a one-space indent
    with open(nb, "w") as nb_out:
        json.dump(data, nb_out, indent=1)

# run the code formatters in order: black on .py files, then black-nb
# (clearing outputs) on .ipynb files; exit statuses are not inspected
for formatter_cmd in ("black .", "black-nb --clear-output ."):
    os.system(formatter_cmd)

# snapshot the current README.md so it can be compared with the new one
with open("README.md", "r") as readme:
    before = readme.read()

# regenerate README.md: prefix, the four tables in order, then suffix
print("writing README.md...")
readme_sections = (
    prefix,
    tutorial_table,
    notebook_table,
    train_table,
    deploy_table,
    suffix,
)
with open("README.md", "w") as readme:
    readme.write("".join(readme_sections))

# read the file back as it now exists on disk
with open("README.md", "r") as readme:
    after = readme.read()

# with --check-readme set, a changed README is reported through exit status 2
readme_changed = before != after
if args.check_readme and readme_changed:
    print("README.md file did not match...")
    exit(2)