Add commands of getting run and task

2018-01-03 09:44:00 -08:00 · 2018-01-03 09:44:00 -08:00 · 9bd5ad4c54
--- a/.gitignore
+++ b/.gitignore
@ -99,3 +99,6 @@ ENV/

 # mypy
 .mypy_cache/
+
+# IDE and Editor
+.vscode/
--- a/.idea/a01client.iml
+++ b/.idea/a01client.iml
@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.6 (wip)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@ -0,0 +1,32 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="18">
+            <item index="0" class="java.lang.String" itemvalue="nose" />
+            <item index="1" class="java.lang.String" itemvalue="adal" />
+            <item index="2" class="java.lang.String" itemvalue="vcrpy" />
+            <item index="3" class="java.lang.String" itemvalue="applicationinsights" />
+            <item index="4" class="java.lang.String" itemvalue="tabulate" />
+            <item index="5" class="java.lang.String" itemvalue="argcomplete" />
+            <item index="6" class="java.lang.String" itemvalue="setuptools" />
+            <item index="7" class="java.lang.String" itemvalue="requests" />
+            <item index="8" class="java.lang.String" itemvalue="jmespath" />
+            <item index="9" class="java.lang.String" itemvalue="pyyaml" />
+            <item index="10" class="java.lang.String" itemvalue="paramiko" />
+            <item index="11" class="java.lang.String" itemvalue="pyOpenSSL" />
+            <item index="12" class="java.lang.String" itemvalue="colorama" />
+            <item index="13" class="java.lang.String" itemvalue="pylint" />
+            <item index="14" class="java.lang.String" itemvalue="mock" />
+            <item index="15" class="java.lang.String" itemvalue="pygments" />
+            <item index="16" class="java.lang.String" itemvalue="six" />
+            <item index="17" class="java.lang.String" itemvalue="pip" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyProtectedMemberInspection" enabled="true" level="INFORMATION" enabled_by_default="true" />
+  </profile>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (wip)" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/a01client.iml" filepath="$PROJECT_DIR$/.idea/a01client.iml" />
+    </modules>
+  </component>
+</project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
--- a/a01.py
+++ b/a01.py
@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+import argparse
+
+parser = argparser.ArgumentParser()
+parser.parse_args()
--- a/a01/init.py
+++ b/a01/init.py
@ -0,0 +1,5 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
--- a/a01/main.py
+++ b/a01/main.py
@ -0,0 +1,128 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import sys
+import argparse
+import functools
+import shlex
+import logging
+import datetime
+from subprocess import check_output, CalledProcessError
+from collections import defaultdict
+
+import requests
+import tabulate
+
+logger = logging.getLogger('A01')
+
+LOG_FILE = 'https://azureclia01log.file.core.windows.net/k8slog/{}?' \
+           'se=2019-01-01T00%3A00%3A00Z&sp=r&sv=2017-04-17&sr=s&sig=v/4afGXPe5ENN1K7zIw1oQVUm73LDCZPEFgp6NUerh4%3D'
+
+
+@functools.lru_cache(maxsize=1)
+def get_store_uri(store) -> str:
+    cmd = f'kubectl get service {store}' + ' -ojsonpath={.status.loadBalancer.ingress[0].ip}'
+    try:
+        store_ip = check_output(shlex.split(cmd)).decode('utf-8')
+        return f'http://{store_ip}'
+    except CalledProcessError:
+        logger.error('Failed to get the a01 task store service URI. Make sure kubectl is installed and login the '
+                     'cluster.')
+        sys.exit(1)
+
+
+def get_arguments() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(prog='a01')
+    parser.set_defaults(func=lambda _: parser.print_help())
+    sub = parser.add_subparsers(title='Commands')
+
+    get_actions = sub.add_parser('get', help='Retrieve information.')
+    get_actions.set_defaults(func=lambda _: get_actions.print_help())
+    get_actions_root = get_actions.add_subparsers(title='Sub Commands')
+
+    get_run = get_actions_root.add_parser('run', help='Retrieve run data.')
+    get_run.add_argument('id', help='The run id')
+    get_run.add_argument('--store', help='The name of the task store. Default: a01store.', default='a01store')
+    get_run.set_defaults(func=list_run)
+
+    get_task = get_actions_root.add_parser('task', help='Retrieve task data.')
+    get_task.add_argument('id', help='The task id. Support multiple IDs.', nargs='+')
+    get_task.add_argument('--store', help='The name of the task store. Default: a01store.', default='a01store')
+    get_task.add_argument('--log', help='Retrieve the log of the task', action='store_true')
+    get_task.add_argument('--run', help='The run id (required when retrieve log. will be remove later)')
+    get_task.set_defaults(func=show_task)
+
+    # if print_help:
+    #     parser.print_help()
+    return parser.parse_args()
+
+
+def list_run(args):
+    resp = requests.get(f'{get_store_uri(args.store)}/run/{args.id}/tasks')
+    resp.raise_for_status()
+    tasks = resp.json()
+
+    statuses = defaultdict(lambda: 0)
+    results = defaultdict(lambda: 0)
+
+    failure = []
+
+    for task in tasks:
+        status = task['status']
+        result = task['result']
+
+        statuses[status] = statuses[status] + 1
+        results[result] = results[result] + 1
+
+        if result == 'Failed':
+            failure.append(
+                (task['id'],
+                 task['name'].rsplit('.')[-1],
+                 task['status'],
+                 task['result'],
+                 (task.get('result_details') or dict()).get('agent'),
+                 (task.get('result_details') or dict()).get('duration')))
+
+    status_summary = ' | '.join([f'{status_name}: {count}' for status_name, count in statuses.items()])
+    result_summary = ' | '.join([f'{result or "Not run"}: {count}' for result, count in results.items()])
+
+    summaries = [('Time', str(datetime.datetime.now())), ('Task', status_summary), ('Result', result_summary)]
+
+    print()
+    print(tabulate.tabulate(summaries, tablefmt='plain'))
+    print()
+    print(tabulate.tabulate(failure, headers=('id', 'name', 'status', 'result', 'agent', 'duration(ms)')))
+
+
+def show_task(args):
+    for task_id in args.id:
+        resp = requests.get(f'{get_store_uri(args.store)}/task/{task_id}')
+        resp.raise_for_status()
+        task = resp.json()
+        view = [
+            ('id', task['id']),
+            ('result', task['result']),
+            ('test', task['settings']['path']),
+            ('duration(ms)', task['result_details']['duration'])
+        ]
+
+        print(tabulate.tabulate(view, tablefmt='plain'))
+        if args.log:
+            log_path = LOG_FILE.format(f'{args.run}/task_{task_id}.log')
+            print()
+            for index, line in enumerate(requests.get(log_path).content.decode('utf-8').split('\n')):
+                print(f' {index}\t{line}')
+
+        print()
+        print()
+
+
+def main() -> None:
+    args = get_arguments()
+    args.func(args)
+
+
+if __name__ == '__main__':
+    main()
--- a/delete_run.py
+++ b/delete_run.py
@ -0,0 +1,32 @@
+import sys
+import logging
+import yaml
+import requests
+from subprocess import check_output, CalledProcessError
+
+logging.basicConfig(level=logging.DEBUG)
+
+try:
+    run_id = sys.argv[1]
+except IndexError:
+    print(f'Usage: {sys.argv[0]} <Run ID>', file=sys.stderr)
+    sys.exit(1)
+
+try:
+    with open('config.yaml') as fq:
+        config = yaml.load(fq)
+    store_name = config['store']
+    store_ip = check_output(
+        'kubectl get service a01store -ojsonpath={.status.loadBalancer.ingress[0].ip}'.split(' ')).decode('utf-8')
+    store_uri = f'http://{store_ip}'
+except IOError:
+    print('Missing config.yaml', file=sys.stderr)
+    sys.exit(1)
+except KeyError as error:
+    print(f'Incorrect config.yaml file: {error}', file=sys.stderr)
+    sys.exit(1)
+except CalledProcessError:
+    print('Fail to run kubectl')
+
+
+requests.delete(f'{store_uri}/run/{run_id}')
--- a/examples/oneshot.yaml
+++ b/examples/oneshot.yaml
@ -0,0 +1,24 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: a01droidrun
+spec:
+  template:
+    metadata:
+      name: a01droidrun
+    spec:
+      containers:
+      - name: a01droid
+        image: a01reg.azurecr.io/a01droid:0.0.5
+        command: ["python",  "/app/job.py", "a01store", "2"]
+        env:
+        - name: ENV_POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: ENV_NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+      restartPolicy: Never
+  backoffLimit: 2
--- a/examples/parallelx3.yaml
+++ b/examples/parallelx3.yaml
@ -0,0 +1,25 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: a01droidrun
+spec:
+  parallelism: 3
+  backoffLimit: 3
+  template:
+    metadata:
+      name: a01droidrun
+    spec:
+      containers:
+      - name: a01droid
+        image: a01reg.azurecr.io/a01droid:0.0.5-alpine-python3.6
+        command: ["python",  "/app/job.py", "a01store", "3"]
+        env:
+        - name: ENV_POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: ENV_NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+      restartPolicy: OnFailure
--- a/monitor.py
+++ b/monitor.py
@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+import sys
+import curses
+import time
+import datetime
+import requests
+import tabulate
+from collections import defaultdict
+from subprocess import check_output
+
+run_id = sys.argv[1]
+
+store_ip = check_output(
+    'kubectl get service a01store -ojsonpath={.status.loadBalancer.ingress[0].ip}'.split(' ')).decode('utf-8')
+store_uri = f'http://{store_ip}'
+
+
+def main(stdscr):
+    while True:
+        resp = requests.get(f'{store_uri}/run/{run_id}/tasks')
+        resp.raise_for_status()
+        tasks = resp.json()
+
+        statuses = defaultdict(lambda: 0)
+        results = defaultdict(lambda: 0)
+
+        failure = []
+
+        for task in tasks:
+            status = task['status']
+            result = task['result']
+
+            statuses[status] = statuses[status] + 1
+            results[result] = results[result] + 1
+
+            if result == 'Failed':
+                failure.append(
+                    (task['id'],
+                     task['name'].rsplit('.')[-1],
+                     task['status'],
+                     task['result'],
+                     (task.get('result_details') or dict()).get('agent'),
+                     (task.get('result_details') or dict()).get('duration')))
+
+        status_summary = 'Task status: ' + ' | '.join(
+            [f'{status_name}: {count}' for status_name, count in statuses.items()])
+
+        result_summary = 'Results: ' + ' | '.join(
+            [f'{result or "Not run"}: {count}' for result, count in results.items()])
+
+        stdscr.addstr(0, 0, f'Update on {datetime.datetime.now()}. (refresh every 5 seconds)')
+        stdscr.addstr(2, 0, status_summary)
+        stdscr.addstr(3, 0, result_summary)
+        stdscr.addstr(6, 0, 'Failed tasks')
+        stdscr.addstr(7, 0,
+                      tabulate.tabulate(failure, headers=('id', 'name', 'status', 'result', 'agent', 'duration(ms)')))
+
+        stdscr.refresh()
+        time.sleep(5)
+
+
+try:
+    curses.wrapper(main)
+except KeyboardInterrupt:
+    print('Bye.')
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,3 @@
+PyYAML==3.12
+requests==2.18.4
+tabulate==0.8.2
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,43 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from setuptools import setup
+
+VERSION = "0.1.0"
+
+CLASSIFIERS = [
+    'Development Status :: 4 - Beta',
+    'Intended Audience :: Developers',
+    'Intended Audience :: System Administrators',
+    'Programming Language :: Python',
+    'Programming Language :: Python :: 3.6',
+    'License :: OSI Approved :: MIT License',
+]
+
+DEPENDENCIES = [
+    'tabulate>=0.8.2',
+    'requests>=2.18.4',
+    'PyYAML>=3.12'
+]
+
+setup(
+    name='a01ctl',
+    version=VERSION,
+    description='A01 CLI',
+    long_description='Command line tools for a01 system',
+    license='MIT',
+    author='Microsoft Corporation',
+    author_email='trdai@microsoft.com',
+    url='https://github.com/troydai/a01client',
+    packages=[
+        'a01',
+    ],
+    entry_points={
+        'console_scripts': [
+            'a01=a01.__main__:main'
+        ]
+    },
+    install_requires=DEPENDENCIES
+)
--- a/setup_run.py
+++ b/setup_run.py
@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+
+import logging
+import yaml
+import os
+import base64
+import tempfile
+import shlex
+import argparse
+import requests
+from subprocess import check_call, check_output
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('run_id', help='The run this job to run against.')
+parser.add_argument('-p', dest='parallelism', help='The parallelism. Default: 3', default=3, type=int)
+parser.add_argument('--store', help='The a01 store name. Default: a01store', default='a01store')
+parser.add_argument('--live', help='Run test live.', action='store_true')
+parser.add_argument('--secret', help='The kubernetes secret providing service principal. Default: azurecli-live-sp',
+                    default='azurecli-live-sp')
+parser.add_argument('--storage', help='The kubernetes secret providing Azure Storage Account credential for logging', 
+                    default='azurecli-test-storage')
+args = parser.parse_args()
+
+
+store_ip = check_output(
+    'kubectl get service a01store -ojsonpath={.status.loadBalancer.ingress[0].ip}'.split(' ')).decode('utf-8')
+store_uri = f'http://{store_ip}'
+
+run_details = requests.get(f'{store_uri}/run/{args.run_id}').json()
+image = run_details['settings']['droid_image']
+
+job_name = f'azurecli-test-{base64.b32encode(os.urandom(12)).decode("utf-8").lower()}'.rstrip('=')
+
+job_config = {
+    'apiVersion': 'batch/v1',
+    'kind': 'Job',
+    'metadata': {
+        'name': job_name
+    },
+    'spec': {
+        'parallelism': args.parallelism,
+        'backoffLimit': 5,
+        'template': {
+            'metadata': {
+                'name': job_name
+            },
+            'spec': {
+                'containers': [{
+                    'name': 'droid',
+                    'image': image,
+                    'command': ['python', '/app/job.py'],
+                    'volumeMounts': [
+                        {'name': 'azure-storage', 'mountPath': '/mnt/storage'}
+                    ],
+                    'env': [
+                        {'name': 'ENV_POD_NAME', 'valueFrom': {'fieldRef': {'fieldPath': 'metadata.name'}}},
+                        {'name': 'ENV_NODE_NAME', 'valueFrom': {'fieldRef': {'fieldPath': 'spec.nodeName'}}},
+                        {'name': 'A01_DROID_RUN_ID', 'value': args.run_id}
+                    ]
+                }],
+                'restartPolicy': 'Never',
+                'volumes': [{
+                    'name': 'azure-storage',
+                    'azureFile': {
+                        'secretName': args.storage,
+                        'shareName': 'k8slog',
+                    }}]
+            }
+        }
+    }
+}
+
+if args.live:
+    envs = job_config['spec']['template']['spec']['containers'][0]['env']
+    envs.append({'name': 'A01_RUN_LIVE', 'value': 'True'})
+    envs.append(
+        {'name': 'A01_SP_USERNAME', 'valueFrom': {'secretKeyRef': {'name': 'azurecli-live-sp', 'key': 'username'}}})
+    envs.append(
+        {'name': 'A01_SP_PASSWORD', 'valueFrom': {'secretKeyRef': {'name': 'azurecli-live-sp', 'key': 'password'}}})
+    envs.append({'name': 'A01_SP_TENANT', 'valueFrom': {'secretKeyRef': {'name': 'azurecli-live-sp', 'key': 'tenant'}}})
+
+_, name = tempfile.mkstemp(text=True)
+with open(name, 'w') as f:
+    yaml.dump(job_config, f, default_flow_style=False)
+
+print(f'Temp config file created at {name}')
+check_call(shlex.split(f'kubectl create -f {name}'))
--- a/setup_tasks.py
+++ b/setup_tasks.py
@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+
+import sys
+import json
+import os
+import base64
+import argparse
+from subprocess import check_output, CalledProcessError
+
+import tabulate
+import requests
+
+
+def get_cache_dir(image_name):
+    try:
+        image_details = json.loads(check_output(['docker', 'image', 'inspect', image_name]))
+        image_id = image_details[0]['Id'].split(':')[1]
+        cache_dir = os.path.join(os.getcwd(), '.cache', image_id)
+        if not os.path.isdir(cache_dir):
+            os.makedirs(cache_dir, exist_ok=True)
+        return cache_dir
+    except (CalledProcessError, json.JSONDecodeError, TypeError, IOError):
+        pass
+
+
+def get_store_uri():
+    # Get the public IP of the store service using kubectl, you need to authenticate yourself first.
+    store_ip = check_output(
+        'kubectl get service a01store -ojsonpath={.status.loadBalancer.ingress[0].ip}'.split(' ')).decode('utf-8')
+    return f'http://{store_ip}'
+
+
+def get_manifest(image_name, from_failures):
+    if from_failures:
+        all_tasks = requests.get(f'{get_store_uri()}/run/{from_failures}/tasks').json()
+        failed_test_paths = set([task['settings']['path'] for task in all_tasks if task['result'] != 'Passed'])
+    else:
+        failed_test_paths = None
+
+    try:
+        manifest_file = os.path.join(get_cache_dir(image_name), 'manifest.json')
+        with open(manifest_file, mode='r') as mf:
+            manifest = json.load(mf)
+    except (json.JSONDecodeError, TypeError, IOError):
+        try:
+            container_name = base64.b32encode(os.urandom(12))[:-4]
+            manifest = json.loads(
+                check_output(['docker', 'run', '--name', container_name, image_name, 'python', '/app/collect_tests.py']))
+        except CalledProcessError:
+            print(f'Failed to list tests in image {image_name}.', file=sys.stderr)
+            sys.exit(1)
+        except (json.JSONDecodeError, TypeError) as error:
+            print('Failed to parse the manifest as JSON.', file=sys.stderr)
+            print(error, file=sys.stderr)
+            sys.exit(1)
+
+        try:
+            check_output(['docker', 'rm', container_name])
+        except CalledProcessError:
+            print(f'Failed to remove container {container_name}.', file=sys.stderr)
+            sys.exit(1)
+
+        with open(os.path.join(get_cache_dir(image_name), 'manifest.json'), mode='w') as wmf:
+            json.dump(manifest, wmf)
+
+    if failed_test_paths:
+        manifest = [each for each in manifest if each['path'] in failed_test_paths]
+    return manifest
+
+
+def schedule_tests(manifest, image_name):
+    print(f'{len(manifest)} tests to run.')
+    store_uri = get_store_uri()
+
+    print(requests.get(f'{store_uri}/runs').json())
+
+    # create a run
+    resp = requests.post(f'{store_uri}/run', json={
+        'name': f'Azure CLI Test with {image_name}',
+        'settings': {
+            'droid_image': image_name
+        },
+        'details': {
+            'creator': 'Troy Dai (troy.dai@outlook.com)',
+            'purpose': 'demo'
+        }
+    })
+    resp.raise_for_status()
+    run_id = resp.json()['id']
+
+    # create tasks
+    task_payload = [
+        {
+            'name': f'Test: {each["path"]}',
+            'annotation': image_name,
+            'settings': {
+                'path': each['path'],
+            }
+        } for each in manifest]
+    requests.post(f'{store_uri}/run/{run_id}/tasks', json=task_payload).raise_for_status()
+    print(f'created run {run_id}')
+
+
+def main(arguments):
+    manifest = get_manifest(arguments.image, arguments.from_failures)
+    if arguments.module_prefix:
+        manifest = [m for m in manifest if m['module'].startswith(arguments.module_prefix)]
+
+    if args.list:
+        print(tabulate.tabulate([(m['module'], m['class'], m['method']) for m in manifest],
+                                headers=('module', 'class', 'method')))
+    else:
+        schedule_tests(manifest, arguments.image)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Create new run and tasks')
+    parser.add_argument('image', help='The Azure CLI droid container image.')
+    parser.add_argument('--list', help='Listing the tasks instead of adding them', action='store_true')
+    parser.add_argument('--module-prefix', help='Limit the tasks to the specific module prefix.')
+    parser.add_argument('--from-failures', help='Give a run id. Create a new run from the failed tasks of it.')
+    args = parser.parse_args()
+    main(args)