diff --git a/.ci/repo_metrics_pipeline.yml b/.ci/repo_metrics_pipeline.yml index 40a0c81..4daaabc 100644 --- a/.ci/repo_metrics_pipeline.yml +++ b/.ci/repo_metrics_pipeline.yml @@ -1,3 +1,23 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +# More info on scheduling: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?view=azure-devops&tabs=yaml#scheduled-triggers +# Implementing the scheduler from the dashboard +# Uncomment in case it wants to be done from using the yml +# schedules: +# - cron: "56 22 * * *" +# displayName: Daily track of metrics +# branches: +# include: +# - master +# always: true + + +# no PR builds +pr: none + +# no CI trigger +trigger: none jobs: - job: Repometrics @@ -5,7 +25,6 @@ jobs: vmImage: 'ubuntu-16.04' steps: - - task: UsePythonVersion@0 inputs: versionSpec: '3.6' @@ -13,12 +32,12 @@ jobs: - script: | cp tools/repo_metrics/config_template.py tools/repo_metrics/config.py - sed -i ''s/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/$(github_token)/g'' tools/repo_metrics/config.py - sed -i ''s/XXXXXXXXXXXXXXXXXXXXXXXXX/$(cosmosdb_connectionstring)/g'' tools/repo_metrics/config.py + sed -i "s//$(github_token)/" tools/repo_metrics/config.py + sed -i "s//$(cosmosdb_connectionstring)/" tools/repo_metrics/config.py displayName: Configure CosmosDB Connection - script: | - python -m pip install python-dateutil>=2.80 pymongo>=3.8.0 gitpython>2.1.11 requests>=2.21.0 + python -m pip install "python-dateutil>=2.8.0" "pymongo>=3.8.0" "gitpython>2.1.11" "requests>=2.21.0" python tools/repo_metrics/track_metrics.py --github_repo "https://github.com/microsoft/ComputerVision" --save_to_database displayName: Python script to record stats diff --git a/tools/repo_metrics/README.md b/tools/repo_metrics/README.md index 102529d..6f684b9 100644 --- a/tools/repo_metrics/README.md +++ b/tools/repo_metrics/README.md @@ -1,8 +1,8 @@ # Repository Metrics -[![Build 
Status](https://dev.azure.com/best-practices/computervision/_apis/build/status/repo-metrics?branchName=master)](https://dev.azure.com/best-practices/computervision/_build/latest?definitionId=27&branchName=master) +[![Build Status](https://dev.azure.com/best-practices/computervision/_apis/build/status/repo-metrics?branchName=staging)](https://dev.azure.com/best-practices/computervision/_build/latest?definitionId=27&branchName=staging) -We developed a script that allows us to track the metrics of the ComputerVisionBestPractices repo. Some of the metrics we can track are listed here: +We developed a script that allows us to track the repo metrics. Some of the metrics we can track are listed here: * Number of stars * Number of forks @@ -10,17 +10,27 @@ We developed a script that allows us to track the metrics of the ComputerVisionB * Number of views * Number of lines of code -To see the full list of metrics, see [git_stats.py](scripts/repo_metrics/git_stats.py) +To see the full list of metrics, see [git_stats.py](git_stats.py) The first step is to set up the credentials, copy the configuration file and fill up the credentials of GitHub and CosmosDB: - cp scripts/repo_metrics/config_template.py scripts/repo_metrics/config.py + cp tools/repo_metrics/config_template.py tools/repo_metrics/config.py To track the current state of the repository and save it to CosmosDB: - python scripts/repo_metrics/track_metrics.py --github_repo "https://github.com/Microsoft/ComputerVision" --save_to_database + python tools/repo_metrics/track_metrics.py --github_repo "https://github.com/Microsoft/ComputerVision" --save_to_database To track an event related to this repository and save it to CosmosDB: - python scripts/repo_metrics/track_metrics.py --event "Today we did our first blog of the project" --event_date 2018-12-01 --save_to_database + python tools/repo_metrics/track_metrics.py --event "Today we did our first blog of the project" --event_date 2018-12-01 --save_to_database + + +### 
Setting up Azure CosmosDB + +The API that is used to track the GitHub metrics is the [Mongo API](https://docs.microsoft.com/en-us/azure/cosmos-db/mongodb-introduction). + +The database name and collection names are defined in the [config file](config_template.py). There are two main collections, defined as `COLLECTION_GITHUB_STATS` and `COLLECTION_EVENTS`, to store the information described in the previous section. + +**IMPORTANT NOTE**: If the database and the collections are created directly through the portal, a common partition key should be defined. We recommend using `date` as the partition key. + diff --git a/tools/repo_metrics/config_template.py b/tools/repo_metrics/config_template.py index 03efb45..1b6b42b 100644 --- a/tools/repo_metrics/config_template.py +++ b/tools/repo_metrics/config_template.py @@ -3,10 +3,12 @@ # Github token # More info: https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/ -GITHUB_TOKEN = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" +GITHUB_TOKEN = "" # CosmosDB Mongo API -CONNECTION_STRING = "mongodb://XXXXXXXXXXXXXXXXXXXXXXXXX.documents.azure.com:10255/?ssl=true&replicaSet=globaldb" +# * Azure Portal: Settings -> Connection String -> PRIMARY CONNECTION STRING +# * For example, 'mongodb://:@:/?ssl=true&replicaSet=globaldb' +CONNECTION_STRING = "" DATABASE = "cv_stats" COLLECTION_GITHUB_STATS = "github_stats" COLLECTION_EVENTS = "events" diff --git a/tools/repo_metrics/track_metrics.py b/tools/repo_metrics/track_metrics.py index 5f5b181..b0e40fd 100644 --- a/tools/repo_metrics/track_metrics.py +++ b/tools/repo_metrics/track_metrics.py @@ -14,7 +14,6 @@ import logging from datetime import datetime from dateutil.parser import isoparse from pymongo import MongoClient -from datetime import datetime from tools.repo_metrics.git_stats import Github from tools.repo_metrics.config import ( GITHUB_TOKEN, @@ -32,6 +31,7 @@ log = logging.getLogger() def parse_args(): """Argument parser. + Returns: obj: Parser. 
""" @@ -61,12 +61,14 @@ def parse_args(): def connect(uri="mongodb://localhost"): """Mongo connector. + Args: uri (str): Connection string. + Returns: obj: Mongo client. """ - client = MongoClient(uri, serverSelectionTimeoutMS=1000) + client = MongoClient(uri, serverSelectionTimeoutMS=5000) # Send a query to the server to see if the connection is working. try: @@ -78,9 +80,11 @@ def connect(uri="mongodb://localhost"): def event_as_dict(event, date): """Encodes an string event input as a dictionary with the date. + Args: event (str): Details of a event. date (datetime): Date of the event. + Returns: dict: Dictionary with the event and the date. """ @@ -89,8 +93,10 @@ def event_as_dict(event, date): def github_stats_as_dict(github): """Encodes Github statistics as a dictionary with the date. + Args: obj: Github object. + Returns: dict: Dictionary with Github details and the date. """ @@ -125,6 +131,7 @@ def github_stats_as_dict(github): def tracker(args): """Main function to track metrics. + Args: args (obj): Parsed arguments. """