Merge pull request #116 from microsoft/abhiram-mlflow

Integrating MLflow.
This commit is contained in:
Said Bleik 2019-06-25 08:55:44 -07:00 committed by GitHub
Parent 7a8e4cf4d4 5d86d03d10
Commit 4dac5f1cff
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files: 415 additions and 406 deletions

View file

@ -132,6 +132,13 @@
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Failure while loading azureml_run_type_providers. Failed to load entrypoint hyperdrive = azureml.train.hyperdrive:HyperDriveRun._from_run_dto with exception module 'azureml.train.hyperdrive' has no attribute 'HyperDriveRun'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@ -188,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 2,
"metadata": {
"scrolled": true
},
@ -242,7 +249,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -285,7 +292,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -301,9 +308,9 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 5,
"metadata": {
"scrolled": true
"scrolled": false
},
"outputs": [
{
@ -484,7 +491,7 @@
"4 2267923837.jpg#2r1e entailment NaN NaN NaN NaN "
]
},
"execution_count": 22,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -511,7 +518,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@ -591,7 +598,7 @@
"4 There are children present "
]
},
"execution_count": 23,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -622,7 +629,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@ -643,7 +650,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 8,
"metadata": {
"scrolled": true
},
@ -654,7 +661,7 @@
"'../../data\\\\clean/snli_1.0'"
]
},
"execution_count": 25,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -682,7 +689,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@ -691,7 +698,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@ -713,29 +720,29 @@
"source": [
"**Prerequisites:**\n",
"\n",
"Upload the all the local files under `data_folder` to the path `./data/processed/` on the default datastore.\n",
"Upload the all the local files under `data_folder` to the default datastore.\n",
"\n",
"**Note: To download data required to train a GenSen model in the original paper, run code [here](https://github.com/Maluuba/gensen/blob/master/get_data.sh). By training on the original datasets (training time around 20 hours), it will reproduce the results in the [paper](https://arxiv.org/abs/1804.00079). For simplicity, we will train on a smaller dataset, which is SNLI preprocessed in [1 Data Loading and Preprocessing](#1-Data-Loading-and-Preprocessing) for showcasing the example.**"
]
},
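A hedged sketch (not part of this diff) of the upload step described above: it assumes `ws` is the AzureML `Workspace` and `ds` its default datastore, matching the `ds.upload(...)` call shown in the next hunk; the `data_folder` path is illustrative:

```Python
from azureml.core import Workspace

ws = Workspace.from_config()     # reads the workspace config.json
ds = ws.get_default_datastore()  # default Azure Blob datastore

data_folder = "../../data/clean/snli_1.0"  # illustrative local path

# Upload the preprocessed SNLI files so the remote compute can access them.
ds.upload(src_dir=data_folder, overwrite=True, show_progress=False)
```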
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"$AZUREML_DATAREFERENCE_6faee69b569b4268b8bf027b0bb4fd73"
"$AZUREML_DATAREFERENCE_liqungensen"
]
},
"execution_count": 29,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ds.upload(src_dir=data_folder, target_path=\"data/processed\", overwrite=True, show_progress=False)"
"ds.upload(src_dir=data_folder, overwrite=True, show_progress=False)"
]
},
{
@ -769,7 +776,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -777,13 +784,13 @@
"output_type": "stream",
"text": [
"Found existing compute target.\n",
"{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2019-06-14T20:39:31.676000+00:00', 'errors': None, 'creationTime': '2019-06-03T21:18:34.507970+00:00', 'modifiedTime': '2019-06-03T21:18:50.790782+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 8, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_NC6'}\n"
"{'currentNodeCount': 2, 'targetNodeCount': 2, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 2, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2019-06-21T20:14:04.778000+00:00', 'errors': None, 'creationTime': '2019-06-19T02:57:39.833104+00:00', 'modifiedTime': '2019-06-19T02:58:11.339451+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_NC6'}\n"
]
}
],
"source": [
"# choose a name for your cluster\n",
"cluster_name = \"gpugensen\"\n",
"cluster_name = \"gensen-mlflow\"\n",
"\n",
"try:\n",
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
@ -791,7 +798,7 @@
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
" max_nodes=8)\n",
" max_nodes=2)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@ -814,7 +821,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -836,19 +843,11 @@
"metadata": {},
"source": [
"### 2.3.1 Prepare Training Script\n",
"Now you will need to create your training script. In this tutorial, the script for distributed training of GENSEN is already provided for you at `train.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n",
"Now you will need to create your training script. In this tutorial, the script for distributed training of GENSEN is already provided for you at `gensen_train.py`. \n",
"\n",
"However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n",
"In this example, we use MLflow to log your metrics. We also use the [Azure ML-Mlflow](https://pypi.org/project/azureml-mlflow/) package to log these metrics to the Azure Portal. This is done with no change to the provided training script!\n",
"\n",
"To do so, in `train.py`, we will first access the Azure ML `Run` object within the script:\n",
"```Python\n",
"from azureml.core.run import Run\n",
"run = Run.get_context()\n",
"```\n",
"Later within the script, we log the loss metric to our run:\n",
"```Python\n",
"run.log('loss', loss.item())\n",
"```"
"In this example the script provided logs the loss for that minibatch to our Azure ML portal."
]
},
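For reference, a minimal sketch (not part of this diff) of the MLflow pattern that `gensen_train.py` adopts: `mlflow.start_run()`, `mlflow.log_param()`, and `mlflow.log_metric()` are the only instrumentation calls, and with `azureml-mlflow` installed on the compute target the metrics surface in the Azure Portal automatically. The loss values below are placeholders:

```Python
import mlflow

# Everything logged inside this context is attached to a single MLflow run.
with mlflow.start_run():
    mlflow.log_param("learning_rate", 0.0001)
    for step, loss in enumerate([0.9, 0.7, 0.5]):  # placeholder loss values
        # Logging with a step index lets the portal plot the metric as a curve.
        mlflow.log_metric("nli_loss", loss, step=step)
```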
{
@ -872,7 +871,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@ -881,7 +880,7 @@
"'../../utils_nlp/gensen/gensen_config.json'"
]
},
"execution_count": 36,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -902,7 +901,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@ -917,12 +916,12 @@
"### 2.3.3 Create a PyTorch Estimator\n",
"The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch).\n",
"\n",
"`sample_config.json` defines all the hyperparameters and paths when training GenSen model. The trained model will be saved in `data/models/example` to Azure Blob Storage. **Remember to clean `data/models/example` folder in order to save new models.**"
"`gensen_config.json` defines all the hyperparameters and paths when training GenSen model. The trained model will be saved in `models` to Azure Blob Storage. **Remember to clean `models` folder in order to save new models.**"
]
},
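For context, a hedged sketch of how the training script consumes this config file; the field name is taken from the `gensen_config.json` and `gensen_train.py` hunks later in this diff, while the loading code itself is assumed rather than shown:

```Python
import json

# Load the hyperparameter/path configuration shipped alongside the script.
with open("gensen_config.json") as f:
    config = json.load(f)

save_dir = config["data"]["save_dir"]  # "models/" after this PR's change
```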
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@ -942,11 +941,12 @@
" script_params=script_params,\n",
" compute_target=compute_target,\n",
" entry_script='utils_nlp/gensen/gensen_train.py',\n",
" node_count=4,\n",
" node_count=2,\n",
" process_count_per_node=1,\n",
" distributed_training=MpiConfiguration(),\n",
" use_gpu=True,\n",
" conda_packages=['scikit-learn=0.20.3', 'h5py', 'nltk']\n",
" conda_packages=['scikit-learn=0.20.3', 'h5py', 'nltk'],\n",
" pip_packages=['azureml-mlflow>=1.0.43.1','numpy>=1.16.0']\n",
" )"
]
},
@ -979,7 +979,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 9,
"metadata": {
"scrolled": true
},
@ -988,7 +988,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Submitting C:\\Users\\lishao\\Project\\Rotation2\\NLP directory for run. The size of the directory >= 25 MB, so it can take a few minutes.\n"
"Submitting E:\\Projects\\NLP-BP\\temp\\nlp directory for run. The size of the directory >= 25 MB, so it can take a few minutes.\n"
]
},
{
@ -996,9 +996,9 @@
"output_type": "stream",
"text": [
"Run(Experiment: pytorch-gensen,\n",
"Id: pytorch-gensen_1560797674_e36e44f4,\n",
"Id: pytorch-gensen_1561150688_f84eab04,\n",
"Type: azureml.scriptrun,\n",
"Status: Preparing)\n"
"Status: Queued)\n"
]
}
],
@ -1019,7 +1019,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**Horovod on AzureML**\n",
"#### Horovod on AzureML\n",
"\n",
"[Horovod](https://github.com/horovod/horovod) is a distributed training framework for TensorFlow, PyTorch etc. to make distributed Deep Learning fast and easy to use. We have created 2 nodes in the GPU cluster on AzureML. By using Horovod, we can use those two machines to train the model in parallel. In theory, the model trains faster on AzureML than on VM which uses single machine because it converges faster which we will get lower loss. However, by using more nodes, the model may take more time in communicating with each node. The communication time could be ignored when the model is trained on the large datasets.\n",
"\n",
@ -1031,7 +1031,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**Interpret the Training Results**\n",
"#### Interpret the Training Results\n",
"\n",
"The following chart shows the model validation loss (the less loss, the better performance) with different nodes with AmlCompute:\n",
"\n",
@ -1043,27 +1043,28 @@
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "19d55fcc0871444da604b1d828d9eac4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…"
"cell_type": "markdown",
"metadata": {},
"source": [
"The Azureml Widget allows an easy way to stream updates of the logged metrics right into your notebook. To use this feature install the widget by running the commands below. \n",
"\n",
"```\n",
"conda install ipywidgets\n",
"\n",
"jupyter nbextension install --py --user azureml.widgets\n",
"\n",
"jupyter nbextension enable azureml.widgets --user --py\n",
"\n",
"```"
]
},
"metadata": {},
"output_type": "display_data"
}
],
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"RunDetails(run).show()"
]
@ -1085,7 +1086,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True) # this provides a verbose log"
@ -1114,6 +1117,35 @@
" ```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3.6 Clean up after training\n",
"\n",
"We finally delete the training script `gensen_train.py` and config file `gensen_config.json` from the project directory."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"gensen_train = os.path.join(project_folder,'utils_nlp/gensen/gensen_train.py')\n",
"gensen_config = os.path.join(project_folder,'utils_nlp/gensen/gensen_config.json')\n",
"\n",
"if os.path.isfile(gensen_train):\n",
" os.remove(gensen_train)\n",
"else:\n",
" print(\"Error: %s file not found\" % gensen_train)\n",
" \n",
"if os.path.isfile(gensen_config):\n",
" os.remove(gensen_config)\n",
"else:\n",
" print(\"Error: %s file not found\" % gensen_config)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -1135,7 +1167,7 @@
},
{
"cell_type": "code",
"execution_count": 130,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1164,7 +1196,7 @@
},
{
"cell_type": "code",
"execution_count": 131,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -1194,54 +1226,11 @@
},
{
"cell_type": "code",
"execution_count": 132,
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c61e610d4601486e9f41fd852320b47b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO',…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5c47f13e11c646cd865d4f286b70ab0c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"RunDetails(hyperdrive_run).show()"
]
@ -1331,9 +1320,9 @@
}
],
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python NLP CPU",
"language": "python",
"name": "python3"
"name": "nlp_cpu"
},
"language_info": {
"codemirror_mode": {

View file

@ -15,20 +15,20 @@
},
"data": {"paths": [
{
"train_src": "data/processed/snli_1.0_train.txt.s1.tok",
"train_trg": "data/processed/snli_1.0_train.txt.s2.tok",
"val_src": "data/processed/snli_1.0_dev.txt.s1.tok",
"val_trg": "data/processed/snli_1.0_dev.txt.s1.tok",
"train_src": "snli_1.0_train.txt.s1.tok",
"train_trg": "snli_1.0_train.txt.s2.tok",
"val_src": "snli_1.0_dev.txt.s1.tok",
"val_trg": "snli_1.0_dev.txt.s1.tok",
"taskname": "snli"
}
],
"max_src_length": 90,
"max_trg_length": 90,
"task": "multi-seq2seq-nli",
"save_dir": "data/models/example",
"nli_train": "data/processed/snli_1.0_train.txt.clean.noblank",
"nli_dev": "data/processed/snli_1.0_dev.txt.clean.noblank",
"nli_test": "data/processed/snli_1.0_test.txt.clean.noblank"
"save_dir": "models/",
"nli_train": "snli_1.0_train.txt.clean.noblank",
"nli_dev": "snli_1.0_dev.txt.clean.noblank",
"nli_test": "snli_1.0_test.txt.clean.noblank"
},
"model": {
"dim_src": 2048,

View file

@ -15,20 +15,20 @@ AzureML provides AI Compute to train the model and track the performance.
This training process is based on GPU only.
"""
import logging
import argparse
import os
import json
import logging
import os
import time
import horovod.torch as hvd
import mlflow
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as f
import torch.optim as optim
from azureml.core.run import Run
import horovod.torch as hvd
from utils_nlp.gensen.multi_task_model import MultitaskModel
from utils_nlp.gensen.utils import (
@ -37,10 +37,8 @@ from utils_nlp.gensen.utils import (
compute_validation_loss,
)
# get the Azure ML run object
run = Run.get_context()
cudnn.benchmark = True
logger = logging.getLogger(__name__)
hvd.init()
if torch.cuda.is_available():
@ -172,7 +170,9 @@ def evaluate(
# Horovod: print output only on first rank.
if hvd.rank() == 0:
# log the best val accuracy to AML run
run.log("Best Validation Loss", np.float(validation_loss))
logging.info(
"Best Validation Loss: {}".format(np.float(validation_loss))
)
# If the validation loss is small enough, and it starts to go up.
# Should stop training.
@ -182,8 +182,6 @@ def evaluate(
min_val_loss_epoch = monitor_epoch
model_state = model.state_dict()
run.log("Validation Loss", validation_loss)
print(monitor_epoch, min_val_loss_epoch, min_val_loss)
logging.info(
"Monitor epoch: %d Validation Loss: %.3f Min Validation Epoch: "
"%d Loss : %.3f "
@ -275,15 +273,13 @@ def train(config, data_folder, learning_rate=0.0001):
config(dict): Loaded json file as a python object.
data_folder(str): Path to the folder containing the data.
learning_rate(float): Learning rate for the model.
"""
owd = os.getcwd()
try:
save_dir = config["data"]["save_dir"]
os.chdir(data_folder)
try:
with mlflow.start_run():
save_dir = config["data"]["save_dir"]
if not os.path.exists("./log"):
os.makedirs("./log")
@ -308,7 +304,9 @@ def train(config, data_folder, learning_rate=0.0001):
and "skipthought_previous" in tasknames
):
skipthought_idx = tasknames.index("skipthought_next")
skipthought_backward_idx = tasknames.index("skipthought_previous")
skipthought_backward_idx = tasknames.index(
"skipthought_previous"
)
paired_tasks = {
skipthought_idx: skipthought_backward_idx,
skipthought_backward_idx: skipthought_idx,
@ -395,10 +393,15 @@ def train(config, data_folder, learning_rate=0.0001):
mbatch_times = []
min_val_loss = 10000000
min_val_loss_epoch = -1
rng_num_tasks = len(tasknames) - 1 if paired_tasks else len(tasknames)
rng_num_tasks = (
len(tasknames) - 1 if paired_tasks else len(tasknames)
)
logging.info("OS Environ: \n {} \n\n".format(os.environ))
mlflow.log_param("learning_rate", learning_rate)
logging.info("Commencing Training ...")
start = time.time()
while True:
batch_start_time = time.time()
# Train NLI once every 10 minibatches of other tasks
if nli_ctr % 10 == 0:
minibatch = nli_iterator.get_parallel_minibatch(
@ -410,7 +413,9 @@ def train(config, data_folder, learning_rate=0.0001):
)
loss = nli_criterion(
class_logits.contiguous().view(-1, class_logits.size(1)),
class_logits.contiguous().view(
-1, class_logits.size(1)
),
minibatch["labels"].contiguous().view(-1),
)
@ -420,9 +425,6 @@ def train(config, data_folder, learning_rate=0.0001):
torch.nn.utils.clip_grad_norm(model.parameters(), 1.0)
optimizer.step()
# For AML.
run.log("loss", loss.item())
nli_mbatch_ctr += batch_size * n_gpus
if nli_mbatch_ctr >= len(nli_iterator.train_lines):
nli_mbatch_ctr = 0
@ -455,12 +457,16 @@ def train(config, data_folder, learning_rate=0.0001):
max_len_src,
max_len_trg,
)
task_idxs[skipthought_backward_idx] += batch_size * n_gpus
task_idxs[skipthought_backward_idx] += (
batch_size * n_gpus
)
if (
task_idxs[skipthought_backward_idx]
>= train_iterator.buffer_size
):
train_iterator.fetch_buffer(skipthought_backward_idx)
train_iterator.fetch_buffer(
skipthought_backward_idx
)
task_idxs[skipthought_backward_idx] = 0
optimizer.zero_grad()
@ -512,7 +518,7 @@ def train(config, data_folder, learning_rate=0.0001):
optimizer.step()
end = time.time()
mbatch_times.append(end - start)
mbatch_times.append(end - batch_start_time)
# Validations
if (
@ -532,21 +538,34 @@ def train(config, data_folder, learning_rate=0.0001):
len(task_losses[idx]),
)
)
run.log("Task Loss", np.mean(task_losses[idx]))
mlflow.log_metric(
"validation_loss",
np.mean(task_losses[idx]),
step=monitor_epoch,
)
logging.info(
"Round: %d NLI Epoch : %d NLI Examples Processed : %d NLI "
"Loss : %.5f "
% (nli_ctr, nli_epoch, nli_mbatch_ctr, np.mean(nli_losses))
% (
nli_ctr,
nli_epoch,
nli_mbatch_ctr,
np.mean(nli_losses),
)
run.log("NLI Loss", np.mean(nli_losses))
)
mlflow.log_metric(
"nli_loss", np.mean(nli_losses), step=nli_epoch
)
logging.info(
"Average time per mininbatch : %.5f"
% (np.mean(mbatch_times))
)
run.log(
"Average time per mininbatch : ", np.mean(mbatch_times)
mlflow.log_metric(
"minibatch_avg_duration", np.mean(mbatch_times)
)
task_losses = [[] for _ in tasknames]
mbatch_times = []
nli_losses = []

View file

@ -38,6 +38,7 @@ def test_load_pretrained_vectors_word2vec():
assert isinstance(load_word2vec(dir_path), Word2VecKeyedVectors)
def test_load_pretrained_vectors_glove():
dir_path = "temp_data/"
file_path = os.path.join(
@ -58,7 +59,8 @@ def test_load_pretrained_vectors_glove():
def test_load_pretrained_vectors_fasttext():
dir_path = "temp_data/"
file_path = os.path.join(os.path.join(dir_path, "fastText"), "wiki.simple.bin")
file_path = os.path.join(os.path.join(dir_path, "fastText"),
"wiki.simple.bin")
assert isinstance(load_fasttext(dir_path), FastText)

View file

@ -54,9 +54,7 @@ CONDA_GPU = {
}
PIP_BASE = {
"azureml-sdk[notebooks,tensorboard]": (
"azureml-sdk[notebooks,tensorboard]==1.0.33"
),
"azureml-sdk[notebooks,tensorboard]": "azureml-sdk[notebooks,tensorboard]==1.0.43",
"azureml-dataprep": "azureml-dataprep==1.1.4",
"black": "black>=18.6b4",
"dask": "dask[dataframe]==1.2.2",
@ -75,6 +73,7 @@ PIP_BASE = {
"nltk": "nltk>=3.4",
"pytorch-pretrained-bert": "pytorch-pretrained-bert>=0.6",
"seqeval": "seqeval>=0.0.12",
"azureml-mlflow": "azureml-mlflow>=1.0.43.1",
}
PIP_GPU = {"horovod": "horovod>=0.16.1"}