* Update Tutorial.md

* fix CPU train --> GPU test and GPU train --> CPU test. Note that CPU train --> multi-GPU test still reports an error.

* remove ujson

* remove GloVe settings from model_zoo/demo/conf.json

* update the GloVe instructions in Tutorial.md

* add nltk.download('punkt') to problem.py
This commit is contained in:
woailaosang 2019-04-30 14:06:17 +08:00, committed by L.J. SHOU
Parent 9e01e6e16f
Commit bc305b6dcd
9 changed files with 28 additions and 17 deletions

View file

@@ -667,12 +667,13 @@ class LearningMachine(object):
             self.model = torch.load(model_path)
             if isinstance(self.model, nn.DataParallel):
                 self.model = self.model.module
+            self.model.update_use_gpu(self.use_gpu)
             self.model = nn.DataParallel(self.model)
         else:
             self.model = torch.load(model_path, map_location='cpu')
             if isinstance(self.model, nn.DataParallel):
                 self.model = self.model.module
-            self.model.use_gpu = False
+            self.model.update_use_gpu(self.use_gpu)
         logging.info("Model %s loaded!" % model_path)
         logging.info("Total trainable parameters: %d" % (get_trainable_param_num(self.model)))

View file

@@ -6,7 +6,6 @@ import torch.nn as nn
 from block_zoo import *
 import copy
 import logging
-import ujson as json
 from utils.exceptions import ConfigurationError, LayerUndefinedError, LayerConfigUndefinedError
 from queue import Queue
 from utils.common_utils import transform_tensors2params, transfer_to_gpu
@@ -388,6 +387,18 @@ class Model(nn.Module):
     def is_cuda(self):
         return next(self.parameters()).data.is_cuda
 
+    def update_use_gpu(self, new_use_gpu):
+        self.use_gpu = new_use_gpu
+        for layer_id in self.layers.keys():
+            if isinstance(self.layers[layer_id], Embedding):
+                for input_cluster in self.layers[layer_id].embeddings:
+                    if isinstance(self.layers[layer_id].embeddings[input_cluster], CNNCharEmbedding):
+                        self.layers[layer_id].embeddings[input_cluster].layer_conf.use_gpu = new_use_gpu
+            elif isinstance(self.layers[layer_id], EncoderDecoder):
+                self.layers[layer_id].encoder.layer_conf.use_gpu = new_use_gpu
+                self.layers[layer_id].decoder.layer_conf.use_gpu = new_use_gpu
+            else:
+                self.layers[layer_id].layer_conf.use_gpu = new_use_gpu
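`update_use_gpu` walks every layer and rewrites `layer_conf.use_gpu`, with special cases for `Embedding` (each `CNNCharEmbedding` sub-embedding) and `EncoderDecoder` (separate encoder and decoder confs). The opposite direction of the sketch above, testing a CPU-trained checkpoint on GPU, follows the GPU branch of `load_model`; a hedged sketch with the same hypothetical checkpoint path (per the commit message, CPU train followed by multi-GPU test still errors):

```python
import torch
import torch.nn as nn

# Hypothetical checkpoint trained on CPU, now tested on a single GPU.
model = torch.load("./models/demo.model")
if isinstance(model, nn.DataParallel):
    model = model.module
model.update_use_gpu(True)      # flip every layer_conf to GPU mode
model = nn.DataParallel(model)  # re-wrap, as the GPU branch of load_model does
```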

View file

@@ -2,7 +2,7 @@
 # Licensed under the MIT license.
 import codecs
-import ujson as json
+import json
 import os
 import tempfile
 import random

View file

@@ -25,7 +25,7 @@
 ## <span id="installation">Installation</span>
-*Note: NeuronBlocks is based on Python 3.6*
+*Note: NeuronBlocks is based on **Python 3.6***
 1. Clone this project.
 ```bash
@@ -37,7 +37,7 @@
 pip install -r requirements.txt
 ```
-3. Install PyTorch (*NeuronBlocks supports PyTorch version 0.4.1 currently*).
+3. Install PyTorch (*NeuronBlocks supports **PyTorch 0.4.1** currently*).
 For **Linux**, run the following command:
 ```bash
@@ -50,15 +50,11 @@
 ## <span id="quick-start">Quick Start</span>
-Get started by trying the given examples.
+Get started by trying the given examples. For **Windows**, we suggest using PowerShell instead of CMD.
 *Tips: in the following instructions, PROJECT_ROOT denotes the root directory of this project.*
 ```bash
-# get GloVe pre-trained word vectors
-cd PROJECT_ROOT/dataset
-bash get_glove.sh
 # train
 cd PROJECT_ROOT
 python train.py --conf_path=model_zoo/demo/conf.json
@@ -207,6 +203,11 @@ A model visualizer is provided for visualization and configuration correctness checking.
 In Model Zoo, we provide a suite of NLP models for common NLP tasks, in the form of JSON configuration files. You can pick one of the existing models (JSON config files) in Model Zoo to start model training quickly, or build your own models by modifying the JSON config file to suit your specific task.
+*Note: Before trying models on NLP tasks, please download [GloVe](https://nlp.stanford.edu/projects/glove/) first via the following commands.*
+```bash
+cd PROJECT_ROOT/dataset
+./get_glove.sh
+```
 ### <span id="task-1">Task 1: Text Classification</span>

View file

@@ -1,7 +1,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT license.
-import ujson as json
 import codecs
 import logging

View file

@@ -99,6 +99,8 @@ class CNNCharEmbedding(BaseLayer):
         string_reshaped = string.view(string.size()[0], -1)    #[batch_size, seq_len * char num in words]
         char_embs_lookup = self.char_embeddings(string_reshaped).float()    # [batch_size, seq_len * char num in words, embedding_dim]
         if self.is_cuda():
+            if self.filters.device == torch.device('cpu'):
+                self.filters = self.filters.cuda()
             char_embs_lookup = char_embs_lookup.cuda(device=self.filters.device)
         char_embs_lookup = char_embs_lookup.view(-1, string.size()[2], self.layer_conf.embedding_matrix_dim)    #[batch_size * seq_len, char num in words, embedding_dim]
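The added check covers a model trained on CPU and later run on GPU: `self.filters` starts out on the CPU, so it is moved to CUDA the first time a CUDA forward pass occurs. A standalone sketch of the same lazy-move pattern with a toy module (hypothetical names and shapes, not the NeuronBlocks class; the parameter is moved via `.data` so it stays registered):

```python
import torch
import torch.nn as nn

class LazyCudaFilters(nn.Module):
    """Toy module whose parameter may lag behind the input's device."""
    def __init__(self):
        super(LazyCudaFilters, self).__init__()
        self.filters = nn.Parameter(torch.randn(8, 16))  # created on CPU

    def forward(self, x):
        # Mirror the device check added above: migrate once, on the first CUDA call.
        if x.is_cuda and self.filters.device == torch.device('cpu'):
            self.filters.data = self.filters.data.cuda()
        return x.to(self.filters.device) @ self.filters

if torch.cuda.is_available():
    module = LazyCudaFilters()
    out = module(torch.randn(4, 8).cuda())  # parameter migrates to the GPU here
```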

View file

@@ -9,13 +9,9 @@
       "train_data_path": "./dataset/demo/train.tsv",
       "valid_data_path": "./dataset/demo/valid.tsv",
       "test_data_path": "./dataset/demo/test.tsv",
-      "predict_data_path": "./dataset/demo/predict.tsv",
-      "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt"
+      "predict_data_path": "./dataset/demo/predict.tsv"
     },
     "file_with_col_header": false,
-    "pretrained_emb_type": "glove",
-    "pretrained_emb_binary_or_text": "text",
-    "involve_all_words_in_pretrained_emb": false,
     "add_start_end_for_seq": true,
     "file_header": {
       "question_text": 0,

View file

@@ -6,6 +6,8 @@ import numpy as np
 from core.CellDict import CellDict
 from tqdm import tqdm
 from utils.corpus_utils import load_embedding
+import nltk
+nltk.download('punkt')
 from utils.BPEEncoder import BPEEncoder
 import codecs
 from multiprocessing import cpu_count
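Calling `nltk.download('punkt')` at import time guarantees the punkt tokenizer data is present on a fresh machine, but it re-runs the downloader on every import. A hedged alternative sketch that only downloads when the data is actually missing:

```python
import nltk

# Fetch the punkt tokenizer data only if it is not already installed locally.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
```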

View file

@@ -3,7 +3,6 @@ six==1.11.0
 nltk==3.4
 gensim==3.2.0
 tqdm==4.19.5
-ujson==1.35
 numpy==1.13.3
 scikit_learn==0.19.2
 PyYAML==3.13