# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import torch
import torch.nn as nn
from block_zoo import *
import copy
import logging
from utils.exceptions import ConfigurationError, LayerUndefinedError, LayerConfigUndefinedError
from queue import Queue
from utils.common_utils import transform_tensors2params, transfer_to_gpu

from block_zoo.Embedding import *

EMBED_LAYER_NAME = 'Embedding'
EMBED_LAYER_ID = 'embedding'


def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_input_ids, use_gpu,
             conf_dict=None, shared_conf=None, succeed_embedding_flag=False, output_layer_flag=False,
             target_num=None, fixed_lengths=None, target_dict=None):
    """ Get the configuration class of a layer.

    Args:
        layer_id: layer identifier
        layer_name: name of the layer, such as BiLSTM
        input_layer_ids (list): the inputs of the current layer
        all_layer_configs (dict): records the conf class of each layer
        model_input_ids (set): the inputs of the model, e.g. ['query', 'passage']
        use_gpu: whether the model runs on GPU
        conf_dict: the raw configuration dict of this layer
        shared_conf: if fixed_lengths is not None, the output_dim of shared_conf should be corrected!
        succeed_embedding_flag: whether the layer takes its inputs directly from the embedding layer
        output_layer_flag: whether the layer is an output layer
        target_num: used to infer the dimension of the output space when a dimension of -1 is declared
        fixed_lengths: fixed sequence lengths of the model inputs, if any
        target_dict: the target dictionary, required by the CRF layer

    Returns:
        the configuration class corresponding to the layer

    """
    if shared_conf:
        conf = copy.deepcopy(shared_conf)
    else:
        try:
            conf_dict['use_gpu'] = use_gpu

            # for the Embedding layer, pull the weight_on_gpu parameter out of the inner conf
            if layer_id == EMBED_LAYER_ID:
                conf_dict['weight_on_gpu'] = conf_dict['conf']['weight_on_gpu']
                del conf_dict['conf']['weight_on_gpu']

            # For classification tasks, we usually add a Linear layer to project the output to the
            # dimension of the number of classes. If the number of classes is unknown, '-1' or
            # '#target#' can be declared instead, and the value is derived from the corpus.
            if layer_name == 'Linear':
                if isinstance(conf_dict['hidden_dim'], list):
                    if conf_dict['hidden_dim'][-1] == -1:
                        assert output_layer_flag is True, "hidden_dim == -1 is only allowed in the last layer!"
                        assert target_num is not None, "Number of targets should be given!"
                        conf_dict['hidden_dim'][-1] = target_num
                    elif conf_dict['hidden_dim'][-1] == '#target#':
                        logging.info('#target# position will be replaced by target num: %d' % target_num)
                        conf_dict['hidden_dim'][-1] = target_num
                elif isinstance(conf_dict['hidden_dim'], int) and conf_dict['hidden_dim'] == -1:
                    assert output_layer_flag is True, "hidden_dim == -1 is only allowed in the last layer!"
                    assert target_num is not None, "Number of targets should be given!"
                    conf_dict['hidden_dim'] = target_num
                elif isinstance(conf_dict['hidden_dim'], str) and conf_dict['hidden_dim'] == '#target#':
                    logging.info('#target# position will be replaced by target num: %d' % target_num)
                    conf_dict['hidden_dim'] = target_num
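
            # Illustrative example (hypothetical config): for a binary classification task
            # (target_num == 2), a Linear output layer declared with
            #     {"layer": "Linear", "conf": {"hidden_dim": [128, -1]}}
            # would be resolved here to hidden_dim == [128, 2]; the '#target#' placeholder is
            # substituted in exactly the same way.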

            # add necessary attributes for the CRF layer
            if layer_name == 'CRF':
                conf_dict['target_dict'] = target_dict

            conf = eval(layer_name + "Conf")(**conf_dict)
        except NameError as e:
            raise LayerConfigUndefinedError("\"%sConf\" has not been defined" % layer_name)

    # verify the rank consistency between joint layers
    if layer_name == EMBED_LAYER_NAME:
        # the embedding layer
        pass
    else:
        # make sure all the inputs to the current layer exist
        for input_layer_id in input_layer_ids:
            if not (input_layer_id in all_layer_configs or input_layer_id in model_input_ids):
                raise ConfigurationError("The input %s of layer %s does not exist. Please define it before "
                    "defining layer %s!" % (input_layer_id, layer_id, layer_id))

        former_output_ranks = [all_layer_configs[input_layer_id].output_rank if input_layer_id in all_layer_configs else all_layer_configs[EMBED_LAYER_ID].output_rank for input_layer_id in input_layer_ids]
        # infer input_dims
        conf.input_dims = [all_layer_configs[input_layer_id].output_dim if input_layer_id in all_layer_configs else all_layer_configs[EMBED_LAYER_ID].output_dim for input_layer_id in input_layer_ids]

        # if the input comes from the embedding layer and fixed_lengths exist, set the length in input_dims
        if len(input_layer_ids) == 1 and input_layer_ids[0] in model_input_ids and fixed_lengths:
            conf.input_dims[0][1] = fixed_lengths[input_layer_ids[0]]

        # check and verify input ranks
        if conf.num_of_inputs > 0:
            if conf.num_of_inputs != len(input_layer_ids):
                raise ConfigurationError("%s only accepts %d inputs but you feed %d inputs to it!" % \
                    (layer_name, conf.num_of_inputs, len(input_layer_ids)))
        elif conf.num_of_inputs == -1:
            conf.num_of_inputs = len(input_layer_ids)
            if isinstance(conf.input_ranks, list):
                conf.input_ranks = conf.input_ranks * conf.num_of_inputs
            else:
                logging.warning("[For developer of %s] The input_ranks attribute should be a list!" % (layer_name))
                conf.input_ranks = [conf.input_ranks] * conf.num_of_inputs

        for input_rank, former_output_rank in zip(conf.input_ranks, former_output_ranks):
            if input_rank != -1 and input_rank != former_output_rank:
                raise ConfigurationError("Input ranks of %s are inconsistent with former layers" % layer_id)
        conf.input_ranks = copy.deepcopy(former_output_ranks)

    # inference and verification inside the layer
    conf.inference()    # update attributes which rely on the input dimensions or other inputs
    conf.verify()       # verify if the configuration is legal
    former_conf = None if len(all_layer_configs) == 0 else list(all_layer_configs.values())[-1]
    conf.verify_former_block(former_conf)    # check attributes which rely on the former layer

    logging.debug('Layer id: %s; name: %s; input_dims: %s; input_ranks: %s; output_dim: %s; output_rank: %s' % (layer_id, layer_name, conf.input_dims if layer_id != 'embedding' else 'None', conf.input_ranks, conf.output_dim, conf.output_rank))

    return conf


def get_layer(layer_name, conf):
    """ Instantiate a layer by name.

    Args:
        layer_name: name of the layer, such as BiLSTM
        conf: the configuration class of the layer

    Returns:
        the instantiated layer

    """
    try:
        layer = eval(layer_name)(conf)
    except NameError as e:
        raise LayerUndefinedError("%s; Layer \"%s\" has not been defined" % (str(e), layer_name))
    return layer
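

# Note: get_layer resolves layer classes by name via eval(), so any layer named in the model
# architecture must be importable through `from block_zoo import *` above. Illustrative,
# hypothetical call: get_layer('BiLSTM', some_bilstm_conf) would look up a BiLSTM class and,
# through get_conf, its BiLSTMConf counterpart, assuming block_zoo defines them.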


class Model(nn.Module):
    def __init__(self, conf, problem, vocab_info, use_gpu):
        """ Build the model from the architecture defined in conf.

        Args:
            inputs: ['string1', 'string2']
            layer_archs: The layers must produce tensors with consistent shapes. The layers may be nested.
                [
                    {
                        'layer': layer name,
                        'conf': {xxxx}
                    },
                    [
                        {
                            'layer': layer name,
                            'conf': {},
                        },
                        {
                            'layer': layer name,
                            'conf': {},
                        }
                    ]
                ]
            vocab_info:
                {
                    'word': {
                        'vocab_size': xxx,
                        'init_weights': np matrix
                    },
                    'postag': {
                        'vocab_size': xxx,
                        'init_weights': None
                    }
                }
        """
        super(Model, self).__init__()

        inputs = conf.object_inputs_names
        layer_archs = conf.architecture
        target_num = problem.output_target_num()

        # correct the real fixed lengths if begin/end-of-sentence tokens are added
        if conf.fixed_lengths:
            fixed_lengths_corrected = copy.deepcopy(conf.fixed_lengths)
            for seq in fixed_lengths_corrected:
                if problem.with_bos_eos:
                    fixed_lengths_corrected[seq] += 2
        else:
            fixed_lengths_corrected = None
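
        # Illustrative example (hypothetical config): with conf.fixed_lengths == {'query': 30} and
        # problem.with_bos_eos == True, the corrected length becomes 32, because the begin- and
        # end-of-sentence tokens added by the problem each occupy one position of the fixed-length sequence.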

        self.use_gpu = use_gpu

        all_layer_configs = dict()
        self.layers = nn.ModuleDict()
        self.layer_inputs = dict()
        self.layer_dependencies = dict()
        self.layer_dependencies[EMBED_LAYER_ID] = set()
        # output_layer_id is a list so that multiple outputs are supported
        self.output_layer_id = []

        for layer_index, layer_arch in enumerate(layer_archs):
            output_layer_flag = True if 'output_layer_flag' in layer_arch and layer_arch['output_layer_flag'] is True else False
            succeed_embedding_flag = True if layer_index > 0 and 'inputs' in layer_arch and \
                    [input in inputs for input in layer_arch['inputs']].count(True) == len(layer_arch['inputs']) else False

            if output_layer_flag:
                self.output_layer_id.append(layer_arch['layer_id'])
                # if hasattr(self, 'output_layer_id'):
                #     raise ConfigurationError("There should be only one output!")
                # else:
                #     self.output_layer_id = layer_arch['layer_id']

            if layer_index == 0:
                # embedding layer
                emb_conf = copy.deepcopy(vocab_info)
                for input_cluster in emb_conf:
                    emb_conf[input_cluster]['dim'] = layer_arch['conf'][input_cluster]['dim']
                    emb_conf[input_cluster]['fix_weight'] = layer_arch['conf'][input_cluster].get('fix_weight', False)
                emb_conf['weight_on_gpu'] = layer_arch.get('weight_on_gpu', True)

                all_layer_configs[EMBED_LAYER_ID] = get_conf(EMBED_LAYER_ID, layer_arch['layer'],
                        None, all_layer_configs, inputs, self.use_gpu, conf_dict={'conf': emb_conf},
                        shared_conf=None, succeed_embedding_flag=False, output_layer_flag=output_layer_flag,
                        target_num=target_num, fixed_lengths=fixed_lengths_corrected, target_dict=problem.output_dict)
                self.add_layer(EMBED_LAYER_ID, get_layer(layer_arch['layer'], all_layer_configs[EMBED_LAYER_ID]))
            else:
                if layer_arch['layer'] in self.layers and not 'conf' in layer_arch:
                    # reuse a formerly defined layer (share the same parameters)
                    logging.debug("Layer id: %s; Sharing configuration with layer %s" % (layer_arch['layer_id'], layer_arch['layer']))
                    conf_dict = None
                    shared_conf = all_layer_configs[layer_arch['layer']]
                else:
                    conf_dict = layer_arch['conf']
                    shared_conf = None

                # if the layer is EncoderDecoder, infer the vocab size
                if layer_arch['layer'] == 'EncoderDecoder':
                    layer_arch['conf']['decoder_conf']['decoder_vocab_size'] = target_num
                all_layer_configs[layer_arch['layer_id']] = get_conf(layer_arch['layer_id'], layer_arch['layer'],
                        layer_arch['inputs'], all_layer_configs, inputs, self.use_gpu, conf_dict=conf_dict,
                        shared_conf=shared_conf, succeed_embedding_flag=succeed_embedding_flag,
                        output_layer_flag=output_layer_flag, target_num=target_num,
                        fixed_lengths=fixed_lengths_corrected, target_dict=problem.output_dict)

                if layer_arch['layer'] in self.layers and not 'conf' in layer_arch:
                    self.add_layer(layer_arch['layer_id'], self.layers[layer_arch['layer']])
                else:
                    self.add_layer(layer_arch['layer_id'], get_layer(layer_arch['layer'], all_layer_configs[layer_arch['layer_id']]))

                self.layer_inputs[layer_arch['layer_id']] = layer_arch['inputs']

                # register dependencies, except on the model inputs themselves
                cur_layer_depend = set()
                for layer_depend_id in layer_arch['inputs']:
                    if not layer_depend_id in inputs:
                        cur_layer_depend.add(layer_depend_id)
                self.add_dependency(layer_arch['layer_id'], cur_layer_depend)

        logging.debug("Layer dependencies: %s" % repr(self.layer_dependencies))

        # output_layer_id is always initialized above, so check for emptiness instead of hasattr
        if len(self.output_layer_id) == 0:
            raise ConfigurationError("Please define an output layer")

        self.layer_topological_sequence = self.get_topological_sequence()

    def add_layer(self, layer_id, layer):
        """ register a layer

        Args:
            layer_id: layer identifier
            layer: the layer instance to register

        Returns:

        """
        if layer_id in self.layers:
            raise ConfigurationError("The layer id %s is not unique!" % layer_id)
        else:
            self.layers[layer_id] = layer

    def add_dependency(self, layer_id, depend_layer_id):
        """ add the layers that have to be processed before layer_id

        Args:
            layer_id: the layer that depends on others
            depend_layer_id: a single layer id or a collection of layer ids that layer_id depends on

        Returns:

        """
        if not layer_id in self.layer_dependencies:
            self.layer_dependencies[layer_id] = set()

        if isinstance(depend_layer_id, (int, str)):
            # a single dependency (layer ids are strings here)
            self.layer_dependencies[layer_id].add(depend_layer_id)
        else:
            # a collection of dependencies
            self.layer_dependencies[layer_id] |= set(depend_layer_id)

    def remove_dependency(self, depend_layer_id):
        """ remove dependencies on depend_layer_id

        Args:
            depend_layer_id: the layer id to remove from all dependency sets

        Returns:

        """
        for layer_id in self.layer_dependencies:
            # discard() avoids a KeyError for layers that do not depend on depend_layer_id
            self.layer_dependencies[layer_id].discard(depend_layer_id)

    def get_topological_sequence(self):
        """ get the topological sequence of the nodes in the model

        Returns:
            a list of layer ids in a valid processing order

        """
        total_layer_ids = Queue()
        for layer_id in self.layers.keys():
            if layer_id != EMBED_LAYER_ID:
                total_layer_ids.put(layer_id)

        topological_list = []
        circular_cnt = 0    # used for checking whether at least one legal topological ordering exists
        while not total_layer_ids.empty():
            layer_id = total_layer_ids.get()
            if len(self.layer_dependencies[layer_id]) == 0:
                # the layer has no unresolved dependencies: emit it and release the layers depending on it
                for layer_id2 in self.layer_dependencies:
                    if layer_id in self.layer_dependencies[layer_id2]:
                        self.layer_dependencies[layer_id2].remove(layer_id)
                circular_cnt = 0
                topological_list.append(layer_id)
            else:
                total_layer_ids.put(layer_id)
                circular_cnt += 1
                if circular_cnt >= total_layer_ids.qsize():
                    rest_layers = []
                    while not total_layer_ids.empty():
                        rest_layers.append(total_layer_ids.get())
                    raise ConfigurationError("The model architecture is illegal because there is a circular dependency "
                        "or there are some isolated layers. The layers cannot be resolved: [%s]" % (", ".join(rest_layers)))

        logging.debug("Topological sequence of nodes: %s" % (",".join(topological_list)))
        return topological_list
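
    # Illustrative example (hypothetical layer ids): with
    #     layer_dependencies == {'s1_bilstm': set(), 'attention': {'s1_bilstm'}, 'output': {'attention'}}
    # get_topological_sequence() returns ['s1_bilstm', 'attention', 'output']. If a cycle existed
    # (e.g. 'attention' and 'output' depending on each other), circular_cnt would eventually reach
    # the queue size and a ConfigurationError listing the unresolved layer ids would be raised.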

    def forward(self, inputs_desc, length_desc, *param_list):
        """ Forward propagation through the layer graph.

        Args:
            with the help of transform_tensors2params(inputs_desc, length_desc, param_list), we can recover the inputs and lengths below

            inputs: dict.
                {
                    "string1": {
                        'word': word ids, [batch size, seq len]
                        'postag': postag ids, [batch size, seq len]
                        ...
                    },
                    "string2": {
                        'word': word ids, [batch size, seq len]
                        'postag': postag ids, [batch size, seq len]
                        ...
                    }
                }
            lengths: dict.
                {
                    "string1": [...],
                    "string2": [...]
                }

        Returns:
            a dict mapping each output layer id to its representation

        """
        inputs, lengths = transform_tensors2params(inputs_desc, length_desc, param_list)

        representation = dict()
        representation[EMBED_LAYER_ID] = dict()
        repre_lengths = dict()
        repre_lengths[EMBED_LAYER_ID] = dict()

        for input in inputs:
            representation[input] = self.layers[EMBED_LAYER_ID](inputs[input], use_gpu=self.is_cuda())
            if self.use_gpu:
                repre_lengths[input] = transfer_to_gpu(lengths[input])
            else:
                repre_lengths[input] = lengths[input]

        for layer_id in self.layer_topological_sequence:
            # logging.debug("To process layer %s" % layer_id)
            input_params = []
            for input_layer_id in self.layer_inputs[layer_id]:
                input_params.append(representation[input_layer_id])
                input_params.append(repre_lengths[input_layer_id])

            representation[layer_id], repre_lengths[layer_id] = self.layers[layer_id](*input_params)
            # logging.debug("Layer %s processed. output size: %s" % (layer_id, representation[layer_id].size()))

        # support multiple outputs
        representation_output = dict()
        for single_output_layer_id in self.output_layer_id:
            representation_output[single_output_layer_id] = representation[single_output_layer_id]

        return representation_output
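
    # Note on the calling convention: forward() receives the nested inputs/lengths dicts flattened
    # into (inputs_desc, length_desc, *param_list) so that the tensors can be scattered by wrappers
    # such as nn.DataParallel; transform_tensors2params() rebuilds the nested dicts shown in the
    # docstring above. The caller is assumed to perform the inverse flattening before invoking
    # forward() (an assumption based on the imported utils.common_utils helpers).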

    def is_cuda(self):
        return list(self.parameters())[-1].data.is_cuda

    def update_use_gpu(self, new_use_gpu):
        self.use_gpu = new_use_gpu
        for layer_id in self.layers.keys():
            if isinstance(self.layers[layer_id], Embedding):
                for input_cluster in self.layers[layer_id].embeddings:
                    if isinstance(self.layers[layer_id].embeddings[input_cluster], CNNCharEmbedding):
                        self.layers[layer_id].embeddings[input_cluster].layer_conf.use_gpu = new_use_gpu
            elif isinstance(self.layers[layer_id], EncoderDecoder):
                self.layers[layer_id].encoder.layer_conf.use_gpu = new_use_gpu
                self.layers[layer_id].decoder.layer_conf.use_gpu = new_use_gpu
            else:
                self.layers[layer_id].layer_conf.use_gpu = new_use_gpu
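

# Minimal usage sketch (illustrative; `conf`, `problem`, and `vocab_info` are assumed to be built
# elsewhere, e.g. by the training entry point of this project):
#     model = Model(conf, problem, vocab_info, use_gpu=torch.cuda.is_available())
#     if torch.cuda.is_available():
#         model = model.cuda()
#     # outputs is a dict keyed by the output layer id(s)
#     outputs = model(inputs_desc, length_desc, *param_list)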