diff --git a/.gitignore b/.gitignore
index d7627e4..1cb6c7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@
*.vs*
dataset/GloVe/
dataset/20_newsgroups/
+dataset/SST-2/
models/
diff --git a/Contributing.md b/Contributing.md
index a41832e..8ffed86 100644
--- a/Contributing.md
+++ b/Contributing.md
@@ -18,6 +18,64 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio
- Contribute Model to **Model Zoo**
- We encourage everyone to contribute their NLP models (namely JSON configuration files). Please follow the structure in model_zoo to create a pull request.
- Contribute Block to **Block Zoo**
- - We encourage everyone to improve this toolkit by contributing code, such as customized Blocks. So other users can further benefit from these new Blocks.
+
+ We encourage everyone to improve this toolkit by contributing code, such as customized Blocks, so that other users can further benefit from these new Blocks.
+
+ Adding a new block to NeuronBlocks takes *three steps* (taking the [BiLSTM block](https://github.com/microsoft/NeuronBlocks/blob/master/block_zoo/BiLSTM.py) as an example):
+ - Define the new block's Configuration class (the BiLSTMConf class in the BiLSTM block). The Configuration class should inherit from the [Base Configuration Class](https://github.com/microsoft/NeuronBlocks/blob/master/block_zoo/BaseLayer.py), which defines some necessary functions, and override these functions.
+ The details of these functions are given below:
+ ```python
+ def default():
+ '''
+ Define the default hyperparameters for the block; the corresponding block hyperparameters in the configuration JSON file are read first.
+ '''
+
+ def declare():
+ '''
+ Define things like "input_ranks" and "num_of_inputs", which are fixed for the block.
+ num_of_inputs being N (N > 0) means this layer accepts N inputs;
+ num_of_inputs being -1 means this layer accepts any number of inputs;
+
+ The rank here is not the same as matrix rank:
+ For a scalar, its rank is 0;
+ For a vector, its rank is 1;
+ For a matrix, its rank is 2;
+ For a cube of numbers, its rank is 3.
+
+ if num_of_inputs > 0:
+ len(input_ranks) should be equal to num_of_inputs
+ elif num_of_inputs == -1:
+ input_ranks should be a list with only one element, and the rank of every input should be equal to that element.
+ '''
+
+ def inference():
+ '''
+ Infer things like output_dim, which may rely on the defined hyperparameters or on the block's specific operation.
+ '''
+
+ def verify():
+ '''
+ Define the necessary verification for your layer when the model is defined.
+ '''
+ ```
+ - Implement the new block's class (the BiLSTM class in the BiLSTM block). The block class should inherit from the [Base Block Class](https://github.com/microsoft/NeuronBlocks/blob/master/block_zoo/BaseLayer.py) and override the `__init__` and `forward` functions.
+ ```python
+ def __init__():
+ '''
+ Define the necessary attributes that will be used in the block's operation logic.
+ '''
+
+ def forward():
+ '''
+ Tensor operation logic.
+ '''
+ ```
+ - Register the new block in block_zoo.
+ NeuronBlocks provides a script that registers a new block automatically, so block contributors only need to focus on the block logic.
+ *Tips: PROJECT_ROOT denotes the root directory of this project.*
+ ```bash
+ cd PROJECT_ROOT
+ python register_block.py --block_name=new_block_name
+ ```
*Tips: Before you contribute your code, we strongly suggest to verify that your improvements are valid by **[AUTOTEST](./autotest)**. We also encourage everyone to improve this autotest tool by contributing code, such as adding test tasks.*
diff --git a/LearningMachine.py b/LearningMachine.py
index 2cfd9c4..4b00240 100644
--- a/LearningMachine.py
+++ b/LearningMachine.py
@@ -13,7 +13,7 @@ import codecs
import pickle as pkl
from utils.common_utils import dump_to_pkl, load_from_pkl, get_param_num, get_trainable_param_num, \
- transfer_to_gpu, transform_params2tensors
+ transfer_to_gpu, transform_params2tensors, get_layer_class, load_from_json, dump_to_json
from utils.philly_utils import HDFSDirectTransferer, open_and_move, convert_to_tmppath, \
convert_to_hdfspath, move_from_local_to_hdfs
from Model import Model
@@ -22,8 +22,10 @@ from metrics.Evaluator import Evaluator
from utils.corpus_utils import get_batches
from core.StreamingRecorder import StreamingRecorder
from core.LRScheduler import LRScheduler
-from settings import ProblemTypes
+from settings import ProblemTypes, Setting as st
from block_zoo import Linear
+from block_zoo import CRF
+from losses.CRFLoss import CRFLoss
class LearningMachine(object):
@@ -40,6 +42,7 @@ class LearningMachine(object):
device = 'GPU' if 'cuda' in emb_weight_device else 'CPU'
logging.info(
"The embedding matrix is on %s now, you can modify the weight_on_gpu parameter to change embeddings weight device." % device)
+ logging.info("="*100 + '\n' + "*"*15 + "Model Achitecture" + "*"*15)
logging.info(self.model)
#logging.info("Total parameters: %d; trainable parameters: %d" % (get_param_num(self.model), get_trainable_param_num(self.model)))
logging.info("Total trainable parameters: %d" % (get_trainable_param_num(self.model)))
@@ -89,33 +92,18 @@ class LearningMachine(object):
def train(self, optimizer, loss_fn):
self.model.train()
- if not self.conf.train_data_path.endswith('.pkl'):
- train_data, train_length, train_target = self.problem.encode(self.conf.train_data_path, self.conf.file_columns,
- self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
- min_sentence_len = self.conf.min_sentence_len, extra_feature=self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
- show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
- else:
- train_pkl_data = load_from_pkl(self.conf.train_data_path)
- train_data, train_length, train_target = train_pkl_data['data'], train_pkl_data['length'], train_pkl_data['target']
+ logging.info("="*100 + '\n' + "*"*15 + 'Prepare data for training' + "*"*15)
- if not self.conf.valid_data_path.endswith('.pkl'):
- valid_data, valid_length, valid_target = self.problem.encode(self.conf.valid_data_path, self.conf.file_columns,
- self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
- min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
- show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
- else:
- valid_pkl_data = load_from_pkl(self.conf.valid_data_path)
- valid_data, valid_length, valid_target = valid_pkl_data['data'], valid_pkl_data['length'], valid_pkl_data['target']
+ valid_data, valid_length, valid_target = self.problem.encode(self.conf.valid_data_path, self.conf.file_columns,
+ self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
+ min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
+ show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)
if self.conf.test_data_path is not None:
- if not self.conf.test_data_path.endswith('.pkl'):
- test_data, test_length, test_target = self.problem.encode(self.conf.test_data_path, self.conf.file_columns, self.conf.input_types,
- self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
- min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths,
- file_format='tsv', show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
- else:
- test_pkl_data = load_from_pkl(self.conf.test_data_path)
- test_data, test_length, test_target = test_pkl_data['data'], test_pkl_data['length'], test_pkl_data['target']
+ test_data, test_length, test_target = self.problem.encode(self.conf.test_data_path, self.conf.file_columns,
+ self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
+ min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
+ show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)
stop_training = False
epoch = 1
@@ -132,196 +120,219 @@ class LearningMachine(object):
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
streaming_recoder = StreamingRecorder(['prediction', 'answer_text'])
+ logging.info("=" * 100 + '\n' + "*" * 15 + 'Start training' + "*" * 15)
while not stop_training and epoch <= self.conf.max_epoch:
logging.info('Training: Epoch ' + str(epoch))
+ train_data_generator = self._get_training_data_generator()
+ part_index = 1
+ for train_data, train_length, train_target in train_data_generator:
+ logging.debug('Training: Epoch %s Part %s'%(epoch, part_index))
+ part_index += 1
+ data_batches, length_batches, target_batches = \
+ get_batches(self.problem, train_data, train_length, train_target, self.conf.batch_size_total,
+ self.conf.input_types, None, permutate=True, transform_tensor=True)
- data_batches, length_batches, target_batches = \
- get_batches(self.problem, train_data, train_length, train_target, self.conf.batch_size_total,
- self.conf.input_types, None, permutate=True, transform_tensor=True)
-
- whole_batch_num = len(target_batches)
- valid_batch_num = max(len(target_batches) // self.conf.valid_times_per_epoch, 1)
- if torch.cuda.device_count() > 1:
- small_batch_num = whole_batch_num * torch.cuda.device_count() # total batch num over all the gpus
- valid_batch_num_show = valid_batch_num * torch.cuda.device_count() # total batch num over all the gpus to do validation
- else:
+ whole_batch_num = len(target_batches)
+ valid_batch_num = min(self.conf.steps_per_validation, whole_batch_num)
small_batch_num = whole_batch_num
valid_batch_num_show = valid_batch_num
+ batch_num_to_show_results = self.conf.batch_num_to_show_results
+ if torch.cuda.device_count() > 1:
+ batch_num_to_show_results *= torch.cuda.device_count() # total batch num overall all the gpus to log
+ small_batch_num *= torch.cuda.device_count() # total batch num over all the gpus
+ valid_batch_num_show *= torch.cuda.device_count() # total batch num over all the gpus to do validation
+
+ streaming_recoder.clear_records()
+ all_costs = []
- streaming_recoder.clear_records()
- all_costs = []
+ logging.info('There are %d batches during current period; validation are conducted every %d batch' % (small_batch_num, valid_batch_num_show))
- logging.info('There are %d batches during an epoch; validation are conducted every %d batch' % (small_batch_num, valid_batch_num_show))
+ if self.conf.mode == 'normal':
+ progress = tqdm(range(len(target_batches)))
+ elif self.conf.mode == 'philly':
+ progress = range(len(target_batches))
+ for i in progress:
+ # the result shape: for classification: [batch_size, # of classes]; for sequence tagging: [batch_size, seq_len, # of tags]
+ param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[i], length_batches[i])
+ logits = self.model(inputs_desc, length_desc, *param_list)
- if self.conf.mode == 'normal':
- progress = tqdm(range(len(target_batches)))
- elif self.conf.mode == 'philly':
- progress = range(len(target_batches))
- for i in progress:
- # the result shape: for classification: [batch_size, # of classes]; for sequence tagging: [batch_size, seq_len, # of tags]
- param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[i], length_batches[i])
- logits = self.model(inputs_desc, length_desc, *param_list)
-
- logits_softmax = {}
- if isinstance(self.model, nn.DataParallel):
- for tmp_output_layer_id in self.model.module.output_layer_id:
- if isinstance(self.model.module.layers[tmp_output_layer_id], Linear) and \
- (not self.model.module.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
- logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
- logits[tmp_output_layer_id], dim=-1)
- else:
- logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
- else:
- for tmp_output_layer_id in self.model.output_layer_id:
- if isinstance(self.model.layers[tmp_output_layer_id], Linear) and \
- (not self.model.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
- logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
- logits[tmp_output_layer_id], dim=-1)
- else:
- logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
-
- # check the output
- if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
- logits = list(logits.values())[0]
- logits_softmax = list(logits_softmax.values())[0]
- assert len(logits_softmax.shape) == 2, 'The dimension of your output is %s, but we need [batch_size*GPUs, class num]' % (str(list(logits_softmax.shape)))
- assert logits_softmax.shape[1] == self.problem.output_target_num(), 'The dimension of your output layer %d is inconsistent with your type number %d!' % (logits_softmax.shape[1], self.problem.output_target_num())
- # for auc metric
- prediction_scores = logits_softmax[:, self.conf.pos_label].cpu().data.numpy()
- if self.evaluator.has_auc_type_specific:
- prediction_scores_all = logits_softmax.cpu().data.numpy()
- else:
- prediction_scores_all = None
- elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
- logits = list(logits.values())[0]
- logits_softmax = list(logits_softmax.values())[0]
- assert len(logits_softmax.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence length, representation dim]' % (str(list(logits_softmax.shape)), )
- prediction_scores = None
- prediction_scores_all = None
- elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
- logits = list(logits.values())[0]
- logits_softmax = list(logits_softmax.values())[0]
- assert len(logits_softmax.shape) == 2 and logits_softmax.shape[1] == 1, 'The dimension of your output is %s, but we need [batch_size*GPUs, 1]' % (str(list(logits_softmax.shape)))
- prediction_scores = None
- prediction_scores_all = None
- elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
- for single_value in logits_softmax.values():
- assert len(single_value.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence_len, 1]' % (str(list(single_value.shape)))
- prediction_scores = None
- prediction_scores_all = None
-
- logits_flat = dict()
- if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
- # Transform output shapes for metric evaluation
- # for seq_tag_f1 metric
- prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy() # [batch_size, seq_len]
- streaming_recoder.record_one_row([self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
- prediction_scores, self.problem.decode(target_batches[i][self.conf.answer_column_name[0]],
- length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)
-
- # pytorch's CrossEntropyLoss only support this
- logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2)) # [batch_size * seq_len, # of tags]
- #target_batches[i] = target_batches[i].view(-1) # [batch_size * seq_len]
- # [batch_size * seq_len]
- target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][self.conf.answer_column_name[0]].reshape(-1)
-
- elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
- prediction_indices = logits_softmax.detach().max(1)[1].cpu().numpy()
- # Should not decode!
- streaming_recoder.record_one_row([prediction_indices, prediction_scores, prediction_scores_all, target_batches[i][self.conf.answer_column_name[0]].numpy()])
- logits_flat[self.conf.output_layer_id[0]] = logits
- elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
- temp_logits_flat = logits.squeeze(1)
- prediction_scores = temp_logits_flat.detach().cpu().numpy()
- streaming_recoder.record_one_row([prediction_scores, target_batches[i][self.conf.answer_column_name[0]].numpy()])
- logits_flat[self.conf.output_layer_id[0]] = temp_logits_flat
- elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
- for key, value in logits.items():
- logits[key] = value.squeeze()
- for key, value in logits_softmax.items():
- logits_softmax[key] = value.squeeze()
- passage_identify = None
- for type_key in data_batches[i].keys():
- if 'p' in type_key.lower():
- passage_identify = type_key
- break
- if not passage_identify:
- raise Exception('MRC task need passage information.')
- prediction = self.problem.decode(logits_softmax, lengths=length_batches[i][passage_identify],
- batch_data=data_batches[i][passage_identify])
- logits_flat = logits
- mrc_answer_target = None
- for single_target in target_batches[i]:
- if isinstance(target_batches[i][single_target][0], str):
- mrc_answer_target = target_batches[i][single_target]
- streaming_recoder.record_one_row([prediction, mrc_answer_target])
-
- if self.use_gpu:
- for single_target in self.conf.answer_column_name:
- if isinstance(target_batches[i][single_target], torch.Tensor):
- target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
- loss = loss_fn(logits_flat, target_batches[i])
-
- all_costs.append(loss.item())
- optimizer.zero_grad()
- loss.backward()
- if self.conf.clip_grad_norm_max_norm != -1:
- torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.conf.clip_grad_norm_max_norm)
+ logits_softmax = {}
if isinstance(self.model, nn.DataParallel):
- torch.nn.utils.clip_grad_norm_(self.model.module.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
+ for tmp_output_layer_id in self.model.module.output_layer_id:
+ if isinstance(self.model.module.layers[tmp_output_layer_id], Linear) and \
+ (not self.model.module.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
+ logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
+ logits[tmp_output_layer_id], dim=-1)
+ elif isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+ pass
+ else:
+ logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
else:
- torch.nn.utils.clip_grad_norm_(self.model.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
- optimizer.step()
+ for tmp_output_layer_id in self.model.output_layer_id:
+ if isinstance(self.model.layers[tmp_output_layer_id], Linear) and \
+ (not self.model.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
+ logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
+ logits[tmp_output_layer_id], dim=-1)
+ elif isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+ pass
+ else:
+ logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
- del loss, logits, logits_softmax, logits_flat
- del prediction_scores
- if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging \
- or ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
- del prediction_indices
-
- if show_result_cnt == self.conf.batch_num_to_show_results:
+ # check the output
if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
- result = self.evaluator.evaluate(streaming_recoder.get('target'),
- streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
- y_pred_scores_all=streaming_recoder.get('pred_scores_all'), formatting=True)
+ logits = list(logits.values())[0]
+ logits_softmax = list(logits_softmax.values())[0]
+ assert len(logits_softmax.shape) == 2, 'The dimension of your output is %s, but we need [batch_size*GPUs, class num]' % (str(list(logits_softmax.shape)))
+ assert logits_softmax.shape[1] == self.problem.output_target_num(), 'The dimension of your output layer %d is inconsistent with your type number %d!' % (logits_softmax.shape[1], self.problem.output_target_num())
+ # for auc metric
+ prediction_scores = logits_softmax[:, self.conf.pos_label].cpu().data.numpy()
+ if self.evaluator.has_auc_type_specific:
+ prediction_scores_all = logits_softmax.cpu().data.numpy()
+ else:
+ prediction_scores_all = None
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
- result = self.evaluator.evaluate(streaming_recoder.get('target'),
- streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
- formatting=True)
+ logits = list(logits.values())[0]
+ if not isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+ logits_softmax = list(logits_softmax.values())[0]
+ assert len(logits_softmax.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence length, representation dim]' % (str(list(logits_softmax.shape)), )
+ prediction_scores = None
+ prediction_scores_all = None
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
- result = self.evaluator.evaluate(streaming_recoder.get('target'),
- streaming_recoder.get('prediction'), y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
+ logits = list(logits.values())[0]
+ logits_softmax = list(logits_softmax.values())[0]
+ assert len(logits_softmax.shape) == 2 and logits_softmax.shape[1] == 1, 'The dimension of your output is %s, but we need [batch_size*GPUs, 1]' % (str(list(logits_softmax.shape)))
+ prediction_scores = None
+ prediction_scores_all = None
elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
- result = self.evaluator.evaluate(streaming_recoder.get('answer_text'), streaming_recoder.get('prediction'),
- y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
+ for single_value in logits_softmax.values():
+ assert len(single_value.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence_len, 1]' % (str(list(single_value.shape)))
+ prediction_scores = None
+ prediction_scores_all = None
- if torch.cuda.device_count() > 1:
- logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
- (epoch, i * torch.cuda.device_count(), lr_scheduler.get_lr(), np.mean(all_costs), result))
+ logits_flat = dict()
+ if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
+ # Transform output shapes for metric evaluation
+ # for seq_tag_f1 metric
+ if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+ forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
+ prediction_indices = tag_seq.cpu().numpy()
+ streaming_recoder.record_one_row([self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
+ prediction_scores, self.problem.decode(
+ target_batches[i][self.conf.answer_column_name[0]],
+ length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)
+
+ else:
+ prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy() # [batch_size, seq_len]
+ # pytorch's CrossEntropyLoss only support this
+ logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2)) # [batch_size * seq_len, # of tags]
+ streaming_recoder.record_one_row([self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
+ prediction_scores, self.problem.decode(
+ target_batches[i][self.conf.answer_column_name[0]],
+ length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)
+
+ target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][
+ self.conf.answer_column_name[0]].reshape(-1)
+
+ elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
+ prediction_indices = logits_softmax.detach().max(1)[1].cpu().numpy()
+ # Should not decode!
+ streaming_recoder.record_one_row([prediction_indices, prediction_scores, prediction_scores_all, target_batches[i][self.conf.answer_column_name[0]].numpy()])
+ logits_flat[self.conf.output_layer_id[0]] = logits
+ elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
+ temp_logits_flat = logits.squeeze(1)
+ prediction_scores = temp_logits_flat.detach().cpu().numpy()
+ streaming_recoder.record_one_row([prediction_scores, target_batches[i][self.conf.answer_column_name[0]].numpy()])
+ logits_flat[self.conf.output_layer_id[0]] = temp_logits_flat
+ elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
+ for key, value in logits.items():
+ logits[key] = value.squeeze()
+ for key, value in logits_softmax.items():
+ logits_softmax[key] = value.squeeze()
+ passage_identify = None
+ for type_key in data_batches[i].keys():
+ if 'p' in type_key.lower():
+ passage_identify = type_key
+ break
+ if not passage_identify:
+ raise Exception('MRC task need passage information.')
+ prediction = self.problem.decode(logits_softmax, lengths=length_batches[i][passage_identify],
+ batch_data=data_batches[i][passage_identify])
+ logits_flat = logits
+ mrc_answer_target = None
+ for single_target in target_batches[i]:
+ if isinstance(target_batches[i][single_target][0], str):
+ mrc_answer_target = target_batches[i][single_target]
+ streaming_recoder.record_one_row([prediction, mrc_answer_target])
+
+ if self.use_gpu:
+ for single_target in self.conf.answer_column_name:
+ if isinstance(target_batches[i][single_target], torch.Tensor):
+ target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
+ if isinstance(loss_fn.loss_fn[0], CRFLoss):
+ loss = loss_fn.loss_fn[0](forward_score, scores, masks, list(target_batches[i].values())[0], transitions, layer_conf)
else:
- logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
- (epoch, i, lr_scheduler.get_lr(), np.mean(all_costs), result))
- show_result_cnt = 0
- # The loss and other metrics printed during a training epoch are just the result of part of the training data.
- all_costs = []
- streaming_recoder.clear_records()
+ loss = loss_fn(logits_flat, target_batches[i])
- if (i != 0 and i % valid_batch_num == 0) or i == len(target_batches) - 1:
- torch.cuda.empty_cache() # actually useless
- logging.info('Valid & Test : Epoch ' + str(epoch))
- new_result = self.evaluate(valid_data, valid_length, valid_target,
- self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, cur_best_result=best_result,
- model_save_path=self.conf.model_save_path, phase="valid", epoch=epoch)
- renew_flag = best_result != new_result
- best_result = new_result
+ all_costs.append(loss.item())
+ optimizer.zero_grad()
+ loss.backward()
+ if self.conf.clip_grad_norm_max_norm != -1:
+ torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.conf.clip_grad_norm_max_norm)
+ if isinstance(self.model, nn.DataParallel):
+ torch.nn.utils.clip_grad_norm_(self.model.module.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
+ else:
+ torch.nn.utils.clip_grad_norm_(self.model.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
+ optimizer.step()
- if renew_flag and self.conf.test_data_path is not None:
- self.evaluate(test_data, test_length, test_target,
- self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, phase="test", epoch=epoch)
- self.model.train()
- show_result_cnt += 1
+ del loss, logits, logits_softmax, logits_flat
+ del prediction_scores
+ if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging \
+ or ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
+ del prediction_indices
- del data_batches, length_batches, target_batches
+ if show_result_cnt == batch_num_to_show_results:
+ if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
+ result = self.evaluator.evaluate(streaming_recoder.get('target'),
+ streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
+ y_pred_scores_all=streaming_recoder.get('pred_scores_all'), formatting=True)
+ elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
+ result = self.evaluator.evaluate(streaming_recoder.get('target'),
+ streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
+ formatting=True)
+ elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
+ result = self.evaluator.evaluate(streaming_recoder.get('target'),
+ streaming_recoder.get('prediction'), y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
+ elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
+ result = self.evaluator.evaluate(streaming_recoder.get('answer_text'), streaming_recoder.get('prediction'),
+ y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
+
+ if torch.cuda.device_count() > 1:
+ logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
+ (epoch, i * torch.cuda.device_count(), lr_scheduler.get_lr(), np.mean(all_costs), result))
+ else:
+ logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
+ (epoch, i, lr_scheduler.get_lr(), np.mean(all_costs), result))
+ show_result_cnt = 0
+ # The loss and other metrics printed during a training epoch are just the result of part of the training data.
+ all_costs = []
+ streaming_recoder.clear_records()
+
+ if (i != 0 and i % valid_batch_num == 0) or i == len(target_batches) - 1:
+ torch.cuda.empty_cache() # actually useless
+ logging.info('Valid & Test : Epoch ' + str(epoch))
+ new_result = self.evaluate(valid_data, valid_length, valid_target,
+ self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, cur_best_result=best_result,
+ model_save_path=self.conf.model_save_path, phase="valid", epoch=epoch)
+ renew_flag = best_result != new_result
+ best_result = new_result
+
+ if renew_flag and self.conf.test_data_path is not None:
+ self.evaluate(test_data, test_length, test_target,
+ self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, phase="test", epoch=epoch)
+ self.model.train()
+ show_result_cnt += 1
+
+ del data_batches, length_batches, target_batches
lr_scheduler.step()
epoch += 1
@@ -334,7 +345,7 @@ class LearningMachine(object):
test_data, test_length, test_target = self.problem.encode(test_data_path, self.conf.file_columns, self.conf.input_types,
self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
- show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
+ show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)
else:
test_pkl_data = load_from_pkl(test_data_path)
test_data, test_length, test_target = test_pkl_data['data'], test_pkl_data['length'], test_pkl_data['target']
@@ -472,18 +483,29 @@ class LearningMachine(object):
logits_flat = {}
if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
logits = list(logits.values())[0]
- logits_softmax = list(logits_softmax.values())[0]
- # Transform output shapes for metric evaluation
- # for seq_tag_f1 metric
- prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy() # [batch_size, seq_len]
- streaming_recoder.record_one_row(
- [self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()), prediction_pos_scores,
- self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)
+ if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+ forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
+ prediction_indices = tag_seq.cpu().numpy()
+ streaming_recoder.record_one_row(
+ [self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
+ prediction_pos_scores,
+ self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())],
+ keep_dim=False)
+ else:
+ logits_softmax = list(logits_softmax.values())[0]
+ # Transform output shapes for metric evaluation
+ # for seq_tag_f1 metric
+ prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy() # [batch_size, seq_len]
+ # pytorch's CrossEntropyLoss only support this
+ logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2)) # [batch_size * seq_len, # of tags]
+ streaming_recoder.record_one_row(
+ [self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
+ prediction_pos_scores,
+ self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())],
+ keep_dim=False)
- # pytorch's CrossEntropyLoss only support this
- logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2)) # [batch_size * seq_len, # of tags]
- #target_batches[i] = target_batches[i].view(-1) # [batch_size * seq_len]
- target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][self.conf.answer_column_name[0]].reshape(-1) # [batch_size * seq_len]
+ target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][
+ self.conf.answer_column_name[0]].reshape(-1) # [batch_size * seq_len]
if to_predict:
prediction_batch = self.problem.decode(prediction_indices, length_batches[i][key_random].numpy())
@@ -546,8 +568,13 @@ class LearningMachine(object):
predict_stream_recoder.record_one_row([prediction])
if to_predict:
- logits_len = len(list(logits.values())[0]) \
- if ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc else len(logits)
+ if ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
+ logits_len = len(list(logits.values())[0])
+ elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging and isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+ # for sequence_tagging task, logits is tuple type which index 3 is tag_seq [batch_size*seq_len]
+ logits_len = logits[3].size(0)
+ else:
+ logits_len = len(logits)
for sample_idx in range(logits_len):
while True:
sample = fin.readline().rstrip()
@@ -563,7 +590,10 @@ class LearningMachine(object):
for single_target in self.conf.answer_column_name:
if isinstance(target_batches[i][single_target], torch.Tensor):
target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
- loss = loss_fn(logits_flat, target_batches[i])
+ if isinstance(loss_fn.loss_fn[0], CRFLoss):
+ loss = loss_fn.loss_fn[0](forward_score, scores, masks, list(target_batches[i].values())[0], transitions, layer_conf)
+ else:
+ loss = loss_fn(logits_flat, target_batches[i])
loss_recoder.record('loss', loss.item())
del loss, logits, logits_softmax, logits_flat
@@ -639,7 +669,7 @@ class LearningMachine(object):
self.conf.file_with_col_header,self.conf.object_inputs, None, min_sentence_len=self.conf.min_sentence_len,
extra_feature=self.conf.extra_feature,max_lengths=self.conf.max_lengths, fixed_lengths=self.conf.fixed_lengths,
file_format='tsv', show_progress=True if self.conf.mode == 'normal' else False,
- cpu_num_workers=self.conf.cpu_num_workers)
+ cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)
logging.info("Starting predict ...")
self.model.eval()
@@ -685,9 +715,14 @@ class LearningMachine(object):
if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
logits = list(logits.values())[0]
- logits_softmax = list(logits_softmax.values())[0]
- # Transform output shapes for metric evaluation
- prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy() # [batch_size, seq_len]
+ if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+ forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
+ prediction_indices = tag_seq.cpu().numpy()
+ else:
+ logits_softmax = list(logits_softmax.values())[0]
+ # Transform output shapes for metric evaluation
+ # for seq_tag_f1 metric
+ prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy() # [batch_size, seq_len]
prediction_batch = self.problem.decode(prediction_indices, length_batches[i][key_random].numpy())
for prediction_sample in prediction_batch:
streaming_recoder.record('prediction', " ".join(prediction_sample))
@@ -745,6 +780,107 @@ class LearningMachine(object):
fin.close()
+    def interactive(self, sample, file_columns, predict_fields=['prediction'], predict_mode='batch'):
+        """ Predict a single sample and return the requested fields as one tab-separated string.
+
+        Args:
+            sample: the sample to predict; it is handed unchanged to self.problem.encode_data_list.
+            file_columns: representation the columns of sample
+            predict_fields: names of the fields to output. The classification branch handles
+                'prediction', 'confidence' and 'confidence@<label>'.
+            predict_mode: interactive|batch(need a predict file)
+
+        Returns:
+            'Wrong Case!' when encode_data_list yields no data for the sample; otherwise the first
+            recorded value of every field in predict_fields, joined by tabs.
+
+        Raises:
+            Exception: for MRC problems, when no input whose name contains 'p' (the passage) exists.
+        """
+        predict_data, predict_length, _, _, _ = \
+            self.problem.encode_data_list(sample, file_columns, self.conf.input_types, self.conf.object_inputs, None,
+                                          self.conf.min_sentence_len, self.conf.extra_feature, self.conf.max_lengths,
+                                          self.conf.fixed_lengths, predict_mode=predict_mode)
+        if predict_data is None:
+            return 'Wrong Case!'
+        self.model.eval()
+        with torch.no_grad():
+            # a single batch (batch size 1) is built, hence the [0] indexing below
+            data_batches, length_batches, _ = \
+                get_batches(self.problem, predict_data, predict_length, None, 1,
+                            self.conf.input_types, None, permutate=False, transform_tensor=True, predict_mode=predict_mode)
+            streaming_recoder = StreamingRecorder(predict_fields)
+
+            # list.remove() works in place and returns None, so the keys are filtered explicitly
+            # to exclude 'target' from the candidates.
+            candidate_keys = [key for key in length_batches[0].keys() if key != 'target']
+            key_random = random.choice(candidate_keys)
+            param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[0], length_batches[0])
+            logits = self.model(inputs_desc, length_desc, *param_list)
+
+            # nn.DataParallel keeps the wrapped model in .module
+            core_model = self.model.module if isinstance(self.model, nn.DataParallel) else self.model
+            logits_softmax = {}
+            for tmp_output_layer_id in core_model.output_layer_id:
+                output_layer = core_model.layers[tmp_output_layer_id]
+                # apply softmax only to Linear output layers that did not already apply it
+                if isinstance(output_layer, Linear) and (not output_layer.layer_conf.last_hidden_softmax):
+                    logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
+                        logits[tmp_output_layer_id], dim=-1)
+                else:
+                    logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
+
+            if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
+                logits = list(logits.values())[0]
+                # tmp_output_layer_id is the last output layer id visited by the loop above
+                if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
+                    # the CRF layer outputs a 6-tuple; only the decoded tag sequence is needed here
+                    forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
+                    prediction_indices = tag_seq.cpu().numpy()
+                else:
+                    logits_softmax = list(logits_softmax.values())[0]
+                    # Transform output shapes for metric evaluation
+                    # for seq_tag_f1 metric
+                    prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()  # [batch_size, seq_len]
+                prediction_batch = self.problem.decode(prediction_indices, length_batches[0][key_random].numpy())
+                for prediction_sample in prediction_batch:
+                    streaming_recoder.record('prediction', " ".join(prediction_sample))
+            elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
+                logits = list(logits.values())[0]
+                logits_softmax = list(logits_softmax.values())[0]
+                prediction_indices = logits_softmax.data.max(1)[1].cpu().numpy()
+
+                for field in predict_fields:
+                    if field == 'prediction':
+                        streaming_recoder.record(field,
+                                                 self.problem.decode(prediction_indices,
+                                                                     length_batches[0][key_random].numpy()))
+                    elif field == 'confidence':
+                        # score of the predicted label
+                        prediction_scores = logits_softmax.cpu().data.numpy()
+                        for prediction_score, prediction_idx in zip(prediction_scores, prediction_indices):
+                            streaming_recoder.record(field, prediction_score[prediction_idx])
+                    elif field.startswith('confidence') and field.find('@') != -1:
+                        # 'confidence@<label>': score of the label named after the '@'
+                        label_specified = field.split('@')[1]
+                        label_specified_idx = self.problem.output_dict.id(label_specified)
+                        confidence_specified = torch.index_select(logits_softmax.cpu(), 1, torch.tensor([label_specified_idx], dtype=torch.long)).squeeze(1)
+                        streaming_recoder.record(field, confidence_specified.data.numpy())
+            elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
+                logits = list(logits.values())[0]
+                # logits_softmax is unuseful for regression task!
+                logits_softmax = list(logits_softmax.values())[0]
+                logits_flat = logits.squeeze(1)
+                prediction_scores = logits_flat.detach().cpu().numpy()
+                streaming_recoder.record_one_row([prediction_scores])
+            elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
+                for key, value in logits.items():
+                    logits[key] = value.squeeze()
+                for key, value in logits_softmax.items():
+                    logits_softmax[key] = value.squeeze()
+                # the passage input is taken to be the first input whose name contains 'p'
+                passage_identify = None
+                for type_key in data_batches[0].keys():
+                    if 'p' in type_key.lower():
+                        passage_identify = type_key
+                        break
+                if not passage_identify:
+                    raise Exception('MRC task need passage information.')
+                prediction = self.problem.decode(logits_softmax, lengths=length_batches[0][passage_identify],
+                                                 batch_data=data_batches[0][passage_identify])
+                streaming_recoder.record_one_row([prediction])
+
+        return "\t".join([str(streaming_recoder.get(field)[0]) for field in predict_fields])
+
def load_model(self, model_path):
if self.use_gpu is True:
self.model = torch.load(model_path)
@@ -762,5 +898,19 @@ class LearningMachine(object):
logging.info("Model %s loaded!" % model_path)
logging.info("Total trainable parameters: %d" % (get_trainable_param_num(self.model)))
+    def _get_training_data_generator(self):
+        """ Choose the generator that provides the encoded training data.
+
+        Returns:
+            - without cache (conf.use_cache is falsy): self.problem.get_encode_generator(conf, build_cache=False);
+            - with cache but no conf.encoding_file_index yet: self._get_save_encode_generator();
+            - with cache and an index: conf.load_encoding_cache_generator(cache dir, index).
+        """
+        conf = self.conf
+        if conf.use_cache:
+            if conf.encoding_file_index:
+                cache_loader = conf.load_encoding_cache_generator
+                assert cache_loader, 'function conf.load_encoding_cache_generator is not defined'
+                return cache_loader(conf.encoding_cache_dir, conf.encoding_file_index)
+            return self._get_save_encode_generator()
+        return self.problem.get_encode_generator(conf, build_cache=False)
+
+    def _get_save_encode_generator(self):
+        """ Yield (data, lengths, target) from the problem's encode generator run with build_cache=True.
+
+        Once that generator is exhausted, the index file at conf.encoding_cache_index_file_path is
+        loaded and its st.cencoding_key_index entry is stored in conf.encoding_file_index.
+        """
+        encode_generator = self.problem.get_encode_generator(self.conf, build_cache=True)
+        for encoded in encode_generator:
+            encoded_data, encoded_lengths, encoded_target = encoded
+            yield encoded_data, encoded_lengths, encoded_target
+        # reached only after every item above has been consumed
+        index_content = load_from_json(self.conf.encoding_cache_index_file_path)
+        self.conf.encoding_file_index = index_content[st.cencoding_key_index]
diff --git a/Model.py b/Model.py
index b40d3cd..dae316d 100644
--- a/Model.py
+++ b/Model.py
@@ -18,7 +18,7 @@ EMBED_LAYER_ID = 'embedding'
def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_input_ids, use_gpu,
conf_dict=None, shared_conf=None, succeed_embedding_flag=False, output_layer_flag=False,
- target_num=None, fixed_lengths=None):
+ target_num=None, fixed_lengths=None, target_dict=None):
""" get layer configuration
Args
@@ -51,14 +51,24 @@ def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_inp
# for classification tasks, we usually add a Linear layer to project the output to dimension of number of classes. If we don't know the #classes, we can use '-1' instead and we would calculate the number of classes from the corpus.
if layer_name == 'Linear':
- if isinstance(conf_dict['hidden_dim'], list) and conf_dict['hidden_dim'][-1] == -1:
- assert output_layer_flag is True, "Only in the last layer, hidden_dim == -1 is allowed!"
- assert target_num is not None, "Number of targets should be given!"
- conf_dict['hidden_dim'][-1] = target_num
+ if isinstance(conf_dict['hidden_dim'], list):
+ if conf_dict['hidden_dim'][-1] == -1:
+ assert output_layer_flag is True, "Only in the last layer, hidden_dim == -1 is allowed!"
+ assert target_num is not None, "Number of targets should be given!"
+ conf_dict['hidden_dim'][-1] = target_num
+ elif conf_dict['hidden_dim'][-1] == '#target#':
+ logging.info('#target# position will be replace by target num: %d' % target_num)
+ conf_dict['hidden_dim'][-1] = target_num
elif isinstance(conf_dict['hidden_dim'], int) and conf_dict['hidden_dim'] == -1:
assert output_layer_flag is True, "Only in the last layer, hidden_dim == -1 is allowed!"
assert target_num is not None, "Number of targets should be given!"
conf_dict['hidden_dim'] = target_num
+ elif isinstance(conf_dict['hidden_dim'], str) and conf_dict['hidden_dim'] == '#target#':
+ logging.info('#target# position will be replace by target num: %d' % target_num)
+ conf_dict['hidden_dim'] = target_num
+ # add some necessary attribute for CRF layer
+ if layer_name == 'CRF':
+ conf_dict['target_dict'] = target_dict
conf = eval(layer_name + "Conf")(**conf_dict)
except NameError as e:
@@ -104,6 +114,8 @@ def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_inp
# inference and varification inside the layer
conf.inference() # update some attributes which relies on input dimension or something else
conf.verify() # verify if the configuration is legal
+ former_conf = None if len(all_layer_configs) == 0 else list(all_layer_configs.values())[-1]
+ conf.verify_former_block(former_conf) # check if has special attribute rely on former layer
logging.debug('Layer id: %s; name: %s; input_dims: %s; input_ranks: %s; output_dim: %s; output_rank: %s' % (layer_id, layer_name, conf.input_dims if layer_id != 'embedding' else 'None', conf.input_ranks, conf.output_dim, conf.output_rank))
@@ -211,7 +223,7 @@ class Model(nn.Module):
all_layer_configs[EMBED_LAYER_ID] = get_conf(EMBED_LAYER_ID, layer_arch['layer'],
None, all_layer_configs, inputs, self.use_gpu, conf_dict={'conf': emb_conf},
shared_conf=None, succeed_embedding_flag=False, output_layer_flag=output_layer_flag,
- target_num=target_num, fixed_lengths=fixed_lengths_corrected)
+ target_num=target_num, fixed_lengths=fixed_lengths_corrected, target_dict=problem.output_dict)
self.add_layer(EMBED_LAYER_ID, get_layer(layer_arch['layer'], all_layer_configs[EMBED_LAYER_ID]))
else:
if layer_arch['layer'] in self.layers and not 'conf' in layer_arch:
@@ -230,7 +242,7 @@ class Model(nn.Module):
layer_arch['inputs'], all_layer_configs, inputs, self.use_gpu, conf_dict=conf_dict,
shared_conf=shared_conf, succeed_embedding_flag=succeed_embedding_flag,
output_layer_flag=output_layer_flag, target_num=target_num,
- fixed_lengths=fixed_lengths_corrected)
+ fixed_lengths=fixed_lengths_corrected, target_dict=problem.output_dict)
if layer_arch['layer'] in self.layers and not 'conf' in layer_arch:
self.add_layer(layer_arch['layer_id'], self.layers[layer_arch['layer']])
@@ -391,7 +403,7 @@ class Model(nn.Module):
return representation_output
def is_cuda(self):
- return next(self.parameters()).data.is_cuda
+ return list(self.parameters())[-1].data.is_cuda
def update_use_gpu(self, new_use_gpu):
self.use_gpu = new_use_gpu
diff --git a/ModelConf.py b/ModelConf.py
index c56e07e..1b30ecf 100644
--- a/ModelConf.py
+++ b/ModelConf.py
@@ -14,8 +14,8 @@ import shutil
from losses.BaseLossConf import BaseLossConf
#import traceback
-from settings import LanguageTypes, ProblemTypes, TaggingSchemes, SupportedMetrics, PredictionTypes, DefaultPredictionFields
-from utils.common_utils import log_set, prepare_dir
+from settings import LanguageTypes, ProblemTypes, TaggingSchemes, SupportedMetrics, PredictionTypes, DefaultPredictionFields, ConstantStatic
+from utils.common_utils import log_set, prepare_dir, md5
from utils.exceptions import ConfigurationError
import numpy as np
@@ -219,6 +219,10 @@ class ModelConf(object):
# vocabulary setting
self.max_vocabulary = self.get_item(['training_params', 'vocabulary', 'max_vocabulary'], default=800000, use_default=True)
self.min_word_frequency = self.get_item(['training_params', 'vocabulary', 'min_word_frequency'], default=3, use_default=True)
+ self.max_building_lines = self.get_item(['training_params', 'vocabulary', 'max_building_lines'], default=1000 * 1000, use_default=True)
+
+ # chunk_size
+ self.chunk_size = self.get_item(['training_params', 'chunk_size'], default=1000 * 1000, use_default=True)
# file column header setting
self.file_with_col_header = self.get_item(['inputs', 'file_with_col_header'], default=False, use_default=True)
@@ -280,6 +284,9 @@ class ModelConf(object):
tmp_problem_path = os.path.join(self.save_base_dir, '.necessary_cache', 'problem.pkl')
self.problem_path = tmp_problem_path if os.path.isfile(tmp_problem_path) else os.path.join(self.save_base_dir, 'necessary_cache', 'problem.pkl')
+ # cache configuration
+ self._load_cache_config_from_conf()
+
# training params
self.training_params = self.get_item(['training_params'])
@@ -303,12 +310,17 @@ class ModelConf(object):
self.max_epoch = self.params.max_epoch
else:
self.max_epoch = self.get_item(['training_params', 'max_epoch'], default=float('inf'))
- self.valid_times_per_epoch = self.get_item(['training_params', 'valid_times_per_epoch'], default=1)
+ if 'valid_times_per_epoch' in self.conf['training_params']:
+ logging.info("configuration[training_params][valid_times_per_epoch] is deprecated, please use configuration[training_params][steps_per_validation] instead")
+ self.steps_per_validation = self.get_item(['training_params', 'steps_per_validation'], default=10)
self.batch_num_to_show_results = self.get_item(['training_params', 'batch_num_to_show_results'], default=10)
self.max_lengths = self.get_item(['training_params', 'max_lengths'], default=None, use_default=True)
self.fixed_lengths = self.get_item(['training_params', 'fixed_lengths'], default=None, use_default=True)
if self.fixed_lengths:
self.max_lengths = None
+ if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+ self.fixed_lengths = None
+ self.max_lengths = None
if torch.cuda.device_count() > 1:
self.batch_size_total = torch.cuda.device_count() * self.training_params['batch_size']
@@ -403,7 +415,8 @@ class ModelConf(object):
"The configuration file %s is illegal. There should be an item configuration[%s], "
"but the item %s is not found." % (self.conf_path, "][".join(error_keys), key))
else:
- print("configuration[%s] is not found in %s, use default value %s" % ("][".join(error_keys), self.conf_path, repr(default)))
+ # print("configuration[%s] is not found in %s, use default value %s" %
+ # ("][".join(error_keys), self.conf_path, repr(default)))
item = default
return item
@@ -525,3 +538,23 @@ class ModelConf(object):
shutil.copy(params.conf_path, self.save_base_dir)
logging.info('Configuration file is backed up to %s' % (self.save_base_dir))
+    def _load_cache_config_from_conf(self):
+        """ Initialise the cache-related attributes of the configuration.
+
+        In the 'train' phase with a training data path set, the md5 of the training data is
+        computed and stored in self.train_data_md5; otherwise it stays None. All other
+        cache attributes are reset to their empty values (None or 0).
+        """
+        # training data
+        self.train_data_md5 = None
+        if self.phase == 'train' and self.train_data_path:
+            logging.info("Calculating the md5 of training data ...")
+            self.train_data_md5 = md5([self.train_data_path])
+            logging.info("the md5 of training data is %s", self.train_data_md5)
+
+        # problem
+        self.problem_md5 = None
+
+        # encoding
+        self.encoding_cache_dir = None
+        self.encoding_cache_index_file_path = None
+        self.encoding_cache_index_file_md5_path = None
+        self.encoding_file_index = None
+        self.encoding_cache_legal_line_cnt = 0
+        self.encoding_cache_illegal_line_cnt = 0
+        self.load_encoding_cache_generator = None
+
diff --git a/README.md b/README.md
index fb443dc..967fe72 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
-# ***NeuronBlocks*** - Building Your NLP DNN Models Like Playing Lego
+
+
+## Building Your NLP DNN Models Like Playing Lego
[![language](https://img.shields.io/badge/language-en%20%7C%20中文-brightgreen.svg)](#language-supported)
[![python](https://img.shields.io/badge/python-3.6%20%7C%203.7-blue.svg)](https://www.python.org)
@@ -7,7 +9,7 @@
[简体中文](README_zh_CN.md)
-[Tutorial](Tutorial.md) [中文教程](Tutorial_zh_CN.md)
+[Tutorial](Tutorial.md) [中文教程](Tutorial_zh_CN.md) [Demo Video](https://youtu.be/x6cOpVSZcdo)
# Table of Contents
@@ -29,7 +31,7 @@ NeuronBlocks consists of two major components: ***Block Zoo*** and ***Model Zoo*
- In ***Block Zoo***, we provide commonly used neural network components as building blocks for model architecture design.
- In ***Model Zoo***, we provide a suite of NLP models for common NLP tasks, in the form of **JSON configuration** files.
-
+
### Language Supported
- English
@@ -50,6 +52,9 @@ Users can either pick existing models (config files) in *Model Zoo* to start mod
+
+
+
# Get Started in 60 Seconds
## Installation
@@ -93,6 +98,21 @@ python test.py --conf_path=model_zoo/demo/conf.json
# predict
python predict.py --conf_path=model_zoo/demo/conf.json
```
+
+For prediction, NeuronBlocks has two modes: **Interactive** and **Batch**.
+- *Interactive Prediction Mode:* The interactive mode provides an interactive interface: users enter a case following the prompt messages and get the real-time prediction result from the trained model. Enter "exit" to leave the interactive interface.
+```bash
+# use the above example
+# interactive prediction
+python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='interactive'
+```
+- *Batch Prediction Mode:* For predicting a batch of cases, NeuronBlocks provides a batch prediction mode, which receives a file of cases as input and writes the prediction results to the prediction file.
+```bash
+# use the above example
+# batch prediction
+python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='batch' --predict_data_path=dataset/demo/predict.tsv
+```
+
For more details, please refer to [Tutorial.md](Tutorial.md) and [Code documentation](https://microsoft.github.io/NeuronBlocks/).
# Who should consider using NeuronBlocks
@@ -134,7 +154,7 @@ Anyone who are familiar with are highly encouraged to contribute code.
```
@article{gong2019neuronblocks,
title={NeuronBlocks--Building Your NLP DNN Models Like Playing Lego},
- author={Gong, Ming and Shou, Linjun and Lin, Wutao and Sang, Zhijie and Yan, Quanjia and Yang, Ze and Jiang, Daxin},
+ author={Gong, Ming and Shou, Linjun and Lin, Wutao and Sang, Zhijie and Yan, Quanjia and Yang, Ze and Cheng, Feixiang and Jiang, Daxin},
journal={arXiv preprint arXiv:1904.09535},
year={2019}
}
@@ -155,5 +175,5 @@ If you have any questions, please contact NeuronBlocks@microsoft.com
If you have wechat, you can also add the following account:
-
+
diff --git a/README_zh_CN.md b/README_zh_CN.md
index ac239d8..79d6ad9 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -1,4 +1,6 @@
-# ***NeuronBlocks*** - 像搭积木一样构建自然语言理解深度学习模型
+
+
+## 像搭积木一样构建自然语言理解深度学习模型
[![language](https://img.shields.io/badge/language-en%20%7C%20中文-brightgreen.svg)](#language-supported)
[![python](https://img.shields.io/badge/python-3.6%20%7C%203.7-blue.svg)](https://www.python.org)
@@ -7,7 +9,7 @@
[English version](README.md)
-[中文教程](Tutorial_zh_CN.md) [Tutorial](Tutorial.md)
+[中文教程](Tutorial_zh_CN.md) [Tutorial](Tutorial.md) [Demo Video](https://youtu.be/x6cOpVSZcdo)
# 目录
@@ -47,11 +49,12 @@ NeuronBlocks包括 ***Block Zoo*** 和 ***Model Zoo*** 两个重要组件,其
- 更多……
### 使用方法
-
用户可以选择 *Model Zoo* 中的示例模型(JSON配置文件)开启模型训练,或者利用 *Block Zoo* 中的神经网络模块构建新的模型,就像玩乐高积木一样。
+
+
# 快速入门
## 安装
@@ -95,6 +98,19 @@ python test.py --conf_path=model_zoo/demo/conf.json
python predict.py --conf_path=model_zoo/demo/conf.json
```
+对于预测,NeuronBlocks 提供了两种预测的形式: **交互式**和**批量式**。
+- *交互式:* 交互式模式预测提供了交互界面,用户可以根据输入提示信息每次输入一个样本并实时得到模型前向计算出的结果,输入 "exit" 时退出交互预测模式。
+```bash
+# use the above example
+# interactive prediction
+python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='interactive'
+```
+- *批量式:* 对于批量样本预测的需求,NeuronBlocks 提供批量预测模式,其接受一个包含批量样本的文件作为输入,并且将模型前向计算的结果写回这个文件。
+```bash
+# use the above example
+# batch prediction
+python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='batch' --predict_data_path=dataset/demo/predict.tsv
+```
更多细节, 请查看[Tutorial_zh_CN.md](Tutorial_zh_CN.md) 和 [Code documentation](https://microsoft.github.io/NeuronBlocks/)。
# 适用人群
@@ -135,7 +151,7 @@ NeuronBlocks以开放的模式运行。它由 **微软 STCA NLP Group** 设计
```
@article{gong2019neuronblocks,
title={NeuronBlocks--Building Your NLP DNN Models Like Playing Lego},
- author={Gong, Ming and Shou, Linjun and Lin, Wutao and Sang, Zhijie and Yan, Quanjia and Yang, Ze and Jiang, Daxin},
+ author={Gong, Ming and Shou, Linjun and Lin, Wutao and Sang, Zhijie and Yan, Quanjia and Yang, Ze and Cheng, Feixiang and Jiang, Daxin},
journal={arXiv preprint arXiv:1904.09535},
year={2019}
}
@@ -156,4 +172,5 @@ Licensed under the [MIT](LICENSE) License.
如果您有微信,也可以添加工具包的官方账号:
-
+
+
diff --git a/Tutorial.md b/Tutorial.md
index 12ae91c..ebe88dc 100644
--- a/Tutorial.md
+++ b/Tutorial.md
@@ -1,4 +1,4 @@
-# ***NeuronBlocks*** Tutorial
+# ***NeuronBlocks*** Tutorial
[简体中文](Tutorial_zh_CN.md)
@@ -21,6 +21,7 @@
4. [Compression for MRC Model](#task-6.4)
* [Task 7: Chinese Sentiment Analysis](#task-7)
* [Task 8: Chinese Text Matching](#task-8)
+ * [Task 9: Sequence Labeling](#task-9)
* [Advanced Usage](#advanced-usage)
* [Extra Feature Support](#extra-feature)
* [Learning Rate Decay](#lr-decay)
@@ -146,10 +147,12 @@ The architecture of the configuration file is:
CUDA_VISIBLE_DEVICES= python train.py
```
- ***cpu_num_workers***. [default: -1] Define the number of processes to preprocess the dataset. The number of processes is equal to that of logical cores CPU supports if value is negtive or 0, otherwise it is equal to *cpu_num_workers*.
+ - ***chunk_size***. [default: 1000000] Define the size of the chunk that NB reads from a file at a time. Reading files chunk by chunk (lazy loading) avoids running out of memory.
- ***batch_size***. Define the batch size here. If there are multiple GPUs, *batch_size* is the batch size of each GPU.
- ***batch_num_to_show_results***. [necessary for training] During the training process, show the results every batch_num_to_show_results batches.
- ***max_epoch***. [necessary for training] The maximum number of epochs to train.
- - ***valid_times_per_epoch***. [optional for training, default: 1] Define how many times to conduct validation per epoch. Usually, we conduct validation after each epoch, but for a very large corpus, we'd better validate multiple times in case to miss the best state of our model. The default value is 1.
+ - ~~***valid_times_per_epoch***~~. [**deprecated**] Please use steps_per_validation instead.
+ - ***steps_per_validation***. [default: 10] Define how often validation takes place: validation is conducted once every *steps_per_validation* steps.
- ***tokenizer***. [optional] Define tokenizer here. Currently, we support 'nltk' and 'jieba'. By default, 'nltk' for English and 'jieba' for Chinese.
- **architecture**. Define the model architecture. The node is a list of layers (blocks) in block_zoo to represent a model. The supported layers of this toolkit are given in [block_zoo overview](https://microsoft.github.io/NeuronBlocks).
@@ -294,11 +297,13 @@ Question answer matching is a crucial subtask of the question answering problem,
Model | AUC
-------- | --------
- CNN (WikiQA paper) | 0.735
+ CNN (WikiQA paper) | 0.735
CNN-Cnt (WikiQA paper) | 0.753
CNN (NeuronBlocks) | 0.747
BiLSTM (NeuronBlocks) | 0.767
- BiLSTM+Attn (NeuronBlocks) | 0.754
+ BiLSTM+Attn (NeuronBlocks) | 0.754
+ [ARC-I](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7508
+ [ARC-II](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7612
[MatchPyramid](https://arxiv.org/abs/1602.06359) (NeuronBlocks) | 0.763
BiLSTM+Match Attention (NeuronBlocks) | 0.786
@@ -457,7 +462,7 @@ This task is to train a query regression model to learn from a heavy teacher mod
3. Calculate AUC metric
```bash
cd PROJECT_ROOT
- python tools/calculate_AUC.py --input_file models/kdqbc_bilstmattn_cnn/train/predict.tsv --predict_index 2 --label_index 1
+ python tools/calculate_auc.py --input_file models/kdqbc_bilstmattn_cnn/train/predict.tsv --predict_index 2 --label_index 1
```
*Tips: you can try different models by running different JSON config files.*
@@ -501,7 +506,7 @@ This task is to train a query-passage regression model to learn from a heavy tea
3. Calculate AUC metric
```bash
cd PROJECT_ROOT
- python tools/calculate_AUC.py --input_file=models/kdtm_match_linearAttn/predict.tsv --predict_index=3 --label_index=2
+ python tools/calculate_auc.py --input_file=models/kdtm_match_linearAttn/predict.tsv --predict_index=3 --label_index=2
```
*Tips: you can try different models by running different JSON config files.*
@@ -562,7 +567,58 @@ Here is an example using Chinese data, for text matching task.
```
*Tips: you can try different models by running different JSON config files. The model file and train log file can be found in JOSN config file's outputs/save_base_dir after you finish training.*
+### Task 9: Sequence Labeling
+Sequence Labeling is an important NLP task, which includes NER, Slot Tagging, POS Tagging, etc.
+- ***Dataset***
+
+ [CoNLL 2003](https://www.clips.uantwerpen.be/conll2003/) is a popular dataset in Sequence Labeling task. We use CoNLL 2003 English NER data for our experiment and you can refer to the data format in [sample data](https://github.com/microsoft/NeuronBlocks/tree/master/dataset/slot_tagging/conll_2003).
+
+- ***Tagging Scheme***
+
+ - NeuronBlocks supports both BIO and BIOES tag schemes.
+ - The IOB scheme is not supported, because of its worse performance in most [experiments](https://arxiv.org/pdf/1707.06799.pdf).
+ - NeuronBlocks provides a [script](tools/tagging_schemes_converter.py) that converts the tag scheme among IOB/BIO/BIOES (NOTE: the script only supports tsv file which has data and label in two columns).
+
+- ***Usages***
+
+ 1. Softmax output.
+ ```bash
+ # train model
+ cd PROJECT_ROOT
+ python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
+
+ # test model
+ cd PROJECT_ROOT
+ python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
+ ```
+ 2. CRF output.
+ ```bash
+ # train model
+ cd PROJECT_ROOT
+ python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
+
+ # test model
+ cd PROJECT_ROOT
+ python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
+ ```
+ *Tips: you can try more models [here](https://github.com/microsoft/NeuronBlocks/tree/master/model_zoo/nlp_tasks/slot_tagging).*
+
+- ***Result***
+
+ The result on CoNLL 2003 English NER dataset.
+
+ Model | F1-score
+ -------- | --------
+ [Ma and Hovy(2016)](https://arxiv.org/pdf/1603.01354.pdf)|87.00
+ [BiLSTM+Softmax](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json) (NeuronBlocks)|88.50
+ [Lample et al.(2016)](https://arxiv.org/pdf/1603.01360.pdf)| 89.15
+ [CLSTM+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wlstm_crf.json) (NeuronBlocks)|90.83
+ [Chiu and Nichols(2016)](https://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00104)|90.91
+ [CCNN+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json) (NeuronBlocks)|91.38
+
+ *Tips: C means Char and W means Word. CCNN means Char-level representation with CNN model and CLSTM means Char-level representation with LSTM model.*
+
## Advanced Usage
After building a model, the next goal is to train a model with good performance. It depends on a highly expressive model and tricks of the model training. NeuronBlocks provides some tricks of model training.
@@ -675,5 +731,7 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em
***training_params/vocabulary/max_vocabulary***. [int, optional for training, default: 800,000] The max size of corpus vocabulary. If corpus vocabulary size is larger than *max_vocabulary*, it will be cut according to word frequency.
+ ***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines that NeuronBlocks (NB) reads from each file to build the vocabulary.
+
## Frequently Asked Questions
diff --git a/Tutorial_zh_CN.md b/Tutorial_zh_CN.md
index 37dd3bd..501af09 100644
--- a/Tutorial_zh_CN.md
+++ b/Tutorial_zh_CN.md
@@ -1,4 +1,4 @@
-# ***NeuronBlocks*** 教程
+# ***NeuronBlocks*** 教程
[English Version](Tutorial.md)
@@ -21,6 +21,7 @@
4. [机器阅读理解模型的模型压缩](#task-6.4)
* [任务 7: 中文情感分析](#task-7)
* [任务 8:中文文本匹配](#task-8)
+ * [任务 9:序列标注](#task-9)
* [高阶用法](#advanced-usage)
* [额外的feature](#extra-feature)
* [学习率衰减](#lr-decay)
@@ -136,10 +137,12 @@ python predict.py --conf_path=model_zoo/demo/conf.json
CUDA_VISIBLE_DEVICES= python train.py
```
- ***cpu_num_workers***. [default: -1] Define the number of processes to preprocess the dataset. The number of processes is equal to that of logical cores CPU supports if value is negtive or 0, otherwise it is equal to *cpu_num_workers*.
+ - ***chunk_size***. [default: 1000000] Define the size of the chunk that NB reads from a file at a time; reading in chunks enables lazy loading and avoids running out of memory.
- ***batch_size***. Define the batch size here. If there are multiple GPUs, *batch_size* is the batch size of each GPU.
- ***batch_num_to_show_results***. [necessary for training] During the training process, show the results every batch_num_to_show_results batches.
- ***max_epoch***. [necessary for training] The maximum number of epochs to train.
- - ***valid_times_per_epoch***. [optional for training, default: 1] Define how many times to conduct validation per epoch. Usually, we conduct validation after each epoch, but for a very large corpus, we'd better validate multiple times in case to miss the best state of our model. The default value is 1.
+ - ~~***valid_times_per_epoch***~~. [**deprecated**] Please use steps_per_validation instead.
+ - ***steps_per_validation***. [default: 10] Define how many steps pass between two validations.
- ***tokenizer***. [optional] Define tokenizer here. Currently, we support 'nltk' and 'jieba'. By default, 'nltk' for English and 'jieba' for Chinese.
- **architecture**. Define the model architecture. The node is a list of layers (blocks) in block_zoo to represent a model. The supported layers of this toolkit are given in [block_zoo overview](https://microsoft.github.io/NeuronBlocks).
@@ -288,6 +291,8 @@ Question answer matching is a crucial subtask of the question answering problem,
CNN (NeuronBlocks) | 0.747
BiLSTM (NeuronBlocks) | 0.767
BiLSTM+Attn (NeuronBlocks) | 0.754
+ [ARC-I](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7508
+ [ARC-II](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7612
[MatchPyramid](https://arxiv.org/abs/1602.06359) (NeuronBlocks) | 0.763
BiLSTM+Match Attention (NeuronBlocks) | 0.786
@@ -446,7 +451,7 @@ This task is to train a query regression model to learn from a heavy teacher mod
3. Calculate AUC metric
```bash
cd PROJECT_ROOT
- python tools/calculate_AUC.py --input_file models/kdqbc_bilstmattn_cnn/train/predict.tsv --predict_index 2 --label_index 1
+ python tools/calculate_auc.py --input_file models/kdqbc_bilstmattn_cnn/train/predict.tsv --predict_index 2 --label_index 1
```
*Tips: you can try different models by running different JSON config files.*
@@ -490,7 +495,7 @@ This task is to train a query-passage regression model to learn from a heavy tea
3. Calculate AUC metric
```bash
cd PROJECT_ROOT
- python tools/calculate_AUC.py --input_file=models/kdtm_match_linearAttn/predict.tsv --predict_index=3 --label_index=2
+ python tools/calculate_auc.py --input_file=models/kdtm_match_linearAttn/predict.tsv --predict_index=3 --label_index=2
```
*Tips: you can try different models by running different JSON config files.*
@@ -552,6 +557,58 @@ This task is to train a query-passage regression model to learn from a heavy tea
```
*提示:您可以通过运行不同的JSON配置文件来尝试不同的模型。当训练完成后,模型文件和训练日志文件可以在JSON配置的outputs/save_base_dir目录中找到。*
+### 任务 9: 序列标注
+序列标注是一项重要的NLP任务,包括 NER, Slot Tagging, Pos Tagging 等任务。
+
+- ***数据集***
+
+ 在序列标注任务中,[CoNLL 2003](https://www.clips.uantwerpen.be/conll2003/)是一个很常用的数据集。在我们的序列标注任务中,使用 CoNLL 2003 中的英文 NER 数据作为实验数据,其中数据格式可以参考我们给出的[抽样数据](https://github.com/microsoft/NeuronBlocks/tree/master/dataset/slot_tagging/conll_2003)。
+
+- ***标注策略***
+
+ - NeuronBlocks 支持 BIO 和 BIOES 标注策略。
+ - IOB 标注策略是不被支持的,因为在大多数[实验](https://arxiv.org/pdf/1707.06799.pdf)中它具有很差的表现。
+ - NeuronBlocks 提供一个在不同标注策略(IOB/BIO/BIOES)中的[转化脚本](tools/tagging_schemes_converter.py)(脚本仅支持具有 数据和标签 的两列tsv文件输入)。
+
+- ***用法***
+
+ 1. Softmax 输出.
+ ```bash
+ # train model
+ cd PROJECT_ROOT
+ python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
+
+ # test model
+ cd PROJECT_ROOT
+ python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
+ ```
+ 2. CRF 输出.
+ ```bash
+ # train model
+ cd PROJECT_ROOT
+ python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
+
+ # test model
+ cd PROJECT_ROOT
+ python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
+ ```
+ *提示 :尝试更多模型可 [点击](https://github.com/microsoft/NeuronBlocks/tree/master/model_zoo/nlp_tasks/slot_tagging)。*
+
+- ***结果***
+
+ 实验采用 CoNLL 2003 英文 NER 数据集。
+
+ Model | F1-score
+ -------- | --------
+ [Ma and Hovy(2016)](https://arxiv.org/pdf/1603.01354.pdf)|87.00
+ [BiLSTM+Softmax](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json) (NeuronBlocks)|88.50
+ [Lample et al.(2016)](https://arxiv.org/pdf/1603.01360.pdf)| 89.15
+ [CLSTM+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wlstm_crf.json) (NeuronBlocks)|90.83
+ [Chiu and Nichols(2016)](https://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00104)|90.91
+ [CCNN+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json) (NeuronBlocks)|91.38
+
+ *提示 : C 代表字符,W 代表单词。 CCNN 代表使用 CNN 模型的字符级别表示, CLSTM 代表使用 LSTM 模型的字符级别表示。*
+
## 高阶用法
After building a model, the next goal is to train a model with good performance. It depends on a highly expressive model and tricks of the model training. NeuronBlocks provides some tricks of model training.
@@ -664,4 +721,6 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em
***training_params/vocabulary/max_vocabulary***. [int, optional for training, default: 800,000] The max size of corpus vocabulary. If corpus vocabulary size is larger than *max_vocabulary*, it will be cut according to word frequency.
+ ***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines that NeuronBlocks (NB) reads from each file to build the vocabulary.
+
## 常见问题与答案
diff --git a/autotest.sh b/autotest.sh
old mode 100644
new mode 100755
diff --git a/autotest/tools/get_results.py b/autotest/tools/get_results.py
index 7dd1b3c..e84e65f 100644
--- a/autotest/tools/get_results.py
+++ b/autotest/tools/get_results.py
@@ -1,6 +1,3 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT license.
-
import re
from calculate_AUC import main
@@ -10,24 +7,29 @@ task_dir = ['/20_newsgroup_bilstm_attn', '/chinese_text_matching', '/question_pa
results = {'english_text_matching': [0.96655], 'chinese_text_matching': [0.70001], 'quora_question_pairs': [0.72596], 'knowledge_distillation': [0.66329]}
for each_dir, key in zip(task_dir, results.keys()):
target_dir = base_dir + each_dir
- with open(target_dir + '/train_autotest.log', 'r') as f_r:
- last_line = f_r.readlines()[-1].strip()
- score = ''.join(re.findall(r'(?<=accuracy:).*?(?=loss|;)', last_line))
- try:
- results[key].append(float(score))
- except:
- results[key].append('wrong')
- print ('GPU test. Wrong number in %s/train_autotest.log' %target_dir)
-
- with open(target_dir + '/test_autotest.log', 'r') as f_r:
- last_line = f_r.readlines()[-1].strip()
- score = ''.join(re.findall(r'(?<=accuracy:).*?(?=loss|;)', last_line))
- try:
- results[key].append(float(score))
- except:
- results[key].append('wrong')
- print ('CPU test. Wrong number in %s/test_autotest.log' %target_dir)
+ try:
+ with open(target_dir + '/train_autotest.log', 'r') as f_r:
+ last_line = f_r.readlines()[-1].strip()
+ score = ''.join(re.findall(r'(?<=accuracy:).*?(?=loss|;)', last_line))
+ try:
+ results[key].append(float(score))
+ except ValueError: # no (or a malformed) accuracy value on the last line of the log
+ results[key].append('wrong number in train log')
+ print ('GPU test. Wrong number in %s/train_autotest.log' %target_dir)
+ except Exception: # missing, unreadable or empty log; KeyboardInterrupt/SystemExit still propagate
+ results[key].append('no train log')
+ try:
+ with open(target_dir + '/test_autotest.log', 'r') as f_r:
+ last_line = f_r.readlines()[-1].strip()
+ score = ''.join(re.findall(r'(?<=accuracy:).*?(?=loss|;)', last_line))
+ try:
+ results[key].append(float(score))
+ except ValueError: # no (or a malformed) accuracy value on the last line of the log
+ results[key].append('wrong number in test log')
+ print ('CPU test. Wrong number in %s/test_autotest.log' %target_dir)
+ except Exception: # missing, unreadable or empty log; KeyboardInterrupt/SystemExit still propagate
+ results[key].append('no test log')
# for kdtm_match_linearAttn task, we use calculate_AUC.main()
params = {'input_file': './autotest/models/kdtm_match_linearAttn/predict.tsv', 'predict_index': '3', 'label_index': '2', 'header': False}
diff --git a/block_zoo/BaseLayer.py b/block_zoo/BaseLayer.py
index 7209aeb..13999d0 100644
--- a/block_zoo/BaseLayer.py
+++ b/block_zoo/BaseLayer.py
@@ -154,6 +154,11 @@ class BaseConf(ABC):
# To check if deepcopy is applied
assert id(self.output_dim) != id(self.input_dims[0]), 'Please use copy.deepcopy to copy the input_dim to output_dim'
+ def verify_former_block(self, former_conf):
+ """Check whether this block has special attributes that rely on the former layer
+
+ """
+ return True
def add_attr_type_assertion(self, attr, specified_type):
""" check if the types of attributes are legal
diff --git a/block_zoo/CRF.py b/block_zoo/CRF.py
new file mode 100644
index 0000000..2c8ea64
--- /dev/null
+++ b/block_zoo/CRF.py
@@ -0,0 +1,244 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+from block_zoo.BaseLayer import BaseLayer, BaseConf
+from utils.DocInherit import DocInherit
+import torch
+import torch.nn as nn
+from copy import deepcopy
+import torch.autograd as autograd
+
+
+def argmax(vec):
+ # return the argmax as a python int
+ _, idx = torch.max(vec, 1)
+ return idx.item()
+
+
+def log_sum_exp(vec, m_size):
+ """
+ calculate log of exp sum
+ args:
+ vec (batch_size, vanishing_dim, hidden_dim) : input tensor
+ m_size : hidden_dim
+ return:
+ batch_size, hidden_dim
+ """
+ _, idx = torch.max(vec, 1) # B * 1 * M
+ max_score = torch.gather(vec, 1, idx.view(-1, 1, m_size)).view(-1, 1, m_size) # B * M
+ return max_score.view(-1, m_size) + torch.log(torch.sum(torch.exp(vec - max_score.expand_as(vec)), 1)).view(-1, m_size) # B * M
+
+
+class CRFConf(BaseConf):
+ """
+ Configuration of CRF layer
+
+ Args:
+ START_TAG (str) / STOP_TAG (str): labels marking the start / end of a sequence; each is appended to the target dictionary when absent.
+ """
+ def __init__(self, **kwargs):
+ super(CRFConf, self).__init__(**kwargs)
+
+ @DocInherit
+ def default(self):
+ self.START_TAG = ""
+ self.STOP_TAG = ""
+
+ @DocInherit
+ def declare(self):
+ self.num_of_inputs = 1
+ self.input_ranks = [3]
+
+ @DocInherit
+ def inference(self):
+ self.output_dim = [1]
+ # make sure START/STOP tags have ids; test membership (not truthiness) so a tag already mapped to id 0 keeps its id
+ self.target_dict = deepcopy(self.target_dict.cell_id_map)
+ if self.START_TAG not in self.target_dict:
+ self.target_dict[self.START_TAG] = len(self.target_dict)
+ if self.STOP_TAG not in self.target_dict:
+ self.target_dict[self.STOP_TAG] = len(self.target_dict)
+
+ super(CRFConf, self).inference()
+
+ @DocInherit
+ def verify(self):
+ super(CRFConf, self).verify()
+
+
+class CRF(BaseLayer):
+ """ Conditional Random Field layer
+
+ Args:
+ layer_conf(CRFConf): configuration of CRF layer
+ """
+ def __init__(self, layer_conf):
+ super(CRF, self).__init__(layer_conf)
+ self.target_size = len(self.layer_conf.target_dict)
+
+ init_transitions = torch.zeros(self.target_size, self.target_size)
+ init_transitions[:, self.layer_conf.target_dict[self.layer_conf.START_TAG]] = -10000.0
+ init_transitions[self.layer_conf.target_dict[self.layer_conf.STOP_TAG], :] = -10000.0
+ init_transitions[:, 0] = -10000.0
+ init_transitions[0, :] = -10000.0
+
+ if self.layer_conf.use_gpu:
+ init_transitions = init_transitions.cuda()
+ self.transitions = nn.Parameter(init_transitions)
+
+ def _calculate_forward(self, feats, mask):
+ """
+ input:
+ feats: (batch, seq_len, self.tag_size)
+ masks: (batch, seq_len)
+ """
+ batch_size = feats.size(0)
+ seq_len = feats.size(1)
+ tag_size = feats.size(2)
+
+ mask = mask.transpose(1, 0).contiguous()
+ ins_num = seq_len * batch_size
+ # be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
+ feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
+ # need to consider start
+ scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
+ scores = scores.view(seq_len, batch_size, tag_size, tag_size)
+ # build iter
+ seq_iter = enumerate(scores)
+ _, inivalues = next(seq_iter) # bat_size * from_target_size * to_target_size
+ # only need start from start_tag
+ partition = inivalues[:, self.layer_conf.target_dict[self.layer_conf.START_TAG], :].clone().view(batch_size, tag_size, 1) # bat_size * to_target_size
+
+ for idx, cur_values in seq_iter:
+ # previous to_target is current from_target
+ # partition: previous results log(exp(from_target)), #(batch_size * from_target)
+ # cur_values: bat_size * from_target * to_target
+
+ cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
+ cur_partition = log_sum_exp(cur_values, tag_size)
+
+ # (bat_size * from_target * to_target) -> (bat_size * to_target)
+ # partition = utils.switch(partition, cur_partition, mask[idx].view(bat_size, 1).expand(bat_size, self.tagset_size)).view(bat_size, -1)
+ mask_idx = mask[idx, :].view(batch_size, 1).expand(batch_size, tag_size)
+
+ # effective updated partition part, only keep the partition value of mask value = 1
+ masked_cur_partition = cur_partition.masked_select(mask_idx)
+ # let mask_idx broadcastable, to disable warning
+ mask_idx = mask_idx.contiguous().view(batch_size, tag_size, 1)
+
+ # replace the partition where the maskvalue=1, other partition value keeps the same
+ partition.masked_scatter_(mask_idx, masked_cur_partition)
+ # until the last state, add transition score for all partition (and do log_sum_exp) then select the value in STOP_TAG
+ cur_values = self.transitions.view(1, tag_size, tag_size).expand(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
+ cur_partition = log_sum_exp(cur_values, tag_size)
+ final_partition = cur_partition[:, self.layer_conf.target_dict[self.layer_conf.STOP_TAG]]
+ return final_partition.sum(), scores
+
+ def _viterbi_decode(self, feats, mask):
+ """
+ input:
+ feats: (batch, seq_len, self.tag_size)
+ mask: (batch, seq_len)
+ output:
+ decode_idx: (batch, seq_len) decoded sequence
+ path_score: (batch, 1) corresponding score for each sequence
+ """
+ batch_size = feats.size(0)
+ seq_len = feats.size(1)
+ tag_size = feats.size(2)
+
+ # calculate sentence length for each sentence
+ length_mask = torch.sum(mask.long(), dim=1).view(batch_size, 1).long()
+ # mask to (seq_len, batch_size)
+ mask = mask.transpose(1, 0).contiguous()
+ ins_num = seq_len * batch_size
+ # be careful the view shape, it is .view(ins_num, 1, tag_size) but not .view(ins_num, tag_size, 1)
+ feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
+ # need to consider start
+ scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
+ scores = scores.view(seq_len, batch_size, tag_size, tag_size)
+
+ # build iter
+ seq_iter = enumerate(scores)
+ # record the position of best score
+ back_points = list()
+ partition_history = list()
+ # reverse mask (bug for mask = 1- mask, use this as alternative choice)
+ mask = (1 - mask.long()).byte()
+ _, inivalues = next(seq_iter) # bat_size * from_target_size * to_target_size
+ # only need start from start_tag
+ partition = inivalues[:, self.layer_conf.target_dict[self.layer_conf.START_TAG], :].clone().view(batch_size, tag_size) # bat_size * to_target_size
+ # print "init part:",partition.size()
+ partition_history.append(partition)
+ # iter over last scores
+ for idx, cur_values in seq_iter:
+ # previous to_target is current from_target
+ # partition: previous results log(exp(from_target)), #(batch_size * from_target)
+ # cur_values: batch_size * from_target * to_target
+ cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
+ partition, cur_bp = torch.max(cur_values, 1)
+ partition_history.append(partition)
+ # cur_bp: (batch_size, tag_size) max source score position in current tag
+ # set padded label as 0, which will be filtered in post processing
+ cur_bp.masked_fill_(mask[idx].view(batch_size, 1).expand(batch_size, tag_size), 0)
+ back_points.append(cur_bp)
+ # add score to final STOP_TAG
+ partition_history = torch.cat(partition_history, 0).view(seq_len, batch_size, -1).transpose(1, 0).contiguous() # (batch_size, seq_len. tag_size)
+ # get the last position of each sentence, and select the last partitions using gather()
+ last_position = length_mask.view(batch_size, 1, 1).expand(batch_size, 1, tag_size) - 1
+ last_partition = torch.gather(partition_history, 1, last_position).view(batch_size,tag_size,1)
+ # calculate the score from last partition to end state (and then select the STOP_TAG from it)
+ last_values = last_partition.expand(batch_size, tag_size, tag_size) + self.transitions.view(1, tag_size, tag_size).expand(batch_size, tag_size, tag_size)
+ _, last_bp = torch.max(last_values, 1)
+ pad_zero = autograd.Variable(torch.zeros(batch_size, tag_size)).long()
+ if self.layer_conf.use_gpu:
+ pad_zero = pad_zero.cuda()
+ back_points.append(pad_zero)
+ back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size)
+
+ # select end ids in STOP_TAG
+ pointer = last_bp[:, self.layer_conf.target_dict[self.layer_conf.STOP_TAG]]
+ insert_last = pointer.contiguous().view(batch_size, 1, 1).expand(batch_size, 1, tag_size)
+ back_points = back_points.transpose(1, 0).contiguous()
+ # move the end ids(expand to tag_size) to the corresponding position of back_points to replace the 0 values
+ back_points.scatter_(1, last_position, insert_last)
+ back_points = back_points.transpose(1, 0).contiguous()
+ # decode from the end; padded position ids are 0, which will be filtered out in the following evaluation
+ decode_idx = autograd.Variable(torch.LongTensor(seq_len, batch_size))
+ if self.layer_conf.use_gpu:
+ decode_idx = decode_idx.cuda()
+ decode_idx[-1] = pointer.detach()
+ for idx in range(len(back_points)-2, -1, -1):
+ pointer = torch.gather(back_points[idx], 1, pointer.contiguous().view(batch_size, 1))
+ decode_idx[idx] = pointer.detach().view(batch_size)
+ path_score = None
+ decode_idx = decode_idx.transpose(1, 0)
+ return path_score, decode_idx
+
+ def forward(self, string, string_len):
+ """
+ CRF layer process: include use transition matrix compute score and viterbi decode
+
+ Args:
+ string(Tensor): [batch_size, seq_len, target_num]
+ string_len(Tensor): [batch_size]
+
+ Returns:
+ score: the score by CRF inference
+ best_path: the best path of viterbi decode
+ """
+ assert string_len is not None, "CRF layer need string length for mask."
+ masks = []
+ string_len_val = string_len.cpu().data.numpy()
+ for i in range(len(string_len)):
+ masks.append(
+ torch.cat([torch.ones(string_len_val[i]), torch.zeros(string.shape[1] - string_len_val[i])]))
+ masks = torch.stack(masks).view(string.shape[0], string.shape[1]).byte()
+ if self.layer_conf.use_gpu:
+ masks = masks.cuda()
+
+ forward_score, scores = self._calculate_forward(string, masks)
+
+ _, tag_seq = self._viterbi_decode(string, masks)
+
+ return (forward_score, scores, masks, tag_seq, self.transitions, self.layer_conf), string_len
diff --git a/block_zoo/Conv.py b/block_zoo/Conv.py
index 8291ac7..d97c45f 100644
--- a/block_zoo/Conv.py
+++ b/block_zoo/Conv.py
@@ -35,6 +35,9 @@ class ConvConf(BaseConf):
self.output_channel_num = 16
self.batch_norm = True
self.activation = 'ReLU'
+ self.padding_type = 'VALID'
+ self.dropout = 0
+ self.remind_lengths = True
@DocInherit
def declare(self):
@@ -43,9 +46,16 @@ class ConvConf(BaseConf):
@DocInherit
def inference(self):
+
+ if self.padding_type == 'SAME':
+ self.padding = int((self.window_size-1)/2)
+
self.output_dim = [-1]
if self.input_dims[0][1] != -1:
- self.output_dim.append((self.input_dims[0][1] - self.window_size) // self.stride + 1)
+ if self.padding_type == 'SAME':
+ self.output_dim.append(self.input_dims[0][1])
+ else:
+ self.output_dim.append((self.input_dims[0][1] - self.window_size) // self.stride + 1)
else:
self.output_dim.append(-1)
self.output_dim.append(self.output_channel_num)
@@ -67,6 +77,13 @@ class ConvConf(BaseConf):
for attr in necessary_attrs_for_user:
self.add_attr_exist_assertion_for_user(attr)
+ @DocInherit
+ def verify_former_block(self, former_conf):
+ if 'conv' in str(type(former_conf)).lower():
+ self.mask = False
+ else:
+ self.mask = True
+
class Conv(BaseLayer):
""" Convolution along just 1 direction
@@ -82,16 +99,29 @@ class Conv(BaseLayer):
else:
self.activation = None
- self.filters = nn.ParameterList([nn.Parameter(torch.randn(layer_conf.output_channel_num,
- layer_conf.input_channel_num, layer_conf.window_size, layer_conf.input_dims[0][-1],
- requires_grad=True).float())])
+ self.conv = nn.Conv1d(layer_conf.input_dims[0][-1], layer_conf.output_channel_num, kernel_size=layer_conf.window_size, padding=layer_conf.padding)
if layer_conf.batch_norm:
- self.batch_norm = nn.BatchNorm2d(layer_conf.output_channel_num) # the output_chanel of Conv is the input_channel of BN
+ # self.batch_norm = nn.BatchNorm2d(layer_conf.output_channel_num) # the output_chanel of Conv is the input_channel of BN
+ self.batch_norm = nn.BatchNorm1d(layer_conf.output_channel_num)
else:
self.batch_norm = None
- def forward(self, string, string_len=None):
+ if layer_conf.dropout > 0:
+ self.cov_dropout = nn.Dropout(layer_conf.dropout)
+ else:
+ self.cov_dropout = None
+
+ if layer_conf.use_gpu:
+ self.conv = self.conv.cuda()
+ if self.batch_norm:
+ self.batch_norm = self.batch_norm.cuda()
+ if self.cov_dropout:
+ self.cov_dropout = self.cov_dropout.cuda()
+ if self.activation:
+ self.activation = self.activation.cuda()
+
+ def forward(self, string, string_len):
""" process inputs
Args:
@@ -102,7 +132,7 @@ class Conv(BaseLayer):
Tensor: shape: [batch_size, (seq_len - conv_window_size) // stride + 1, output_channel_num]
"""
- if string_len is not None:
+ if string_len is not None and self.layer_conf.mask:
string_len_val = string_len.cpu().data.numpy()
masks = []
for i in range(len(string_len)):
@@ -113,17 +143,21 @@ class Conv(BaseLayer):
masks = masks.to(device)
string = string * masks
- string = torch.unsqueeze(string, 1) # [batch_size, input_channel_num=1, seq_len, feature_dim]
- string_out = F.conv2d(string, self.filters[0], stride=self.layer_conf.stride, padding=self.layer_conf.padding)
- if hasattr(self, 'batch_norms') and self.batch_norm:
- string_out = self.batch_norm(string_out)
-
- string_out = torch.squeeze(string_out, 3).permute(0, 2, 1)
+ string_ = string.transpose(2, 1).contiguous()
+ string_out = self.conv(string_)
if self.activation:
string_out = self.activation(string_out)
- if string_len is not None:
- string_len_out = (string_len - self.layer_conf.window_size) // self.layer_conf.stride + 1
- else:
- string_len_out = None
+
+ if self.cov_dropout:
+ string_out = self.cov_dropout(string_out)
+
+ if self.batch_norm:
+ string_out = self.batch_norm(string_out)
+
+ string_out = string_out.transpose(2, 1).contiguous()
+
+ string_len_out = None
+ if string_len is not None and self.layer_conf.remind_lengths:
+ string_len_out = string_len
return string_out, string_len_out
diff --git a/block_zoo/Embedding.py b/block_zoo/Embedding.py
index 29186f2..d0004b2 100644
--- a/block_zoo/Embedding.py
+++ b/block_zoo/Embedding.py
@@ -67,7 +67,10 @@ class EmbeddingConf(BaseConf):
for emb_type in self.conf:
if emb_type == 'position':
continue
- self.output_dim[2] += self.conf[emb_type]['dim']
+ if isinstance(self.conf[emb_type]['dim'], list):
+ self.output_dim[2] += sum(self.conf[emb_type]['dim'])
+ else:
+ self.output_dim[2] += self.conf[emb_type]['dim']
super(EmbeddingConf, self).inference()
@@ -114,6 +117,7 @@ class Embedding(BaseLayer):
self.layer_conf = layer_conf
self.embeddings = nn.ModuleDict() if layer_conf.weight_on_gpu else dict()
+ self.char_embeddings = nn.ModuleDict()
for input_cluster in layer_conf.conf:
if 'type' in layer_conf.conf[input_cluster]:
# char embedding
@@ -123,7 +127,7 @@ class Embedding(BaseLayer):
char_emb_conf = eval(layer_conf.conf[input_cluster]['type'] + "Conf")(** char_emb_conf_dict)
char_emb_conf.inference()
char_emb_conf.verify()
- self.embeddings[input_cluster] = eval(layer_conf.conf[input_cluster]['type'])(char_emb_conf)
+ self.char_embeddings[input_cluster] = eval(layer_conf.conf[input_cluster]['type'])(char_emb_conf)
else:
# word embedding, postag embedding, and so on
self.embeddings[input_cluster] = nn.Embedding(layer_conf.conf[input_cluster]['vocab_size'], layer_conf.conf[input_cluster]['dim'], padding_idx=0)
@@ -135,7 +139,6 @@ class Embedding(BaseLayer):
self.embeddings[input_cluster].weight.requires_grad = False
logging.info("The Embedding[%s][fix_weight] is true, fix the embeddings[%s]'s weight" % (input_cluster, input_cluster))
-
def forward(self, inputs, use_gpu=False):
""" process inputs
@@ -157,14 +160,13 @@ class Embedding(BaseLayer):
if 'extra' in input_cluster:
continue
input = inputs[input_cluster]
- # if 'type' in self.layer_conf.conf[input_cluster]:
- # emb = self.embeddings[input_cluster](input, lengths[input]).float()
- # else:
- # emb = self.embeddings[input_cluster](input).float()
- if self.embeddings[input_cluster].weight.device.type == 'cpu':
- emb = self.embeddings[input_cluster](input.cpu()).float()
+ if input_cluster == 'char':
+ emb = self.char_embeddings[input_cluster](input).float()
else:
- emb = self.embeddings[input_cluster](input).float()
+ if list(self.embeddings[input_cluster].parameters())[0].device.type == 'cpu':
+ emb = self.embeddings[input_cluster](input.cpu()).float()
+ else:
+ emb = self.embeddings[input_cluster](input).float()
if use_gpu is True:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
emb = emb.to(device)
diff --git a/block_zoo/Linear.py b/block_zoo/Linear.py
index ecdfa5b..6867bd1 100644
--- a/block_zoo/Linear.py
+++ b/block_zoo/Linear.py
@@ -32,6 +32,7 @@ class LinearConf(BaseConf):
self.activation = 'PReLU'
self.last_hidden_activation = True
self.last_hidden_softmax = False
+ self.keep_dim = True # for example, if the output shape is [?, len, 1] and you want to squeeze it, set keep_dim=False; then the output shape is [?, len]
@DocInherit
def declare(self):
@@ -42,10 +43,16 @@ class LinearConf(BaseConf):
def inference(self):
if isinstance(self.hidden_dim, int):
self.output_dim = copy.deepcopy(self.input_dims[0])
- self.output_dim[-1] = self.hidden_dim
+ if not self.keep_dim and self.hidden_dim == 1:
+ self.output_dim.pop()
+ else:
+ self.output_dim[-1] = self.hidden_dim
elif isinstance(self.hidden_dim, list):
self.output_dim = copy.deepcopy(self.input_dims[0])
- self.output_dim[-1] = self.hidden_dim[-1]
+ if not self.keep_dim and self.hidden_dim[-1] == 1:
+ self.output_dim.pop()
+ else:
+ self.output_dim[-1] = self.hidden_dim[-1]
super(LinearConf, self).inference() # PUT THIS LINE AT THE END OF inference()
@@ -87,6 +94,7 @@ class Linear(BaseLayer):
def __init__(self, layer_conf):
super(Linear, self).__init__(layer_conf)
+ self.layer_conf = layer_conf
if layer_conf.input_ranks[0] == 3 and layer_conf.batch_norm is True:
layer_conf.batch_norm = False
@@ -139,6 +147,8 @@ class Linear(BaseLayer):
masks = masks.to(device)
string = string * masks
string_out = self.linear(string.float())
+ if not self.layer_conf.keep_dim:
+ string_out = torch.squeeze(string_out, -1)
return string_out, string_len
diff --git a/block_zoo/Pooling1D.py b/block_zoo/Pooling1D.py
new file mode 100644
index 0000000..236e440
--- /dev/null
+++ b/block_zoo/Pooling1D.py
@@ -0,0 +1,104 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import numpy as np
+
+from block_zoo.BaseLayer import BaseLayer, BaseConf
+from utils.DocInherit import DocInherit
+
+
+class Pooling1DConf(BaseConf):
+ """
+
+ Args:
+ pool_type (str): 'max' or 'mean', default is 'max'.
+ stride (int): the stride of the pooling window, default is 1.
+ padding (int): implicit zero padding on both sides of the input. Default: 0
+ window_size (int): the size of the pooling
+
+ """
+
+ def __init__(self, **kwargs):
+ super(Pooling1DConf, self).__init__(**kwargs)
+
+ @DocInherit
+ def default(self):
+ self.pool_type = 'max' # Supported: ['max', 'mean']
+ self.stride = 1
+ self.padding = 0
+ self.window_size = 3
+
+ @DocInherit
+ def declare(self):
+ self.num_of_inputs = 1
+ self.input_ranks = [3]
+
+
+ @DocInherit
+ def inference(self):
+
+ self.output_dim = [self.input_dims[0][0]]
+ if self.input_dims[0][1] != -1:
+ self.output_dim.append(
+ (self.input_dims[0][1] + 2 * self.padding - self.window_size) // self.stride + 1)
+ else:
+ self.output_dim.append(-1)
+
+ self.output_dim.append(self.input_dims[0][-1])
+ # DON'T MODIFY THIS
+ self.output_rank = len(self.output_dim)
+
+ @DocInherit
+ def verify(self):
+ super(Pooling1DConf, self).verify()
+
+ necessary_attrs_for_user = ['pool_type']
+ for attr in necessary_attrs_for_user:
+ self.add_attr_exist_assertion_for_user(attr)
+
+ self.add_attr_value_assertion('pool_type', ['max', 'mean'])
+
+ assert self.output_dim[
+ -1] != -1, "The shape of input is %s , and the input channel number of pooling should not be -1." % (
+ str(self.input_dims[0]))
+
+
+class Pooling1D(BaseLayer):
+ """ Pooling layer
+
+ Args:
+ layer_conf (Pooling1DConf): configuration of a layer
+ """
+
+ def __init__(self, layer_conf):
+ super(Pooling1D, self).__init__(layer_conf)
+ self.pool = None
+ if layer_conf.pool_type == "max":
+ self.pool = nn.MaxPool1d(kernel_size=layer_conf.window_size, stride=layer_conf.stride,
+ padding=layer_conf.padding)
+ elif layer_conf.pool_type == "mean":
+ self.pool = nn.AvgPool1d(kernel_size=layer_conf.window_size, stride=layer_conf.stride,
+ padding=layer_conf.padding)
+
+ def forward(self, string, string_len=None):
+ """ process inputs
+
+ Args:
+ string (Tensor): tensor with shape: [batch_size, length, feature_dim]
+ string_len (Tensor): [batch_size], default is None.
+
+ Returns:
+ Tensor: Pooling result of string
+
+ """
+
+ string = string.permute([0, 2, 1]).contiguous()
+ string = self.pool(string)
+ string = string.permute([0, 2, 1]).contiguous()
+ return string, string_len
+
+
diff --git a/block_zoo/Pooling2D.py b/block_zoo/Pooling2D.py
index 5c94a8b..76cfb7a 100644
--- a/block_zoo/Pooling2D.py
+++ b/block_zoo/Pooling2D.py
@@ -19,7 +19,6 @@ class Pooling2DConf(BaseConf):
stride (int): which axis to conduct pooling, default is 1.
padding (int): implicit zero paddings on both sides of the input. Can be a single number or a tuple (padH, padW). Default: 0
window_size (int): the size of the pooling
- activation (string): activation functions, e.g. ReLU
"""
def __init__(self, **kwargs):
@@ -30,7 +29,7 @@ class Pooling2DConf(BaseConf):
self.pool_type = 'max' # Supported: ['max', mean']
self.stride = 1
self.padding = 0
- self.window_size = 3
+ # self.window_size = [self.input_dims[0][1], self.input_dims[0][2]]
@DocInherit
def declare(self):
@@ -39,7 +38,7 @@ class Pooling2DConf(BaseConf):
def check_size(self, value, attr):
res = value
- if isinstance(value,int):
+ if isinstance(value, int):
res = [value, value]
elif (isinstance(self.window_size, tuple) or isinstance(self.window_size, list)) and len(value)==2:
res = list(value)
@@ -49,6 +48,9 @@ class Pooling2DConf(BaseConf):
@DocInherit
def inference(self):
+
+ if not hasattr(self, "window_size"):
+ self.window_size = [self.input_dims[0][1], self.input_dims[0][2]]
self.window_size = self.check_size(self.window_size, "window_size")
self.stride = self.check_size(self.stride, "stride")
diff --git a/block_zoo/__init__.py b/block_zoo/__init__.py
index ec7f073..7351a69 100644
--- a/block_zoo/__init__.py
+++ b/block_zoo/__init__.py
@@ -16,9 +16,13 @@ from .ConvPooling import ConvPooling, ConvPoolingConf
from .Dropout import Dropout, DropoutConf
from .Conv2D import Conv2D, Conv2DConf
+from .Pooling1D import Pooling1D, Pooling1DConf
from .Pooling2D import Pooling2D, Pooling2DConf
from .embedding import CNNCharEmbedding, CNNCharEmbeddingConf
+from .embedding import LSTMCharEmbedding, LSTMCharEmbeddingConf
+
+from .CRF import CRFConf, CRF
from .attentions import FullAttention, FullAttentionConf
from .attentions import Seq2SeqAttention, Seq2SeqAttentionConf
@@ -49,4 +53,5 @@ from .EncoderDecoder import EncoderDecoder, EncoderDecoderConf
from .normalizations import LayerNorm, LayerNormConf
-from .HighwayLinear import HighwayLinear, HighwayLinearConf
\ No newline at end of file
+from .HighwayLinear import HighwayLinear, HighwayLinearConf
+
diff --git a/block_zoo/attentions/Interaction.py b/block_zoo/attentions/Interaction.py
index e607cc1..6976c02 100644
--- a/block_zoo/attentions/Interaction.py
+++ b/block_zoo/attentions/Interaction.py
@@ -58,7 +58,7 @@ class InteractionConf(BaseConf):
def verify(self):
super(InteractionConf, self).verify()
assert hasattr(self, 'matching_type'), "Please define matching_type attribute of BiGRUConf in default() or the configuration file"
- assert self.matching_type in ['general', 'dot', 'mul', 'plus', 'minus', 'add'], "Invalid `matching_type`{self.matching_type} received. Must be in `mul`, `general`, `plus`, `minus`, `dot` and `concat`."
+        # f-string so the rejected value is actually interpolated into the message;
+        # the listed names mirror the accepted values checked on this line.
+        assert self.matching_type in ['general', 'dot', 'mul', 'plus', 'minus', 'add', 'concat'], f"Invalid `matching_type`{self.matching_type} received. Must be in `mul`, `general`, `plus`, `minus`, `dot`, `add` and `concat`."
class Interaction(BaseLayer):
@@ -120,7 +120,7 @@ class Interaction(BaseLayer):
return x - y
elif self.matching_type == 'concat':
def func(x, y):
- return torch.concat([x, y], axis=-1)
+ return torch.cat([x, y], dim=-1)
else:
raise ValueError(f"Invalid matching type."
f"{self.matching_type} received."
diff --git a/block_zoo/embedding/CNNCharEmbedding.py b/block_zoo/embedding/CNNCharEmbedding.py
index 7f51d40..b4b8dc1 100644
--- a/block_zoo/embedding/CNNCharEmbedding.py
+++ b/block_zoo/embedding/CNNCharEmbedding.py
@@ -28,11 +28,11 @@ class CNNCharEmbeddingConf(BaseConf):
@DocInherit
def default(self):
- self.dim = 30 # cnn's output channel dim
+ self.dim = [30] # cnn's output channel dim
self.embedding_matrix_dim = 30 #
- self.stride = 1
+ self.stride = [1]
self.padding = 0
- self.window_size = 3
+ self.window_size = [3]
self.activation = 'ReLU'
@DocInherit
@@ -41,8 +41,14 @@ class CNNCharEmbeddingConf(BaseConf):
self.num_of_inputs = 1
self.input_ranks = [3]
+    def change_to_list(self, attribute):
+        """ Wrap each named attribute in a one-element list unless it already is a list
+
+        Args:
+            attribute (list): names of attributes of this configuration to normalize in place
+        """
+        for name in attribute:
+            current = getattr(self, name)
+            if isinstance(current, list):
+                continue
+            setattr(self, name, [current])
+
@DocInherit
def inference(self):
+ self.change_to_list(['dim', 'stride', 'window_size'])
self.output_channel_num = self.dim
self.output_rank = 3
@@ -65,20 +71,24 @@ class CNNCharEmbedding(BaseLayer):
super(CNNCharEmbedding, self).__init__(layer_conf)
self.layer_conf = layer_conf
+ assert len(layer_conf.dim) == len(layer_conf.window_size) == len(layer_conf.stride), "The attribute dim/window_size/stride must have the same length."
+
self.char_embeddings = nn.Embedding(layer_conf.vocab_size, layer_conf.embedding_matrix_dim, padding_idx=self.layer_conf.padding)
nn.init.uniform_(self.char_embeddings.weight, -0.001, 0.001)
- self.filters = Variable(torch.randn(layer_conf.output_channel_num, layer_conf.input_channel_num,
- layer_conf.window_size, layer_conf.embedding_matrix_dim).float(),
- requires_grad=True)
+ self.char_cnn = nn.ModuleList()
+ for i in range(len(layer_conf.output_channel_num)):
+ self.char_cnn.append(nn.Conv2d(1, layer_conf.output_channel_num[i], (layer_conf.window_size[i], layer_conf.embedding_matrix_dim),
+ stride=self.layer_conf.stride[i], padding=self.layer_conf.padding))
if layer_conf.activation:
self.activation = eval("nn." + self.layer_conf.activation)()
else:
self.activation = None
- if self.is_cuda():
- self.filters = self.filters.cuda()
- if self.activation:
- self.activation.weight = torch.nn.Parameter(self.activation.weight.cuda())
+ # if self.is_cuda():
+ # self.char_embeddings = self.char_embeddings.cuda()
+ # self.char_cnn = self.char_cnn.cuda()
+ # if self.activation and hasattr(self.activation, 'weight'):
+ # self.activation.weight = torch.nn.Parameter(self.activation.weight.cuda())
def forward(self, string):
"""
@@ -97,24 +107,29 @@ class CNNCharEmbedding(BaseLayer):
"""
string_reshaped = string.view(string.size()[0], -1) #[batch_size, seq_len * char num in words]
+
char_embs_lookup = self.char_embeddings(string_reshaped).float() # [batch_size, seq_len * char num in words, embedding_dim]
- if self.is_cuda():
- if self.filters.device == torch.device('cpu'):
- self.filters = self.filters.cuda()
- char_embs_lookup = char_embs_lookup.cuda(device=self.filters.device)
char_embs_lookup = char_embs_lookup.view(-1, string.size()[2], self.layer_conf.embedding_matrix_dim) #[batch_size * seq_len, char num in words, embedding_dim]
string_input = torch.unsqueeze(char_embs_lookup, 1) # [batch_size * seq_len, input_channel_num=1, char num in words, embedding_dim]
- string_conv = F.conv2d(string_input, self.filters, stride=self.layer_conf.stride, padding=self.layer_conf.padding) # [batch_size * seq_len, output_channel_num, char num in word related, 1]
- string_conv = torch.squeeze(string_conv, 3).permute(0, 2, 1) # [batch_size * seq_len, char num in word related, output_channel_num]
- if self.activation:
- string_conv = self.activation(string_conv)
+ outputs = []
+ for index, single_cnn in enumerate(self.char_cnn):
+ string_conv = single_cnn(string_input).squeeze(3)
+ if self.activation:
+ string_conv = self.activation(string_conv)
- string_maxpooling = torch.max(string_conv, 1)[0]
- string_out = string_maxpooling.view(string.size()[0], string.size()[1], -1)
+ string_maxpooling = F.max_pool1d(string_conv, string_conv.size(2)).squeeze()
+ string_out = string_maxpooling.view(string.size()[0], -1, self.layer_conf.output_channel_num[index])
- return string_out.cpu()
+ outputs.append(string_out)
+
+ if len(outputs) > 1:
+ string_output = torch.cat(outputs, 2)
+ else:
+ string_output = outputs[0]
+
+ return string_output
if __name__ == '__main__':
diff --git a/block_zoo/embedding/LSTMCharEmbedding.py b/block_zoo/embedding/LSTMCharEmbedding.py
new file mode 100644
index 0000000..1fc18a8
--- /dev/null
+++ b/block_zoo/embedding/LSTMCharEmbedding.py
@@ -0,0 +1,137 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+
+import numpy as np
+from block_zoo.BaseLayer import BaseLayer, BaseConf
+from utils.DocInherit import DocInherit
+
+
+class LSTMCharEmbeddingConf(BaseConf):
+    """ Configuration of LSTMCharEmbedding
+
+    Args:
+        dim (int, optional): the dimension of character embedding after lstm. Default: 50
+        embedding_matrix_dim (int, optional): the dimension of character initialized embedding. Default: 30
+        padding (int, optional): character id used as `padding_idx` of the character embedding table. Default: 0
+        dropout (float, optional): dropout rate. Default: 0.2
+        bidirect_flag (bool, optional): Using BiLSTM or not. Default: True
+    """
+    def __init__(self, **kwargs):
+        super(LSTMCharEmbeddingConf, self).__init__(**kwargs)
+
+    @DocInherit
+    def default(self):
+        self.dim = 50  # lstm's output channel dim
+        self.embedding_matrix_dim = 30
+        self.padding = 0
+        self.dropout = 0.2
+        self.bidirect_flag = True
+
+    @DocInherit
+    def declare(self):
+        # a single rank-3 input: [batch_size, seq_len, char num in words]
+        self.num_of_inputs = 1
+        self.input_ranks = [3]
+
+    @DocInherit
+    def inference(self):
+        self.output_rank = 3
+
+    @DocInherit
+    def verify(self):
+        # NOTE(review): the base-class verify() is deliberately not invoked here
+        # (it was commented out by the author); the reason is not visible in this file.
+        for required_attr in ('embedding_matrix_dim', 'dim', 'dropout', 'bidirect_flag', 'vocab_size'):
+            self.add_attr_exist_assertion_for_user(required_attr)
+
+
+class LSTMCharEmbedding(BaseLayer):
+    """
+    This layer implements character-level word embedding using an LSTM.
+
+    The character ids of every word are embedded, passed through dropout and a
+    single-layer (optionally bidirectional) LSTM; the final hidden state(s) of the
+    LSTM are used as the representation of the word.
+
+    Args:
+        layer_conf (LSTMCharEmbeddingConf): configuration of LSTMCharEmbedding
+    """
+    def __init__(self, layer_conf):
+        super(LSTMCharEmbedding, self).__init__(layer_conf)
+        self.layer_conf = layer_conf
+
+        self.char_embeddings = nn.Embedding(layer_conf.vocab_size, layer_conf.embedding_matrix_dim, padding_idx=self.layer_conf.padding)
+        nn.init.uniform_(self.char_embeddings.weight, -0.001, 0.001)
+
+        # Hidden size of one LSTM direction. With a BiLSTM the two directions are
+        # concatenated in forward(), so each gets half of the configured dim (an odd
+        # dim therefore yields dim - 1 output features). Without this else-branch a
+        # unidirectional configuration would leave self.dim undefined.
+        if layer_conf.bidirect_flag:
+            self.dim = layer_conf.dim // 2
+        else:
+            self.dim = layer_conf.dim
+        self.dropout = nn.Dropout(layer_conf.dropout)
+        self.char_lstm = nn.LSTM(layer_conf.embedding_matrix_dim, self.dim, num_layers=1, batch_first=True, bidirectional=layer_conf.bidirect_flag)
+
+        if self.is_cuda():
+            self.char_embeddings = self.char_embeddings.cuda()
+            self.dropout = self.dropout.cuda()
+            self.char_lstm = self.char_lstm.cuda()
+
+    def forward(self, string):
+        """
+        Step1: [batch_size, seq_len, char num in words] -> [batch_size*seq_len, char num in words]
+        Step2: lookup embedding matrix -> [batch_size*seq_len, char num in words, embedding_dim]
+        Step3: after lstm operation, got final hidden state [num_layer*num_directions, batch_size*seq_len, dim per direction]
+        Step4: reshape -> [batch_size, seq_len, num_directions * dim per direction]
+
+        Args:
+            string (Variable): [[char ids of word1], [char ids of word2], [...], ...], shape: [batch_size, seq_len, char num in words]
+
+        Returns:
+            Variable: [batch_size, seq_len, output_dim]
+
+        """
+        batch_size, seq_len = string.size()[0], string.size()[1]
+
+        # every word becomes one "sequence" of characters: [batch_size * seq_len, char num in words]
+        string_reshaped = string.view(batch_size * seq_len, -1)
+
+        # [batch_size * seq_len, char num in words, embedding_dim]
+        char_embs_lookup = self.char_embeddings(string_reshaped).float()
+        char_embs_drop = self.dropout(char_embs_lookup)
+
+        # Only the final hidden state is needed; the initial state defaults to zeros.
+        _, (final_hidden, _) = self.char_lstm(char_embs_drop)
+
+        # [num_directions, batch_size * seq_len, dim] -> [batch_size * seq_len, num_directions, dim]
+        # -> [batch_size, seq_len, num_directions * dim]
+        string_out = final_hidden.transpose(1, 0).contiguous().view(batch_size, seq_len, -1)
+        return string_out
+
+
+if __name__ == '__main__':
+    # Manual smoke test: build the layer from a hand-written configuration and
+    # print the embedding of a tiny fake batch.
+    demo_settings = dict(
+        embedding_matrix_dim=30,
+        dim=30,  # lstm's output channel dim
+        padding=0,
+        dropout=0.2,
+        bidirect_flag=True,
+        # should be infered from the corpus
+        vocab_size=10,
+        input_dims=[5],
+        input_ranks=[3],
+        use_gpu=True,
+    )
+    demo_conf = LSTMCharEmbeddingConf(**demo_settings)
+
+    # make a fake input: [bs, seq_len, char num in words]
+    # assume in this batch, the padded sentence length is 3 and the each word has 5 chars, including padding 0.
+    first_sentence = [[3, 1, 2, 5, 4], [1, 2, 3, 4, 0], [0, 0, 0, 0, 0]]
+    second_sentence = [[1, 1, 0, 0, 0], [2, 3, 1, 0, 0], [1, 2, 3, 4, 5]]
+    char_ids = np.array([first_sentence, second_sentence])
+
+    demo_layer = LSTMCharEmbedding(demo_conf)
+
+    print(demo_layer(torch.LongTensor(char_ids)))
+
+
diff --git a/block_zoo/embedding/__init__.py b/block_zoo/embedding/__init__.py
index adb4c39..f22055c 100644
--- a/block_zoo/embedding/__init__.py
+++ b/block_zoo/embedding/__init__.py
@@ -1,3 +1,4 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
-from .CNNCharEmbedding import CNNCharEmbeddingConf, CNNCharEmbedding
\ No newline at end of file
+from .CNNCharEmbedding import CNNCharEmbeddingConf, CNNCharEmbedding
+from .LSTMCharEmbedding import LSTMCharEmbeddingConf, LSTMCharEmbedding
\ No newline at end of file
diff --git a/block_zoo/op/CalculateDistance.py b/block_zoo/op/CalculateDistance.py
new file mode 100644
index 0000000..823d0f9
--- /dev/null
+++ b/block_zoo/op/CalculateDistance.py
@@ -0,0 +1,97 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import logging
+
+from ..BaseLayer import BaseConf, BaseLayer
+from utils.DocInherit import DocInherit
+from utils.exceptions import ConfigurationError
+import copy
+
+
+class CalculateDistanceConf(BaseConf):
+    """ Configuration of CalculateDistance Layer
+
+    Args:
+        operations (list): a subset of ["cos", "euclidean", "manhattan", "chebyshev"].
+    """
+
+    # init the args
+    def __init__(self, **kwargs):
+        super(CalculateDistanceConf, self).__init__(**kwargs)
+
+    # set default params
+    @DocInherit
+    def default(self):
+        self.operations = ["cos", "euclidean", "manhattan", "chebyshev"]
+
+    @DocInherit
+    def declare(self):
+        self.num_of_inputs = 2
+        # one declared rank per input: both inputs are 2D representations
+        self.input_ranks = [2, 2]
+
+    @DocInherit
+    def inference(self):
+        # same shape as the first input, with the last dimension reduced to a single score
+        self.output_dim = copy.deepcopy(self.input_dims[0])
+        self.output_dim[-1] = 1
+
+        super(CalculateDistanceConf, self).inference()
+
+    @DocInherit
+    def verify(self):
+        super(CalculateDistanceConf, self).verify()
+
+        assert len(self.input_dims) == 2, "Operation requires that there should be two inputs"
+
+        # every rank has to be 2, which also guarantees that all ranks are equal
+        if any(rank != 2 for rank in self.input_ranks):
+            raise ConfigurationError("For layer CalculateDistance, the ranks of each inputs should be equal and 2!")
+
+
+class CalculateDistance(BaseLayer):
+    """ CalculateDistance layer to calculate the distance of sequences(2D representation)
+
+    Args:
+        layer_conf (CalculateDistanceConf): configuration of a layer
+    """
+
+    def __init__(self, layer_conf):
+        super(CalculateDistance, self).__init__(layer_conf)
+        self.layer_conf = layer_conf
+
+    def forward(self, x, x_len, y, y_len):
+        """ Compute one distance/similarity score for every pair of rows of x and y
+
+        Exactly one measure is computed: the first of "cos", "euclidean", "manhattan"
+        and "chebyshev" (checked in this order) that appears in layer_conf.operations.
+
+        Args:
+            x: [batch_size, dim]
+            x_len: [batch_size]
+            y: [batch_size, dim]
+            y_len: [batch_size]
+        Returns:
+            Tensor: [batch_size, 1], None
+
+        Raises:
+            ConfigurationError: if none of the supported operations is configured.
+
+        """
+
+        batch_size = x.size()[0]
+        operations = self.layer_conf.operations
+        if "cos" in operations:
+            result = F.cosine_similarity(x, y)
+        elif "euclidean" in operations:
+            result = torch.sqrt(torch.sum((x - y) ** 2, dim=1))
+        elif "manhattan" in operations:
+            result = torch.sum(torch.abs(x - y), dim=1)
+        elif "chebyshev" in operations:
+            # max over a dim returns (values, indices); only the values are the distance
+            result = torch.abs(x - y).max(dim=1)[0]
+        else:
+            raise ConfigurationError("This operation is not supported!")
+
+        result = result.view(batch_size, 1)
+        return result, None
diff --git a/block_zoo/op/Combination.py b/block_zoo/op/Combination.py
index eeec7f5..329d262 100644
--- a/block_zoo/op/Combination.py
+++ b/block_zoo/op/Combination.py
@@ -47,7 +47,6 @@ class CombinationConf(BaseConf):
self.output_dim[-1] += int(np.mean([input_dim[-1] for input_dim in self.input_dims])) # difference operation requires dimension of all the inputs should be equal
if "dot_multiply" in self.operations:
self.output_dim[-1] += int(np.mean([input_dim[-1] for input_dim in self.input_dims])) # dot_multiply operation requires dimension of all the inputs should be equal
-
super(CombinationConf, self).inference()
@DocInherit
diff --git a/block_zoo/op/Expand_plus.py b/block_zoo/op/Expand_plus.py
new file mode 100644
index 0000000..17ebb47
--- /dev/null
+++ b/block_zoo/op/Expand_plus.py
@@ -0,0 +1,76 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+# Come from http://www.hangli-hl.com/uploads/3/1/6/8/3168008/hu-etal-nips2014.pdf [ARC-II]
+
+import torch
+import torch.nn as nn
+import copy
+
+from block_zoo.BaseLayer import BaseLayer, BaseConf
+from utils.DocInherit import DocInherit
+from utils.exceptions import ConfigurationError
+
+class Expand_plusConf(BaseConf):
+    """Configuration for Expand_plus layer
+
+    Declares two rank-3 inputs; the inferred output shape is the shape of the first
+    input with the sequence length of the second input inserted at index 2.
+    """
+    def __init__(self, **kwargs):
+        super(Expand_plusConf, self).__init__(**kwargs)
+
+    @DocInherit
+    def default(self):
+        self.operation = 'Plus'
+
+    @DocInherit
+    def declare(self):
+        self.num_of_inputs = 2
+        self.input_ranks = [3, 3]
+
+    @DocInherit
+    def inference(self):
+        self.output_dim = copy.deepcopy(self.input_dims[0])
+        # both sequence lengths are needed to describe the output shape
+        if self.input_dims[0][1] == -1 or self.input_dims[1][1] == -1:
+            raise ConfigurationError("For Expand_plus layer, the sequence length should be fixed")
+        y_seq_len = self.input_dims[1][1]
+        self.output_dim.insert(2, y_seq_len)
+        super(Expand_plusConf, self).inference()  # PUT THIS LINE AT THE END OF inference()
+
+    @DocInherit
+    def verify(self):
+        super(Expand_plusConf, self).verify()
+
+
+class Expand_plus(BaseLayer):
+    """ Expand_plus layer
+    Given sequences X and Y, expand both along the other one's length axis and add
+    them, so that every position of X is combined with every position of Y.
+
+    Args:
+        layer_conf (Expand_plusConf): configuration of a layer
+
+    """
+    def __init__(self, layer_conf):
+
+        super(Expand_plus, self).__init__(layer_conf)
+        # element-wise addition needs both inputs to share the feature dimension
+        assert layer_conf.input_dims[0][-1] == layer_conf.input_dims[1][-1], \
+            "For Expand_plus layer, the last dimension of the two inputs should be equal"
+
+    def forward(self, x, x_len, y, y_len):
+        """
+
+        Args:
+            x: [batch_size, x_max_len, dim].
+            x_len: [batch_size], not used by this layer.
+            y: [batch_size, y_max_len, dim].
+            y_len: [batch_size], not used by this layer.
+
+        Returns:
+            output: [batch_size, x_max_len, y_max_len, dim], None
+
+        """
+
+        # [batch_size, x_max_len, 1, dim] + [batch_size, 1, y_max_len, dim] broadcasts to
+        # [batch_size, x_max_len, y_max_len, dim] without materializing the repeated copies.
+        return x.unsqueeze(2) + y.unsqueeze(1), None
+
+
diff --git a/block_zoo/op/__init__.py b/block_zoo/op/__init__.py
index 0be67bb..896cef6 100644
--- a/block_zoo/op/__init__.py
+++ b/block_zoo/op/__init__.py
@@ -4,4 +4,6 @@ from .Concat2D import Concat2D, Concat2DConf
from .Concat3D import Concat3D, Concat3DConf
from .Combination import Combination, CombinationConf
from .Match import Match, MatchConf
-from .Flatten import Flatten, FlattenConf
\ No newline at end of file
+from .Flatten import Flatten, FlattenConf
+from .Expand_plus import Expand_plus, Expand_plusConf
+from .CalculateDistance import CalculateDistance, CalculateDistanceConf
\ No newline at end of file
diff --git a/dataset/get_glove.sh b/dataset/get_glove.sh
old mode 100644
new mode 100755
index e9e9ade..f350a25
--- a/dataset/get_glove.sh
+++ b/dataset/get_glove.sh
@@ -1,13 +1,21 @@
preprocess_exec="sed -f tokenizer.sed"
glovepath='http://nlp.stanford.edu/data/glove.840B.300d.zip'
+glovepath_6B='http://nlp.stanford.edu/data/glove.6B.zip'
ZIPTOOL="unzip"
# GloVe
echo $glovepath
-mkdir GloVe
+# Create ./GloVe only when it is missing. `[` needs a space before the closing `]`,
+# and the tested path must be the same relative directory that mkdir creates.
+if [ ! -d "GloVe" ]; then
+    mkdir GloVe
+fi
curl -LO $glovepath
$ZIPTOOL glove.840B.300d.zip -d GloVe/
rm glove.840B.300d.zip
+curl -LO $glovepath_6B
+$ZIPTOOL glove.6B.zip -d GloVe/
+rm glove.6B.zip
+
+
diff --git a/dataset/get_glove_6B.sh b/dataset/get_glove_6B.sh
new file mode 100644
index 0000000..3214c0e
--- /dev/null
+++ b/dataset/get_glove_6B.sh
@@ -0,0 +1,15 @@
+# Download the GloVe 6B word vectors and unpack them into ./GloVe/
+preprocess_exec="sed -f tokenizer.sed"
+
+glovepath='http://nlp.stanford.edu/data/glove.6B.zip'
+
+ZIPTOOL="unzip"
+
+# GloVe
+echo $glovepath
+# Create ./GloVe only when it is missing. `[` needs a space before the closing `]`,
+# and the tested path must be the same relative directory that mkdir creates.
+if [ ! -d "GloVe" ]; then
+    mkdir GloVe
+fi
+curl -LO $glovepath
+$ZIPTOOL glove.6B.zip -d GloVe/
+rm glove.6B.zip
+
diff --git a/dataset/slot_tagging/conll_2003/eng.testa.tsv b/dataset/slot_tagging/conll_2003/eng.testa.tsv
new file mode 100644
index 0000000..60a27e6
--- /dev/null
+++ b/dataset/slot_tagging/conll_2003/eng.testa.tsv
@@ -0,0 +1,500 @@
+-DOCSTART- O
+CRICKET - LEICESTERSHIRE TAKE OVER AT TOP AFTER INNINGS VICTORY . O O S-ORG O O O O O O O O
+LONDON 1996-08-30 S-LOC O
+West Indian all-rounder Phil Simmons took four for 38 on Friday as Leicestershire beat Somerset by an innings and 39 runs in two days to take over at the head of the county championship . B-MISC E-MISC O B-PER E-PER O O O O O O O S-ORG O S-ORG O O O O O O O O O O O O O O O O O O O O
+Their stay on top , though , may be short-lived as title rivals Essex , Derbyshire and Surrey all closed in on victory while Kent made up for lost time in their rain-affected match against Nottinghamshire . O O O O O O O O O O O O O S-ORG O S-ORG O S-ORG O O O O O O S-ORG O O O O O O O O O O S-ORG O
+After bowling Somerset out for 83 on the opening morning at Grace Road , Leicestershire extended their first innings by 94 runs before being bowled out for 296 with England discard Andy Caddick taking three for 83 . O O S-ORG O O O O O O O O B-LOC E-LOC O S-ORG O O O O O O O O O O O O O O S-LOC O B-PER E-PER O O O O O
+Trailing by 213 , Somerset got a solid start to their second innings before Simmons stepped in to bundle them out for 174 . O O O O S-ORG O O O O O O O O O S-PER O O O O O O O O O
+Essex , however , look certain to regain their top spot after Nasser Hussain and Peter Such gave them a firm grip on their match against Yorkshire at Headingley . S-ORG O O O O O O O O O O O B-PER E-PER O B-PER E-PER O O O O O O O O O S-ORG O S-LOC O
+Hussain , considered surplus to England 's one-day requirements , struck 158 , his first championship century of the season , as Essex reached 372 and took a first innings lead of 82 . S-PER O O O O S-LOC O O O O O O O O O O O O O O O O S-ORG O O O O O O O O O O O
+By the close Yorkshire had turned that into a 37-run advantage but off-spinner Such had scuttled their hopes , taking four for 24 in 48 balls and leaving them hanging on 119 for five and praying for rain . O O O S-ORG O O O O O O O O O S-PER O O O O O O O O O O O O O O O O O O O O O O O O O
+At the Oval , Surrey captain Chris Lewis , another man dumped by England , continued to silence his critics as he followed his four for 45 on Thursday with 80 not out on Friday in the match against Warwickshire . O O S-LOC O S-ORG O B-PER E-PER O O O O O S-LOC O O O O O O O O O O O O O O O O O O O O O O O O O S-ORG O
+He was well backed by England hopeful Mark Butcher who made 70 as Surrey closed on 429 for seven , a lead of 234 . O O O O O S-LOC O B-PER E-PER O O O O S-ORG O O O O O O O O O O O
+Derbyshire kept up the hunt for their first championship title since 1936 by reducing Worcestershire to 133 for five in their second innings , still 100 runs away from avoiding an innings defeat . S-ORG O O O O O O O O O O O O O S-ORG O O O O O O O O O O O O O O O O O O O
+Australian Tom Moody took six for 82 but Chris Adams , 123 , and Tim O'Gorman , 109 , took Derbyshire to 471 and a first innings lead of 233 . S-MISC B-PER E-PER O O O O O B-PER E-PER O O O O B-PER E-PER O O O O S-ORG O O O O O O O O O O
+After the frustration of seeing the opening day of their match badly affected by the weather , Kent stepped up a gear to dismiss Nottinghamshire for 214 . O O O O O O O O O O O O O O O O O S-ORG O O O O O O S-ORG O O O
+They were held up by a gritty 84 from Paul Johnson but ex-England fast bowler Martin McCague took four for 55 . O O O O O O O O O B-PER E-PER O S-MISC O O B-PER E-PER O O O O O
+By stumps Kent had reached 108 for three . O O S-ORG O O O O O O
+-DOCSTART- O
+CRICKET - ENGLISH COUNTY CHAMPIONSHIP SCORES . O O B-MISC I-MISC E-MISC O O
+LONDON 1996-08-30 S-LOC O
+Result and close of play scores in English county championship matches on Friday : O O O O O O O S-MISC O O O O O O
+Leicester : Leicestershire beat Somerset by an innings and 39 runs . S-LOC O S-ORG O S-ORG O O O O O O O
+Somerset 83 and 174 ( P. Simmons 4-38 ) , Leicestershire 296 . S-ORG O O O O B-PER E-PER O O O S-ORG O O
+Leicestershire 22 points , Somerset 4 . S-ORG O O O S-ORG O O
+Chester-le-Street : Glamorgan 259 and 207 ( A. Dale 69 , H. Morris 69 ; D. Blenkiron 4-43 ) , Durham 114 ( S. Watkin 4-28 ) and 81-3 . S-LOC O S-ORG O O O O B-PER E-PER O O B-PER E-PER O O B-PER E-PER O O O S-ORG O O B-PER E-PER O O O O O
+Tunbridge Wells : Nottinghamshire 214 ( P. Johnson 84 ; M. McCague 4-55 ) , Kent 108-3 . B-LOC E-LOC O S-ORG O O B-PER E-PER O O B-PER E-PER O O O S-ORG O O
+London ( The Oval ) : Warwickshire 195 , Surrey 429-7 ( C. Lewis 80 not out , M. Butcher 70 , G. Kersey 63 , J. Ratcliffe 63 , D. Bicknell 55 ) . S-LOC O B-LOC E-LOC O O S-ORG O O S-ORG O O B-PER E-PER O O O O B-PER E-PER O O B-PER E-PER O O B-PER E-PER O O B-PER E-PER O O O
+Hove : Sussex 363 ( W. Athey 111 , V. Drakes 52 ; I. Austin 4-37 ) , Lancashire 197-8 ( W. Hegg 54 ) S-LOC O S-ORG O O B-PER E-PER O O B-PER E-PER O O B-PER E-PER O O O S-ORG O O B-PER E-PER O O
+Portsmouth : Middlesex 199 and 426 ( J. Pooley 111 , M. Ramprakash 108 , M. Gatting 83 ) , Hampshire 232 and 109-5 . S-LOC O S-ORG O O O O B-PER E-PER O O B-PER E-PER O O B-PER E-PER O O O S-ORG O O O O
+Chesterfield : Worcestershire 238 and 133-5 , Derbyshire 471 ( J. Adams 123 , T.O'Gorman 109 not out , K. Barnett 87 ; T. Moody 6-82 ) S-LOC O S-ORG O O O O S-ORG O O B-PER E-PER O O S-PER O O O O B-PER E-PER O O B-PER E-PER O O
+Bristol : Gloucestershire 183 and 185-6 ( J. Russell 56 not out ) , Northamptonshire 190 ( K. Curran 52 ; A. Smith 5-68 ) . S-LOC O S-ORG O O O O B-PER E-PER O O O O O S-ORG O O B-PER E-PER O O B-PER E-PER O O O
+-DOCSTART- O
+CRICKET - 1997 ASHES INTINERARY . O O O S-MISC O O
+LONDON 1996-08-30 S-LOC O
+Australia will defend the Ashes in S-LOC O O O S-MISC O
+a six-test series against England during a four-month tour O O O O S-LOC O O O O
+starting on May 13 next year , the Test and County Cricket Board O O O O O O O O B-ORG I-ORG I-ORG I-ORG E-ORG
+said on Friday . O O O O
+Australia will also play three one-day internationals and S-LOC O O O O O O O
+four one-day warm-up matches at the start of the tour . O O O O O O O O O O O
+The tourists will play nine first-class matches against O O O O O O O O
+English county sides and another against British Universities , S-MISC O O O O O B-ORG E-ORG O
+as well as one-day matches against the Minor Counties and O O O O O O O B-ORG E-ORG O
+Scotland . S-LOC O
+Tour itinerary : O O O
+May O
+May 13 Arrive in London O O O O S-LOC
+May 14 Practice at Lord 's O O O O B-LOC E-LOC
+May 15 v Duke of Norfolk 's XI ( at Arundel ) O O O B-ORG I-ORG I-ORG I-ORG E-ORG O O S-LOC O
+May 17 v Northampton O O O S-ORG
+May 18 v Worcestershire O O O S-ORG
+May 20 v Durham O O O S-ORG
+May 22 First one-day international ( at Headingley , O O O O O O O S-LOC O
+Leeds ) S-ORG O
+May 24 Second one-day international ( at The Oval , O O O O O O O B-LOC E-LOC O
+London ) S-LOC O
+May 25 Third one-day international ( at Lord 's , London ) O O O O O O O B-LOC E-LOC O S-LOC O
+May 27-29 v Gloucestershire or Sussex or Surrey ( three O O O S-ORG O S-ORG O S-ORG O O
+days ) O O
+May 31 - June 2 v Derbyshire ( three days ) O O O O O O S-ORG O O O O
+June O
+June 5-9 First test match ( at Edgbaston , Birmingham ) O O O O O O O S-LOC O S-LOC O
+June 11-13 v a first class county ( to be confirmed ) O O O O O O O O O O O O
+June 14-16 v Leicestershire ( three days ) O O O S-ORG O O O O
+June 19-23 Second test ( at Lord 's ) O O O O O O B-LOC E-LOC O
+June 25-27 v British Universities ( at Oxford , three days ) O O O B-ORG E-ORG O O S-LOC O O O O
+June 28-30 v Hampshire ( three days ) O O O S-ORG O O O O
+July O
+July 3-7 Third test ( at Old Trafford , Manchester ) O O O O O O B-LOC E-LOC O S-LOC O
+July 9 v Minor Counties XI O O O B-ORG I-ORG E-ORG
+July 12 v Scotland O O O S-LOC
+July 16-18 v Glamorgan ( three days ) O O O S-ORG O O O O
+July 19-21 v Middlesex ( three days ) O O O S-ORG O O O O
+July 24-28 Fourth test ( at Headingley ) O O O O O O S-LOC O
+August O
+August 1-4 v Somerset ( four days ) O O O S-ORG O O O O
+August 7-11 Fifth test ( at Trent Bridge , Nottingham ) O O O O O O B-LOC E-LOC O S-LOC O
+August 16-18 v Kent ( three days ) O O O S-ORG O O O O
+August 21-25 Sixth test ( at The Oval , London ) . O O O O O O B-LOC E-LOC O S-LOC O O
+-DOCSTART- O
+SOCCER - SHEARER NAMED AS ENGLAND CAPTAIN . O O S-PER O O S-LOC O O
+LONDON 1996-08-30 S-LOC O
+The world 's costliest footballer Alan Shearer was named as the new England captain on Friday . O O O O O B-PER E-PER O O O O O S-LOC O O O O
+The 26-year-old , who joined Newcastle for 15 million pounds sterling ( $ 23.4 million ) , takes over from Tony Adams , who led the side during the European championship in June , and former captain David Platt . O O O O O S-ORG O O O O O O O O O O O O O O B-PER E-PER O O O O O O O S-MISC O O O O O O O B-PER E-PER O
+Adams and Platt are both injured and will miss England 's opening World Cup qualifier against Moldova on Sunday . S-PER O S-PER O O O O O O S-LOC O O B-MISC E-MISC O O S-LOC O O O
+Shearer takes the captaincy on a trial basis , but new coach Glenn Hoddle said he saw no reason why the former Blackburn and Southampton skipper should not make the post his own . S-PER O O O O O O O O O O O B-PER E-PER O O O O O O O O S-ORG O S-ORG O O O O O O O O O
+" I 'm sure there wo n't be a problem , I 'm sure Alan is the man for the job , " Hoddle said . O O O O O O O O O O O O O O S-PER O O O O O O O O S-PER O O
+" There were three or four people who could have done it but when I spoke to Alan he was up for it and really wanted it . O O O O O O O O O O O O O O O O O S-PER O O O O O O O O O O
+" In four days it 's very difficult to come to a 100 percent conclusion about something like this ... O O O O O O O O O O O O O O O O O O O O
+but he knows how to conduct himself , his team mates respect him and he knows about the team situation even though he plays up front . " O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+Shearer 's Euro 96 striking partner Teddy Sheringham withdrew from the squad with an injury on Friday . S-PER O B-MISC E-MISC O O B-PER E-PER O O O O O O O O O O
+He will probably be replaced by Shearer 's Newcastle team mate Les Ferdinand . O O O O O O S-PER O S-ORG O O B-PER E-PER O
+-DOCSTART- O
+BASKETBALL - INTERNATIONAL TOURNAMENT RESULT . O O O O O O
+BELGRADE 1996-08-30 S-LOC O
+Result in an international O O O O
+basketball tournament on Friday : O O O O O
+Red Star ( Yugoslavia ) beat Dinamo ( Russia ) 92-90 ( halftime B-ORG E-ORG O S-LOC O O S-ORG O S-LOC O O O O
+47-47 ) O O
+-DOCSTART- O
+SOCCER - ROMANIA BEAT LITHUANIA IN UNDER-21 MATCH . O O S-LOC O S-LOC O O O O
+BUCHAREST 1996-08-30 S-LOC O
+Romania beat Lithuania 2-1 ( halftime 1-1 ) in their European under-21 soccer match on Friday . S-LOC O S-LOC O O O O O O O S-MISC O O O O O O
+Scorers : O O
+Romania - Cosmin Contra ( 31st ) , Mihai Tararache ( 75th ) S-LOC O B-PER E-PER O O O O B-PER E-PER O O O
+Lithuania - Danius Gleveckas ( 13rd ) S-LOC O B-PER E-PER O O O
+Attendance : 200 O O O
+-DOCSTART- O
+SOCCER - ROTOR FANS LOCKED OUT AFTER VOLGOGRAD VIOLENCE . O O S-ORG O O O O S-LOC O O
+MOSCOW 1996-08-30 S-LOC O
+Rotor Volgograd must play their next home game behind closed doors after fans hurled bottles and stones at Dynamo Moscow players during a 1-0 home defeat on Saturday that ended Rotor 's brief spell as league leaders . B-ORG E-ORG O O O O O O O O O O O O O O O O B-ORG E-ORG O O O O O O O O O O S-ORG O O O O O O O
+The head of the Russian league 's disciplinary committee , Anatoly Gorokhovsky , said on Friday that Rotor would play Lada Togliatti to empty stands on September 3 . O O O O S-MISC O O O O O B-PER E-PER O O O O O S-ORG O O B-ORG E-ORG O O O O O O O
+The club , who put Manchester United out of last year 's UEFA Cup , were fined $ 1,000 . O O O O O B-ORG E-ORG O O O O O B-MISC E-MISC O O O O O O
+Despite the defeat , Rotor are well placed with 11 games to play in the championship . O O O O S-ORG O O O O O O O O O O O O
+Lying three points behind Alania and two behind Dynamo Moscow , the Volgograd side have a game in hand over the leaders and two over the Moscow club . O O O O S-ORG O O O B-ORG E-ORG O O S-LOC O O O O O O O O O O O O O S-LOC O O
+-DOCSTART- O
+BOXING - PANAMA 'S ROBERTO DURAN FIGHTS THE SANDS OF TIME . O O S-LOC O B-PER E-PER O O O O O O
+PANAMA CITY 1996-08-30 B-LOC E-LOC O
+Panamanian boxing legend Roberto " Hands of Stone " Duran climbs into the ring on Saturday in another age-defying attempt to sustain his long career . S-MISC O O B-PER I-PER I-PER I-PER I-PER I-PER E-PER O O O O O O O O O O O O O O O O
+Duran , 45 , takes on little-known Mexican Ariel Cruz , 30 , in a super middleweight non-title bout in Panama City . S-PER O O O O O O S-MISC B-PER E-PER O O O O O O O O O O B-LOC E-LOC O
+The fight , Duran 's first on home soil for 10 years , is being billed here as the " Return of the Legend " and Duran still talks as if he was in his prime . O O O S-PER O O O O O O O O O O O O O O O O B-MISC I-MISC I-MISC E-MISC O O S-PER O O O O O O O O O O
+" I want a fifth title . O O O O O O O
+This match is to prepare me . O O O O O O O
+I feel good . O O O O
+I 'm not retiring , " Duran told Reuters . O O O O O O S-PER O S-ORG O
+But those close to the boxer acknowledge that the man who has won championships in four different weight classes -- lightweight , welterweight , junior middleweight and middleweight -- is drawing close to the end of his career . O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+" Each time he fights , he 's on the last frontier of his career . O O O O O O O O O O O O O O O O
+If he loses Saturday , it could devalue his position as one of the world 's great boxers , " Panamanian Boxing Association President Ramon Manzanares said . O O O O O O O O O O O O O O O O O O O O S-MISC B-ORG E-ORG O B-PER E-PER O O
+Duran , whose 97-12 record spans three decades , hopes a win in the 10-round bout will earn him a rematch against Puerto Rico 's Hector " Macho " Camacho . S-PER O O O O O O O O O O O O O O O O O O O O O B-LOC E-LOC O B-PER I-PER I-PER I-PER E-PER O
+Camacho took a controversial points decision against the Panamanian in Atlantic City in June in a title fight . S-PER O O O O O O O S-MISC O B-LOC E-LOC O O O O O O O
+-DOCSTART- O
+SQUASH - HONG KONG OPEN QUARTER-FINAL RESULTS . O O B-MISC I-MISC E-MISC O O O
+HONG KONG 1996-08-30 B-LOC E-LOC O
+Quarter-final results in the Hong Kong Open on Friday ( prefix number denotes seeding ) : 1 - Jansher Khan ( Pakistan ) beat Mark Cairns ( England ) 15-10 15-6 15-7 O O O O B-MISC I-MISC E-MISC O O O O O O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O
+Anthony Hill ( Australia ) beat Dan Jenson ( Australia ) 15-9 15-8 15-17 17-15 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O
+4 - Peter Nicol ( Scotland ) beat 7 - Chris Walker ( England ) 15-8 15-13 13-15 15-9 O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O O
+2 - Rodney Eyles ( Australia ) beat Derek Ryan ( Ireland ) 15-6 15-9 11-15 15-10 . O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O
+-DOCSTART- O
+SOCCER - RESULTS OF SOUTH KOREAN PRO-SOCCER GAMES . O O O O B-MISC E-MISC O O O
+SEOUL 1996-08-30 S-LOC O
+Results of South Korean pro-soccer O O B-MISC E-MISC O
+games played on Thursday . O O O O O
+Pohang 3 Ulsan 2 ( halftime 1-0 ) S-ORG O S-ORG O O O O O
+Puchon 2 Chonbuk 1 ( halftime 1-1 ) S-ORG O S-ORG O O O O O
+Standings after games played on Thursday ( tabulate under - O O O O O O O O O O
+won , drawn , lost , goals for , goals against , points ) : O O O O O O O O O O O O O O O
+W D L G / F G / A P O O O O O O O O O O
+Puchon 3 1 0 6 1 10 S-ORG O O O O O O
+Chonan 3 0 1 13 10 9 S-ORG O O O O O O
+Pohang 2 1 1 11 10 7 S-ORG O O O O O O
+Suwan 1 3 0 7 3 6 S-ORG O O O O O O
+Ulsan 1 0 2 8 9 3 S-ORG O O O O O O
+Anyang 0 3 1 6 9 3 S-ORG O O O O O O
+Chonnam 0 2 1 4 5 2 S-ORG O O O O O O
+Pusan 0 2 1 3 7 2 S-ORG O O O O O O
+Chonbuk 0 0 3 3 7 0 S-ORG O O O O O O
+-DOCSTART- O
+BASEBALL - RESULTS OF S. KOREAN PROFESSIONAL GAMES . O O O O B-MISC E-MISC O O O
+SEOUL 1996-08-30 S-LOC O
+Results of South Korean O O B-MISC E-MISC
+professional baseball games played on Thursday . O O O O O O O
+LG 2 OB 0 S-ORG O S-ORG O
+Lotte 6 Hyundai 2 S-ORG O S-ORG O
+Hyundai 6 Lotte 5 S-ORG O S-ORG O
+Haitai 2 Samsung 0 S-ORG O S-ORG O
+Samsung 10 Haitai 3 S-ORG O S-ORG O
+Hanwha 6 Ssangbangwool 5 S-ORG O S-ORG O
+Note - Lotte and Hyundai , Haitai and Samsung played two games . O O S-ORG O S-ORG O S-ORG O S-ORG O O O O
+Standings after games played on Thursday ( tabulate under O O O O O O O O O
+won , drawn , lost , winning percentage , games behind first place ) O O O O O O O O O O O O O O
+W D L PCT GB O O O O O
+Haitai 64 2 43 .596 - S-ORG O O O O O
+Ssangbangwool 59 2 49 .545 5 1/2 S-ORG O O O O O O
+Hanwha 58 1 49 .542 6 S-ORG O O O O O
+Hyundai 57 5 49 .536 6 1/2 S-ORG O O O O O O
+Samsung 49 5 56 .468 14 S-ORG O O O O O
+Lotte 46 6 54 .462 14 1/2 S-ORG O O O O O O
+LG 46 5 59 .441 17 S-ORG O O O O O
+OB 42 6 62 .409 20 1/2 S-ORG O O O O O O
+-DOCSTART- O
+TENNIS - FRIDAY 'S RESULTS FROM THE U.S. OPEN . O O O O O O O B-MISC E-MISC O
+NEW YORK 1996-08-30 B-LOC E-LOC O
+Results from the U.S. Open Tennis Championships at the National Tennis Centre on Friday ( prefix number denotes seeding ) : O O O B-MISC I-MISC I-MISC E-MISC O O B-LOC I-LOC E-LOC O O O O O O O O O
+Women 's singles , third round O O O O O O
+Sandrine Testud ( France ) beat Ines Gorrochategui ( Argentina ) 4-6 6-2 6-1 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O
+Men 's singles , second round O O O O O O
+4 - Goran Ivanisevic ( Croatia ) beat Scott Draper ( Australia ) 6-7 ( 1-7 ) 6-3 6-4 6-4 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O O O
+Tim Henman ( Britain ) beat Doug Flach ( U.S. ) 6-3 6-4 6-2 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O
+Mark Philippoussis ( Australia ) beat Andrei Olhovskiy ( Russia ) 6 - 3 6-4 6-2 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O
+Sjeng Schalken ( Netherlands ) beat David Rikl ( Czech Republic ) 6 - 2 6-4 6-4 B-PER E-PER O S-LOC O O B-PER E-PER O B-LOC E-LOC O O O O O O
+Guy Forget ( France ) beat 17 - Felix Mantilla ( Spain ) 6-4 7-5 6-3 B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O
+Men 's singles , second round O O O O O O
+Alexander Volkov ( Russia ) beat Mikael Tillstrom ( Sweden ) 1-6 6- 4 6-1 4-6 7-6 ( 10-8 ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O O O O O
+Jonas Bjorkman ( Sweden ) beat David Nainkin ( South Africa ) ) 6-4 6-1 6-1 B-PER E-PER O S-LOC O O B-PER E-PER O B-LOC E-LOC O O O O O
+Women 's singles , third round O O O O O O
+8 - Lindsay Davenport ( U.S. ) beat Anne-Gaelle Sidot ( France ) 6-0 6-3 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+4 - Conchita Martinez ( Spain ) beat Helena Sukova ( Czech Republic ) 6-4 6-3 O O B-PER E-PER O S-LOC O O B-PER E-PER O B-LOC E-LOC O O O
+Amanda Coetzer ( South Africa ) beat Irina Spirlea ( Romania ) 7-6 ( 7-5 ) 7-5 B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC O O O O O O
+Add Men 's singles , second round 16 - Cedric Pioline ( France ) beat Roberto Carretero ( Spain ) 4-6 6 - 2 6-2 6-1 Alex Corretja ( Spain ) beat Filippo Veglio ( Switzerland ) 6-7 ( 4- 7 ) 6-4 6-4 6-0 O O O O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O O O O
+Add Women 's singles , third round Linda Wild ( U.S. ) beat Barbara Rittner ( Germany ) 6-4 4-6 7-5 Asa Carlsson ( Sweden ) beat 15 - Gabriela Sabatini ( Argentina ) 7-5 3-6 6-2 O O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O
+Add Men 's singles , second round 1 - Pete Sampras ( U.S. ) beat Jiri Novak ( Czech Republic ) 6-3 1-6 6-3 4-6 6-4 Paul Haarhuis ( Netherlands ) beat Michael Tebbutt ( Australia ) 1- 6 6-2 6-2 6-3 O O O O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O B-LOC E-LOC O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O
+Add Women 's singles , third round Lisa Raymond ( U.S. ) beat Kimberly Po ( U.S. ) 6-3 6-2 O O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+: O
+Add men 's singles , second round O O O O O O O
+Hendrik Dreekmann ( Germany ) beat Thomas Johansson ( Sweden ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+7-6 ( 7-1 ) 6-2 4-6 6-1 O O O O O O O
+Andrei Medvedev ( Ukraine ) beat Jan Kroslak ( Slovakia ) 6-4 6-3 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+6-2 O
+Petr Korda ( Czech Republic ) bat Bohdan Ulihrach ( Czech B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC
+Republic ) 6-0 7-6 ( 7-5 ) 6-2 S-LOC O O O O O O O
+Add women 's singles , third round O O O O O O O
+2 - Monica Seles ( U.S. ) beat Dally Randriantefy ( Madagascar ) O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+6-0 6-2 O O
+: O
+Add men 's singles , second round 12 - Todd Martin ( U.S. ) beat Andrea Gaudenzi ( Italy ) 6-3 6-2 6-2 Stefan Edberg ( Sweden ) beat Bernd Karbacher ( Germany ) 3-6 6-3 6-3 1-0 retired ( leg injury ) O O O O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O O O O O
+-DOCSTART- O
+BASEBALL - MAJOR LEAGUE STANDINGS AFTER THURSDAY 'S GAMES . O O B-MISC E-MISC O O O O O O
+NEW YORK 1996-08-30 B-LOC E-LOC O
+Major League Baseball B-MISC I-MISC E-MISC
+standings after games played on Thursday ( tabulate under won , O O O O O O O O O O O
+lost , winning percentage and games behind ) : O O O O O O O O O
+AMERICAN LEAGUE B-MISC E-MISC
+EASTERN DIVISION B-MISC E-MISC
+W L PCT GB O O O O
+NEW YORK 74 59 .556 - B-ORG E-ORG O O O O
+BALTIMORE 70 63 .526 4 S-ORG O O O O
+BOSTON 69 65 .515 5 1/2 S-ORG O O O O O
+TORONTO 63 71 .470 11 1/2 S-ORG O O O O O
+DETROIT 48 86 .358 26 1/2 S-ORG O O O O O
+CENTRAL DIVISION B-MISC E-MISC
+CLEVELAND 80 53 .602 - S-ORG O O O O
+CHICAGO 71 64 .526 10 S-ORG O O O O
+MINNESOTA 67 67 .500 13 1/2 S-ORG O O O O O
+MILWAUKEE 64 71 .474 17 S-ORG O O O O
+KANSAS CITY 61 74 .452 20 B-ORG E-ORG O O O O
+WESTERN DIVISION B-MISC E-MISC
+TEXAS 75 58 .564 - S-ORG O O O O
+SEATTLE 70 63 .526 5 S-ORG O O O O
+OAKLAND 64 72 .471 12 1/2 S-ORG O O O O O
+CALIFORNIA 62 72 .463 13 1/2 S-ORG O O O O O
+FRIDAY , AUGUST 30 SCHEDULE O O O O O
+KANSAS CITY AT DETROIT B-ORG E-ORG O S-LOC
+CHICAGO AT TORONTO S-ORG O S-LOC
+MINNESOTA AT MILWAUKEE S-ORG O S-LOC
+CLEVELAND AT TEXAS S-ORG O S-LOC
+NEW YORK AT CALIFORNIA B-ORG E-ORG O S-LOC
+BOSTON AT OAKLAND S-ORG O S-LOC
+BALTIMORE AT SEATTLE S-ORG O S-LOC
+NATIONAL LEAGUE B-MISC E-MISC
+EASTERN DIVISION B-MISC E-MISC
+W L PCT GB O O O O
+ATLANTA 83 49 .629 - S-ORG O O O O
+MONTREAL 71 61 .538 12 S-ORG O O O O
+FLORIDA 64 70 .478 20 S-ORG O O O O
+NEW YORK 59 75 .440 25 B-ORG E-ORG O O O O
+PHILADELPHIA 54 80 .403 30 S-ORG O O O O
+CENTRAL DIVISION B-MISC E-MISC
+HOUSTON 72 63 .533 - S-ORG O O O O
+ST LOUIS 69 65 .515 2 1/2 B-ORG E-ORG O O O O O
+CINCINNATI 66 67 .496 5 S-ORG O O O O
+CHICAGO 65 66 .496 5 S-ORG O O O O
+PITTSBURGH 56 77 .421 15 S-ORG O O O O
+WESTERN DIVISION B-MISC E-MISC
+SAN DIEGO 75 60 .556 - B-ORG E-ORG O O O O
+LOS ANGELES 72 61 .541 2 B-ORG E-ORG O O O O
+COLORADO 70 65 .519 5 S-ORG O O O O
+SAN FRANCISCO 57 74 .435 16 B-ORG E-ORG O O O O
+FRIDAY , AUGUST 30 SCHEDULE O O O O O
+ATLANTA AT CHICAGO S-ORG O S-LOC
+FLORIDA AT CINCINNATI S-ORG O S-LOC
+SAN DIEGO AT MONTREAL B-ORG E-ORG O S-LOC
+LOS ANGELES AT PHILADELPHIA B-ORG E-ORG O S-LOC
+HOUSTON AT PITTSBURGH S-ORG O S-LOC
+SAN FRANCISCO AT NEW YORK B-ORG E-ORG O B-LOC E-LOC
+COLORADO AT ST LOUIS S-ORG O B-LOC E-LOC
+-DOCSTART- O
+BASEBALL - MAJOR LEAGUE RESULTS THURSDAY . O O B-MISC E-MISC O O O
+NEW YORK 1996-08-30 B-LOC E-LOC O
+Results of Major League O O B-MISC E-MISC
+Baseball games played on Thursday ( home team in CAPS ) : O O O O O O O O O O O O
+American League B-MISC E-MISC
+DETROIT 4 Kansas City 1 S-ORG O B-ORG E-ORG O
+Minnesota 6 MILWAUKEE 1 S-ORG O S-ORG O
+CALIFORNIA 14 New York 3 S-ORG O B-ORG E-ORG O
+SEATTLE 9 Baltimore 6 S-ORG O S-ORG O
+National League B-MISC E-MISC
+San Diego 3 NEW YORK 2 B-ORG E-ORG O B-ORG E-ORG O
+Chicago 4 HOUSTON 3 S-ORG O S-ORG O
+Cincinnati 18 COLORADO 7 S-ORG O S-ORG O
+Atlanta 5 PITTSBURGH 1 S-ORG O S-ORG O
+Los Angeles 2 MONTREAL 1 B-ORG E-ORG O S-ORG O
+Florida 10 ST LOUIS 9 S-ORG O B-ORG E-ORG O
+-DOCSTART- O
+TENNIS - TARANGO , O'BRIEN SPRING TWIN UPSETS UNDER THE LIGHTS . O O S-PER O S-PER O O O O O O O
+Larry Fine B-PER E-PER
+NEW YORK 1996-08-30 B-LOC E-LOC O
+Andre Agassi escaped disaster on Thursday but Wimbledon finalist MaliVai Washington and Marcelo Rios were not so fortunate on a night of upsets at the U.S. Open . B-PER E-PER O O O O O S-MISC O B-PER E-PER O B-PER E-PER O O O O O O O O O O O B-MISC E-MISC O
+The 11th-seeded Washington fell short of reprising his Wimbledon miracle comeback as he lost to red-hot wildcard Alex O'Brien 6-3 6-4 5-7 3-6 6-3 in a two hour 51 minute struggle on the Stadium court . O O S-PER O O O O O S-MISC O O O O O O O O B-PER E-PER O O O O O O O O O O O O O O O O O
+Next door on the grandstand , 10th seed Rios lost to another player with a Wimbledon connection -- bad boy Jeff Tarango . O O O O O O O O S-PER O O O O O O S-MISC O O O O B-PER E-PER O
+The temperamental left-hander defeated the Chilean 6-4 4-6 7-6 6-2 . O O O O O S-MISC O O O O O
+The day programme went smoothly although sixth-seeded former champion Agassi had to wriggle out of a dangerous 3-6 0-4 hole , winning 18 of the last 19 games against India 's Leander Paes . O O O O O O O O O S-PER O O O O O O O O O O O O O O O O O O O S-LOC O B-PER E-PER O
+But the night belonged to the upstarts . O O O O O O O O
+Washington , who climbed back from a 1-5 deficit , two sets down in the third set against Todd Martin in the Wimbledon semifinals , looked poised for another sensational comeback . S-PER O O O O O O O O O O O O O O O O O B-PER E-PER O O S-MISC O O O O O O O O O
+O'Brien , a winner two weeks ago in New Haven for his first pro title , served for the match at 5-4 in the third set before Washington came charging back . S-PER O O O O O O O B-LOC E-LOC O O O O O O O O O O O O O O O O O S-PER O O O O
+" I just kept saying to myself , ' keep giving yourself the best chance to win , keep battling , maybe something will happen , ' " said the 26-year-old O'Brien , ranked 65th . O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O O
+" I kept my composure and I was proud of myself for that -- usually I would have folded up the tent and gone home . " O O O O O O O O O O O O O O O O O O O O O O O O O O O
+The hard-serving O'Brien , a former U.S. collegiate national champion , fired up 17 aces to ultimately subdue the never-say-die Washington . O O S-PER O O O S-LOC O O O O O O O O O O O O O S-PER O
+The fifth set stayed on serve until the sixth game , when Washington , after saving one break point with a forehand winner down the line , netted a backhand to give O'Brien a 4-2 lead . O O O O O O O O O O O O S-PER O O O O O O O O O O O O O O O O O O O S-PER O O O O
+The Texan blasted in two aces to hold serve at 5-2 and then converted his eighth match point for victory when Washington found the net with another backhand from 40-0 . O S-MISC O O O O O O O O O O O O O O O O O O O S-PER O O O O O O O O O
+" You just kind of keep fighting and you keep trying to make him play a little bit . O O O O O O O O O O O O O O O O O O O
+I think he got a little tight at a couple of moments , " said Washington . " O O O O O O O O O O O O O O O S-PER O O
+But I think he served pretty well when he had to . " O O O O O O O O O O O O O
+Tarango , whose Wimbledon tantrum two years ago brought him a $ 28,000 fine and suspension from this year 's tournament at the All-England Club , argued calls and taunted fans in his lively two hour , 24 minute tango with Rios on the grandstand . S-PER O O S-MISC O O O O O O O O O O O O O O O O O O O B-ORG E-ORG O O O O O O O O O O O O O O O O S-PER O O O O
+A boisterous cheering section backed the distracted Chilean and booed the lanky American , who ate up all the attention . O O O O O O O S-MISC O O O O S-MISC O O O O O O O O
+" I 'm an emotional player , " said the 104th-ranked Tarango . " O O O O O O O O O O O S-PER O O
+I think I played very well tonight , very focused . " O O O O O O O O O O O O
+The match turned on the third-set tiebreaker , which the American won 7-5 much to the dismay of the spectators . O O O O O O O O O O S-MISC O O O O O O O O O O
+" I love the crowd if they boo me every day . O O O O O O O O O O O O
+It fires me up , makes me play my best tennis , " Tarango said . O O O O O O O O O O O O O S-PER O O
+" I played some of my best tennis in college when fraternities were throwing beer on me . O O O O O O O O O O O O O O O O O O
+If tennis was like that every day , I think everybody wold be having a lot more fun . " O O O O O O O O O O O O O O O O O O O O
+Rios did not appreciate Tarango 's antics . S-PER O O O S-PER O O O
+" He 's always complaining too much , " said Rios . " O O O O O O O O O O S-PER O O
+But I think it 's not that . O O O O O O O O
+I think I played really bad . O O O O O O O
+It was tough to play at night . O O O O O O O O
+Balls were going really fast . O O O O O O
+I lost too many points that I never lose . O O O O O O O O O O
+I did n't play my tennis . " O O O O O O O O
+" I do n't see the ball like I see during the day . O O O O O O O O O O O O O O
+I play an American so that 's why I play at night . O O O S-MISC O O O O O O O O O
+I did n't feel good on the court . " O O O O O O O O O O
+At the end of the match , Tarango blew sarcastic kisses to the crowd , then jiggled his body to a Rios rooting section in a jeering salute . O O O O O O O S-PER O O O O O O O O O O O O O S-PER O O O O O O O
+" I support their enthusiasm , " Tarango said about the fans . " O O O O O O O S-PER O O O O O O
+At the same time , they 're cheering blatantly against me . O O O O O O O O O O O O
+After I won I figured I could give them a little razzle-dazzle . " O O O O O O O O O O O O O O
+-DOCSTART- O
+NFL AMERICAN FOOTBALL-RANDALL CUNNINGHAM RETIRES . S-ORG B-MISC E-MISC S-PER O O
+PHILADELPHIA 1996-08-29 S-LOC O
+Randall Cunningham , the National Football League 's all-time leading rusher as a quarterback and one of the most athletic players ever to line up over centre , retired Thursday . B-PER E-PER O O B-ORG I-ORG E-ORG O O O O O O O O O O O O O O O O O O O O O O O O
+Cunningham played his entire 11-year career with the Philadelphia Eagles . S-PER O O O O O O O B-ORG E-ORG O
+A three-time Pro Bowl selection , Cunningham rushed for 4,482 yards on 677 carries . O O B-MISC E-MISC O O S-PER O O O O O O O O
+" I would like to thank the Eagles organisation and the wonderful fans of Philadelphia for supporting me throughout my career , " Cunningham said . O O O O O O O S-ORG O O O O O O S-ORG O O O O O O O O S-PER O O
+" Although it saddens me to leave , I am looking forward to spending more time with my family and pursuing other interests that have been on the back burner for sometime . " O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+" Randall was one of the most exciting quarterbacks in NFL history , " said Eagles owner Jeffrey Lurie . " O S-PER O O O O O O O O S-ORG O O O O S-ORG O B-PER E-PER O O
+During his 11 years in Philadelphia , Randall was the cornerstone of the Eagles ' franchise and brought many great moments to fans in Philadelphia as well as across the NFL . " O O O O O S-LOC O S-PER O O O O O S-ORG O O O O O O O O O O S-LOC O O O O O S-ORG O O
+A second-round choice in 1985 , Cunningham completed 1,874-of-3,362 passes ( 55.7 percent ) for 22,877 yards and 150 touchdowns . O O O O O O S-PER O O O O O O O O O O O O O O
+Cunningham has already been signed as a broadcaster . S-PER O O O O O O O O
+-DOCSTART- O
+GOLF - LEADING SCORES AT GREATER MILWAUKEE OPEN . O O O O O B-MISC I-MISC E-MISC O
+MILWAUKEE , Wisconsin 1996-08-29 S-LOC O S-LOC O
+Leading scores in O O O
+the $ 1.2 million Greater Milwaukee Open at the par-71 , O O O O B-MISC I-MISC E-MISC O O O O
+6,739-yard Brown Deer Park Golf Course after the first round O B-LOC I-LOC I-LOC I-LOC E-LOC O O O O
+on Thursday ( players U.S. unless stated ) : O O O O S-LOC O O O O
+62 Nolan Henke O B-PER E-PER
+64 Bob Estes O B-PER E-PER
+65 Billy Andrade , Duffy Waldorf , Jesper Parnevik ( Sweden ) O B-PER E-PER O B-PER E-PER O B-PER E-PER O S-LOC O
+66 Neal Lancaster , Dave Barr ( Canada ) , Mike Sullivan , Willie O B-PER E-PER O B-PER E-PER O S-LOC O O B-PER E-PER O S-PER
+Wood , Loren Roberts , Steve Stricker , Brian Claar , Russ Cochran S-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER
+67 Mark Calcavecchia , Payne Stewart , Billy Mayfair , Ken O B-PER E-PER O B-PER E-PER O B-PER E-PER O S-PER
+Green , Jerry Kelly , Tim Simpson , Olin Browne , Shane Bortsch , S-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Mike Hulbert , Brian Henninger , Tiger Woods , Steve Jurgenson , B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Bryan Gorman B-PER E-PER
+-DOCSTART- O
+GOLF - HENKE TAKES LEAD IN MILWAUKEE , WOODS MAKES PRO DEBUT . O O S-PER O O O S-LOC O S-PER O O O O
+MILWAUKEE , Wisconsin 1996-08-29 S-LOC O S-LOC O
+Nolan Henke fired a nine-under-par 62 to grab a two-shot lead after the opening round of the $ 1.2 million Greater Milwaukee Open Thursday as 20-year-old Tiger Woods shot 67 in his professional debut . B-PER E-PER O O O O O O O O O O O O O O O O O O B-MISC I-MISC E-MISC O O O B-PER E-PER O O O O O O O
+Henke stood two strokes ahead of Bob Estes and three up on Billy Andrade , Duffy Waldorf and Jesper Parnevik . S-PER O O O O O B-PER E-PER O O O O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Woods , who turned pro Tuesday after winning an unprecedented third successive U.S. Amateur Championship , almost eagled the 18th hole . S-PER O O O O O O O O O O O B-MISC I-MISC E-MISC O O O O O O O
+He settled for a birdie and a four-under opening round that left him five shots off the pace . O O O O O O O O O O O O O O O O O O O
+" Yesterday was the toughest day I 've had for a long time , " Woods said . " O O O O O O O O O O O O O O O S-PER O O O
+Today , I got to play golf . " O O O O O O O O O
+He added : " I thought I got off off to a great start . O O O O O O O O O O O O O O O
+It was a perfect start . O O O O O O
+I 'm in a good position . " O O O O O O O O
+Henke , who called his round a " pleasant surprise , " finished with six birdies on the final eight holes . S-PER O O O O O O O O O O O O O O O O O O O O O
+" We finally got things going in the right direction , " he said . " O O O O O O O O O O O O O O O O
+It was my best round in a very long time . O O O O O O O O O O O
+My short game has improved since I 've had to use it so often . O O O O O O O O O O O O O O O
+That 's always been the worst part of my game . O O O O O O O O O O O
+All in all , playing bad 's been a good experience . " O O O O O O O O O O O O O
+Henke , who came within one shot of the course record set by Andrew Magee during Wednesday 's pro-am , has three career PGA Tour victories , but none since the 1993 BellSouth Classic . S-PER O O O O O O O O O O O O B-PER E-PER O O O O O O O O B-MISC E-MISC O O O O O O O B-MISC E-MISC O
+Estes , whose only win came at the 1994 Texas Open and whose best finish this year was a third-place tie at the Nortel Open in January , eagled the par-five fourth hole and added five birdies to grab sole possession of second place . S-PER O O O O O O O O B-MISC E-MISC O O O O O O O O O O O O B-MISC E-MISC O O O O O O O O O O O O O O O O O O O O
+" No bogeys on the card , " he noted . " O O O O O O O O O O O O
+Sometimes I take more pride in that . " O O O O O O O O O
+Woods was among a group of 13 players at four under , including 1993 champion Billy Mayfair , who tied for second at last week 's World Series of Golf , and former U.S. Open champ Payne Stewart . S-PER O O O O O O O O O O O O O O B-PER E-PER O O O O O O O O O B-MISC I-MISC I-MISC E-MISC O O O B-MISC E-MISC O B-PER E-PER O
+Defending champion Scott Hoch shot a three-under 68 and was six strokes back . O O B-PER E-PER O O O O O O O O O O
+Phil Mickelson , the only four-time winner on the PGA Tour , skipped the tournament after winning the World Series of Golf last week . B-PER E-PER O O O O O O O B-MISC E-MISC O O O O O O O B-MISC I-MISC I-MISC E-MISC O O O
+Mark Brooks , Tom Lehman and Mark O'Meara , who make up the rest of the top four on the money list , also took the week off . B-PER E-PER O B-PER E-PER O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+SOCCER - SILVA 'S `LOST PASSPORT ' EXCUSE NOT ENOUGH FOR FIFA . O O S-PER O O O O O O O O S-ORG O
+MADRID 1996-08-30 S-LOC O
+Spanish first division team Deportivo Coruna will be without key midfielder Mauro Silva for Saturday 's game with Real Madrid after FIFA , soccer 's world governing body , suspended the Brazilian for one game for missing his national side 's European tour . S-MISC O O O B-ORG E-ORG O O O O O B-PER E-PER O O O O O B-ORG E-ORG O S-ORG O O O O O O O O O S-MISC O O O O O O O O O S-MISC O O
+Silva excused his absence from Brazil 's game against Russia , on Wednesday , and Saturday 's match with the Netherlands by saying he had lost his passport . S-PER O O O O S-LOC O O O S-LOC O O O O O O O O O O S-LOC O O O O O O O O
+But that did not prevent him from collecting the one-match suspension . O O O O O O O O O O O O
+-DOCSTART- O
+ATHLETICS - MITCHELL DEFEATS BAILEY IN FRONT OF FORMER CHAMPIONS . O O S-PER O S-PER O O O O O O
+Adrian Warner B-PER E-PER
+BERLIN 1996-08-30 S-LOC O
+American Dennis Mitchell outclassed Olympic 100 metres champion Donovan Bailey for the third time at a major post-Games meeting in front of the most experienced sprinting crowd in the world on Friday . S-MISC B-PER E-PER O S-MISC O O O B-PER E-PER O O O O O O O S-MISC O O O O O O O O O O O O O O O
+Watched by an array of former Olympic sprint champions at the Berlin grand prix meeting , Mitchell made a brilliant start in the 100 metres and held off Bailey 's strong finish to win in 10.08 seconds despite cool conditions . O O O O O O S-MISC O O O O S-LOC O O O O S-PER O O O O O O O O O O O S-PER O O O O O O O O O O O O
+Bailey , who set a world record of 9.84 on his way to victory in Atlanta , could not catch his American rival and had to settle for third in a tight finish . S-PER O O O O O O O O O O O O O O S-LOC O O O O O S-MISC O O O O O O O O O O O O
+Jamaica 's Michael Green was second with 10.09 with Bailey finishing in 10.13 . S-LOC O B-PER E-PER O O O O O S-PER O O O O
+Last Friday Mitchell , who finished fourth at the Atlanta Games , upstaged a trio of Olympic champions including Bailey to win the 100 in Brussels . O O S-PER O O O O O O B-MISC E-MISC O O O O O S-MISC O O S-PER O O O O O S-LOC O
+Earlier this month he also beat world champion Bailey in Zurich . O O O O O O O O S-PER O S-LOC O
+Berlin , Brussels and Zurich all belong to the most lucrative series in the sport , the Golden Four . S-LOC O S-LOC O S-LOC O O O O O O O O O O O O B-MISC E-MISC O
+Among the crowd on Friday were Olympic 100 metres champions going back to 1948 . O O O O O O S-MISC O O O O O O O O
+They had been invited to the meeting to watch a special relay to mark the 60th anniversary of Jesse Owens 's four gold medals at the 1936 Olympics in the same Berlin stadium . O O O O O O O O O O O O O O O O O O B-PER E-PER O O O O O O O S-MISC O O O S-LOC O O
+" Today the concentration was the most important thing for me , " Mitchell said . O O O O O O O O O O O O O S-PER O O
+Despite the coolish conditions American Olympic champion Gail Devers looked in commanding form in the women 's 100 , clocking 10.89 to defeat Jamaican rival Merlene Ottey , who was second in 10.94 . O O O O B-MISC E-MISC O B-PER E-PER O O O O O O O O O O O O O O S-MISC O B-PER E-PER O O O O O O O
+-DOCSTART- O
+ATHLETICS - BERLIN GRAND PRIX RESULTS . O O B-MISC I-MISC E-MISC O O
+BERLIN 1996-08-30 S-LOC O
+Leading results at the Berlin O O O O S-MISC
+Grand Prix athletics meeting on Friday : B-MISC E-MISC O O O O O
+Women 's 100 metres hurdles O O O O O
+1. Michelle Freeman ( Jamaica ) 12.71 seconds O B-PER E-PER O S-LOC O O O
+2. Ludmila Engquist ( Sweden ) 12.74 O B-PER E-PER O S-LOC O O
+3. Aliuska Lopez ( Cuba ) 12.92 O B-PER E-PER O S-LOC O O
+4. Brigita Bokovec ( Slovenia ) 12.92 O B-PER E-PER O S-LOC O O
+5. Dionne Rose ( Jamaica ) 12.92 O B-PER E-PER O S-LOC O O
+6. Julie Baumann ( Switzerland ) 13.11 O B-PER E-PER O S-LOC O O
+7. Gillian Russell ( Jamaica ) 13.17 O B-PER E-PER O S-LOC O O
+Women 's 1,500 metres O O O O
+1. Svetlana Masterkova ( Russia ) four minutes 6.87 seconds O B-PER E-PER O S-LOC O O O O O
+2. Patricia Djate-Taillard ( France ) 4:08.22 O B-PER E-PER O S-LOC O O
+3. Carla Sacramento ( Portugal ) 4:08.96 O B-PER E-PER O S-LOC O O
+4. Yekaterina Podkopayeva ( Russia ) 4:09.25 O B-PER E-PER O S-LOC O O
+5. Leah Pells ( Canada ) 4:09.95 O B-PER E-PER O S-LOC O O
+6. Carmen Wuestenhagen ( Germany ) 4:10.38 O B-PER E-PER O S-LOC O O
+7. Margarita Maruseva ( Russia ) 4:10.87 O B-PER E-PER O S-LOC O O
+8. Sara Thorsett ( U.S. ) 4:11.06 O B-PER E-PER O S-LOC O O
+Men 's 110 metres hurdles O O O O O
+1. Mark Crear ( U.S. ) 13.26 seconds O B-PER E-PER O S-LOC O O O
+2. Tony Jarrett ( Britain ) 13.35 O B-PER E-PER O S-LOC O O
+3. Florian Schwarthoff ( Germany ) 13.36 O B-PER E-PER O S-LOC O O
+4. Emilio Valle ( Cuba ) 13.52 O B-PER E-PER O S-LOC O O
+5. Falk Balzer ( Germany ) 13.52 O B-PER E-PER O S-LOC O O
+6. Steve Brown ( U.S. ) 13.53 O B-PER E-PER O S-LOC O O
+7. Frank Busemann ( Germany ) 13.58 O B-PER E-PER O S-LOC O O
+8. Jack Pierce ( U.S. ) 13.60 O B-PER E-PER O S-LOC O O
+Men 's 200 metres O O O O
+1. Frankie Fredericks ( Namibia ) 19.97 seconds O B-PER E-PER O S-LOC O O O
+2. Michael Johnson ( U.S. ) 20.02 O B-PER E-PER O S-LOC O O
+3. Ato Boldon ( Trinidad ) 20.37 O B-PER E-PER O S-LOC O O
+4. Geir Moen ( Norway ) 20.41 O B-PER E-PER O S-LOC O O
+5. Patrick Stevens ( Belgium ) 20.54 O B-PER E-PER O S-LOC O O
+6. Jon Drummond ( U.S. ) 20.78 O B-PER E-PER O S-LOC O O
+7. Claus Hirsbro ( Denmark ) 20.90 O B-PER E-PER O S-LOC O O
+8. Ivan Garcia ( Cuba ) 20.96 O B-PER E-PER O S-LOC O O
+Women 's shot put O O O O
+1. Astrid Kumbernuss ( Germany ) 19.89 metres O B-PER E-PER O S-LOC O O O
+2. Claudia Mues ( Germany ) 18.80 O B-PER E-PER O S-LOC O O
+3. Irina Korzhanenko ( Russia ) 18.63 O B-PER E-PER O S-LOC O O
+4. Valentina Fedyushina ( Russia ) 18.55 O B-PER E-PER O S-LOC O O
+5. Stephanie Storp ( Germany ) 18.41 O B-PER E-PER O S-LOC O O
+Men 's mile O O O
+1. Noureddine Morceli ( Algeria ) 3 minutes 49.09 seconds O B-PER E-PER O S-LOC O O O O O
+2. Venuste Niyongabo ( Burundi ) 3:51.01 O B-PER E-PER O S-LOC O O
+3. William Tanui ( Kenya ) 3:51.40 O B-PER E-PER O S-LOC O O
+4. Laban Rotich ( Kenya ) 3:53.42 O B-PER E-PER O S-LOC O O
+5. Marko Koers ( Netherlands ) 3:53.47 O B-PER E-PER O S-LOC O O
+6. Isaac Viciosa ( Spain ) 3:53.85 O B-PER E-PER O S-LOC O O
+7. John Mayock ( Britain ) 3:54.67 O B-PER E-PER O S-LOC O O
+8. Marcus O'Sullivan ( Ireland ) 3:54.87 O B-PER E-PER O S-LOC O O
+Men 's discus O O O
+1. Lars Riedel ( Germany ) 70.60 metres O B-PER E-PER O S-LOC O O O
+2. Anthony Washington ( U.S. ) 68.44 O B-PER E-PER O S-LOC O O
+3. Vasily Kaptyukh ( Belarus ) 66.24 O B-PER E-PER O S-LOC O O
+4. Vladimir Dubrovshchik ( Belarus ) 65.30 O B-PER E-PER O S-LOC O O
+5. Virgilijus Alekna ( Lithuania ) 65.00 O B-PER E-PER O S-LOC O O
+6. Juergen Schult ( Germany ) 64.46 O B-PER E-PER O S-LOC O O
+7. Andreas Seelig ( Germany ) 62.00 O B-PER E-PER O S-LOC O O
+8. Michael Moellenbeck ( Germany ) 58.56 O B-PER E-PER O S-LOC O O
+Women 's 100 metres O O O O
+1. Gail Devers ( U.S. ) 10.89 seconds O B-PER E-PER O S-LOC O O O
+2. Merlene Ottey ( Jamaica ) 10.94 O B-PER E-PER O S-LOC O O
+3. Gwen Torrence ( U.S. ) 11.07 O B-PER E-PER O S-LOC O O
+4. Mary Onyali ( Nigeria ) 11.14 O B-PER E-PER O S-LOC O O
+5. Chryste Gaines ( U.S. ) 11.20 O B-PER E-PER O S-LOC O O
+6. Chandra Sturrup ( Bahamas ) 11.26 O B-PER E-PER O S-LOC O O
+7. Irina Privalova ( Russia ) 11.27 O B-PER E-PER O S-LOC O O
+8. Inger Miller ( U.S. ) 11.37 O B-PER E-PER O S-LOC O O
+Women 's 5,000 metres O O O O
+1. Gabriela Szabo ( Romania ) 15 minutes 04.95 seconds O B-PER E-PER O S-LOC O O O O O
+2. Gete Wami ( Ethiopia ) 15:05.21 O B-PER E-PER O S-LOC O O
+3. Rose Cheruiyot ( Kenya ) 15:05.41 O B-PER E-PER O S-LOC O O
+4. Annemari Sandell ( Finland ) 15:06.33 O B-PER E-PER O S-LOC O O
+5. Tegla Loroupe ( Kenya ) 15:08.79 O B-PER E-PER O S-LOC O O
+6. Gunhild Halle ( Norway ) 15:09.00 O B-PER E-PER O S-LOC O O
+7. Pauline Konga ( Kenya ) 15:09.74 O B-PER E-PER O S-LOC O O
+8. Sally Barsosio ( Kenya ) 15:14.34 O B-PER E-PER O S-LOC O O
+Men 's 400 metres hurdles O O O O O
+1. Torrance Zellner ( U.S. ) 48.23 seconds O B-PER E-PER O S-LOC O O O
+2. Samuel Matete ( Zambia ) 48.34 O B-PER E-PER O S-LOC O O
+3. Derrick Adkins ( U.S. ) 48.62 O B-PER E-PER O S-LOC O O
+4. Fabrizio Mori ( Italy ) 49.21 O B-PER E-PER O S-LOC O O
+5. Sven Nylander ( Sweden ) 49.22 O B-PER E-PER O S-LOC O O
+6. Eric Thomas ( U.S. ) 49.35 O B-PER E-PER O S-LOC O O
diff --git a/dataset/slot_tagging/conll_2003/eng.testb.tsv b/dataset/slot_tagging/conll_2003/eng.testb.tsv
new file mode 100644
index 0000000..bba30ac
--- /dev/null
+++ b/dataset/slot_tagging/conll_2003/eng.testb.tsv
@@ -0,0 +1,500 @@
+-DOCSTART- O
+SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT . O O S-LOC O O O O S-PER O O O O
+Nadim Ladki B-PER E-PER
+AL-AIN , United Arab Emirates 1996-12-06 S-LOC O B-LOC I-LOC E-LOC O
+Japan began the defence of their Asian Cup title with a lucky 2-1 win against Syria in a Group C championship match on Friday . S-LOC O O O O O B-MISC E-MISC O O O O O O O S-LOC O O O O O O O O O
+But China saw their luck desert them in the second match of the group , crashing to a surprise 2-0 defeat to newcomers Uzbekistan . O S-LOC O O O O O O O O O O O O O O O O O O O O O S-LOC O
+China controlled most of the match and saw several chances missed until the 78th minute when Uzbek striker Igor Shkvyrin took advantage of a misdirected defensive header to lob the ball over the advancing Chinese keeper and into an empty net . S-LOC O O O O O O O O O O O O O O O S-MISC O B-PER E-PER O O O O O O O O O O O O O O S-MISC O O O O O O O
+Oleg Shatskiku made sure of the win in injury time , hitting an unstoppable left foot shot from just outside the area . B-PER E-PER O O O O O O O O O O O O O O O O O O O O O
+The former Soviet republic was playing in an Asian Cup finals tie for the first time . O O S-MISC O O O O O B-MISC E-MISC O O O O O O O
+Despite winning the Asian Games title two years ago , Uzbekistan are in the finals as outsiders . O O O B-MISC E-MISC O O O O O S-LOC O O O O O O O
+Two goals from defensive errors in the last six minutes allowed Japan to come from behind and collect all three points from their opening meeting against Syria . O O O O O O O O O O O S-LOC O O O O O O O O O O O O O O S-LOC O
+Takuya Takagi scored the winner in the 88th minute , rising to head a Hiroshige Yanagimoto cross towards the Syrian goal which goalkeeper Salem Bitar appeared to have covered but then allowed to slip into the net . B-PER E-PER O O O O O O O O O O O O B-PER E-PER O O O S-MISC O O O B-PER E-PER O O O O O O O O O O O O O
+It was the second costly blunder by Syria in four minutes . O O O O O O O S-LOC O O O O
+Defender Hassan Abbas rose to intercept a long ball into the area in the 84th minute but only managed to divert it into the top corner of Bitar 's goal . O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O
+Nader Jokhadar had given Syria the lead with a well-struck header in the seventh minute . B-PER E-PER O O S-LOC O O O O O O O O O O O
+Japan then laid siege to the Syrian penalty area for most of the game but rarely breached the Syrian defence . S-LOC O O O O O S-MISC O O O O O O O O O O O S-MISC O O
+Bitar pulled off fine saves whenever they did . S-PER O O O O O O O O
+Japan coach Shu Kamo said : ' ' The Syrian own goal proved lucky for us . S-LOC O B-PER E-PER O O O O O S-MISC O O O O O O O
+The Syrians scored early and then played defensively and adopted long balls which made it hard for us . ' O S-MISC O O O O O O O O O O O O O O O O O O
+' O
+Japan , co-hosts of the World Cup in 2002 and ranked 20th in the world by FIFA , are favourites to regain their title here . S-LOC O O O O B-MISC E-MISC O O O O O O O O O S-ORG O O O O O O O O O
+Hosts UAE play Kuwait and South Korea take on Indonesia on Saturday in Group A matches . O S-LOC O S-LOC O B-LOC E-LOC O O S-LOC O O O O O O O
+All four teams are level with one point each from one game . O O O O O O O O O O O O O
+-DOCSTART- O
+RUGBY UNION - CUTTITTA BACK FOR ITALY AFTER A YEAR . B-ORG E-ORG O S-PER O O S-LOC O O O O
+ROME 1996-12-06 S-LOC O
+Italy recalled Marcello Cuttitta S-LOC O B-PER E-PER
+on Friday for their friendly against Scotland at Murrayfield more than a year after the 30-year-old wing announced he was retiring following differences over selection . O O O O O O S-LOC O S-LOC O O O O O O O O O O O O O O O O O
+Cuttitta , who trainer George Coste said was certain to play on Saturday week , was named in a 21-man squad lacking only two of the team beaten 54-21 by England at Twickenham last month . S-PER O O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O S-LOC O S-LOC O O O
+Stefano Bordon is out through illness and Coste said he had dropped back row Corrado Covi , who had been recalled for the England game after five years out of the national team . B-PER E-PER O O O O O S-PER O O O O O O B-PER E-PER O O O O O O O S-LOC O O O O O O O O O O
+Cuttitta announced his retirement after the 1995 World Cup , where he took issue with being dropped from the Italy side that faced England in the pool stages . S-PER O O O O O B-MISC I-MISC E-MISC O O O O O O O O O O S-LOC O O O S-LOC O O O O O
+Coste said he had approached the player two months ago about a comeback . S-PER O O O O O O O O O O O O O
+" He ended the World Cup on the wrong note , " Coste said . O O O O B-MISC E-MISC O O O O O O S-PER O O
+" I thought it would be useful to have him back and he said he would be available . O O O O O O O O O O O O O O O O O O O
+I think now is the right time for him to return . " O O O O O O O O O O O O O
+Squad : Javier Pertile , Paolo Vaccari , Marcello Cuttitta , Ivan Francescato , Leandro Manteri , Diego Dominguez , Francesco Mazzariol , Alessandro Troncon , Orazio Arancio , Andrea Sgorlon , Massimo Giovanelli , Carlo Checchinato , Walter Cristofoletto , Franco Properzi Curti , Carlo Orlandi , Massimo Cuttitta , Giambatista Croci , Gianluca Guidi , Nicola Mazzucato , Alessandro Moscardi , Andrea Castellani . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER I-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+-DOCSTART- O
+SOCCER - LATE GOALS GIVE JAPAN WIN OVER SYRIA . O O O O O S-LOC O O S-LOC O
+AL-AIN , United Arab Emirates 1996-12-06 S-LOC O B-LOC I-LOC E-LOC O
+Two goals in the last six minutes gave holders Japan an uninspiring 2-1 Asian Cup victory over Syria on Friday . O O O O O O O O O S-LOC O O O B-MISC E-MISC O O S-LOC O O O
+Takuya Takagi headed the winner in the 88th minute of the group C game after goalkeeper Salem Bitar spoiled a mistake-free display by allowing the ball to slip under his body . B-PER E-PER O O O O O O O O O O O O O O B-PER E-PER O O O O O O O O O O O O O O
+It was the second Syrian defensive blunder in four minutes . O O O O S-MISC O O O O O O
+Defender Hassan Abbas rose to intercept a long ball into the area in the 84th minute but only managed to divert it into the top corner of Bitar 's goal . O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O
+Syria had taken the lead from their first serious attack in the seventh minute . S-LOC O O O O O O O O O O O O O O
+Nader Jokhadar headed a cross from the right by Ammar Awad into the top right corner of Kenichi Shimokawa 's goal . B-PER E-PER O O O O O O O B-PER E-PER O O O O O O B-PER E-PER O O O
+Japan then laid siege to the Syrian penalty area and had a goal disallowed for offside in the 16th minute . S-LOC O O O O O S-MISC O O O O O O O O O O O O O O
+A minute later , Bitar produced a good double save , first from Kazuyoshi Miura 's header and then blocked a Takagi follow-up shot . O O O O S-PER O O O O O O O O B-PER E-PER O O O O O O S-PER O O O
+Bitar saved well again from Miura in the 37th minute , parrying away his header from a corner . S-PER O O O O S-PER O O O O O O O O O O O O O
+Japan started the second half brightly but Bitar denied them an equaliser when he dived to his right to save Naoki Soma 's low drive in the 53rd minute . S-LOC O O O O O O S-PER O O O O O O O O O O O O B-PER E-PER O O O O O O O O
+Japan : 19 - Kenichi Shimokawa , 2 - Hiroshige Yanagimoto , 3 - Naoki Soma , 4 - Masami Ihara , 5 - Norio Omura , 6 - Motohiro Yamaguchi , 8 - Masakiyo Maezono ( 7 - Yasuto Honda 71 ) , 9 - Takuya Takagi , 10 - Hiroshi Nanami , 11 - Kazuyoshi Miura , 15 - Hiroaki Morishima ( 14 - Masayuki Okano 75 ) . S-LOC O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O
+Syria : 24 - Salem Bitar , 3 - Bachar Srour ; 4 - Hassan Abbas , 5 - Tarek Jabban , 6 - Ammar Awad ( 9 - Louay Taleb 69 ) , 8 - Nihad al-Boushi , 10 - Mohammed Afash , 12 - Ali Dib , 13 - Abdul Latif Helou ( 17 - Ammar Rihawiy 46 ) , 14 - Khaled Zaher ; 16 - Nader Jokhadar . S-LOC O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER I-PER E-PER O O O B-PER E-PER O O O O O B-PER E-PER O O O B-PER E-PER O
+-DOCSTART- O
+FREESTYLE SKIING-WORLD CUP MOGUL RESULTS . O B-MISC E-MISC O O O
+TIGNES , France 1996-12-06 S-LOC O S-LOC O
+Results of the World Cup O O O B-MISC E-MISC
+freestyle skiing moguls competition on Friday : O O O O O O O
+Men O
+1. Jesper Ronnback ( Sweden ) 25.76 points O B-PER E-PER O S-LOC O O O
+2. Andrei Ivanov ( Russia ) 24.88 O B-PER E-PER O S-LOC O O
+3. Ryan Johnson ( Canada ) 24.57 O B-PER E-PER O S-LOC O O
+4. Jean-Luc Brassard ( Canada ) 24.40 O B-PER E-PER O S-LOC O O
+5. Korneilus Hole ( Norway ) 23.92 O B-PER E-PER O S-LOC O O
+6. Jeremie Collomb-Patton ( France ) 23.87 O B-PER E-PER O S-LOC O O
+7. Jim Moran ( U.S. ) 23.25 O B-PER E-PER O S-LOC O O
+8. Dominick Gauthier ( Canada ) 22.73 O B-PER E-PER O S-LOC O O
+9. Johann Gregoire ( France ) 22.58 O B-PER E-PER O S-LOC O O
+10. Troy Benson ( U.S. ) 22.56 O B-PER E-PER O S-LOC O O
+Women O
+1. Tatjana Mittermayer ( Germany ) 24.32 O B-PER E-PER O S-LOC O O
+2. Candice Gilg ( France ) 24.31 O B-PER E-PER O S-LOC O O
+3. Minna Karhu ( Finland ) 24.05 O B-PER E-PER O S-LOC O O
+4. Tae Satoya ( Japan ) 23.75 O B-PER E-PER O S-LOC O O
+5. Ann Battellle ( U.S. ) 23.56 O B-PER E-PER O S-LOC O O
+6. Donna Weinbrecht ( U.S. ) 22.48 O B-PER E-PER O S-LOC O O
+7. Liz McIntyre ( U.S. ) 22.00 O B-PER E-PER O S-LOC O O
+8. Elena Koroleva ( Russia ) 21.77 O B-PER E-PER O S-LOC O O
+9. Ljudmila Dymchenko ( Russia ) 21.59 O B-PER E-PER O S-LOC O O
+10. Katleen Allais ( France ) 21.58 O B-PER E-PER O S-LOC O O
+-DOCSTART- O
+SOCCER - ASIAN CUP GROUP C RESULTS . O O B-MISC E-MISC O O O O
+AL-AIN , United Arab Emirates 1996-12-06 S-LOC O B-LOC I-LOC E-LOC O
+Results of Asian Cup group C matches played on Friday : O O B-MISC E-MISC O O O O O O O
+Japan 2 Syria 1 ( halftime 0-1 ) S-LOC O S-LOC O O O O O
+Scorers : O O
+Japan - Hassan Abbas 84 own goal , Takuya Takagi 88 . S-LOC O B-PER E-PER O O O O B-PER E-PER O O
+Syria - Nader Jokhadar 7 S-LOC O B-PER E-PER O
+Attendance : 10,000 . O O O O
+China 0 Uzbekistan 2 ( halftime 0-0 ) S-LOC O S-LOC O O O O O
+Scorers : Shkvyrin Igor 78 , Shatskikh Oleg 90 O O B-PER E-PER O O B-PER E-PER O
+Attendence : 3,000 O O O
+Standings ( tabulate under played , won , drawn , lost , goals O O O O O O O O O O O O O
+for , goals against , points ) : O O O O O O O O
+Uzbekistan 1 1 0 0 2 0 3 S-LOC O O O O O O O
+Japan 1 1 0 0 2 1 3 S-LOC O O O O O O O
+Syria 1 0 0 1 1 2 0 S-LOC O O O O O O O
+China 1 0 0 1 0 2 0 S-LOC O O O O O O O
+-DOCSTART- O
+CRICKET - PAKISTAN V NEW ZEALAND ONE-DAY SCOREBOARD . O O S-LOC O B-LOC E-LOC O O O
+[ CORRECTED 14:06 GMT ] O O O S-MISC O
+SIALKOT , Pakistan 1996-12-06 S-LOC O S-LOC O
+Scoreboard in the second O O O O
+one-day cricket international between Pakistan and New Zealand O O O O S-LOC O B-LOC E-LOC
+on Friday : O O O
+Pakistan S-LOC
+Saeed Anwar run out 91 ( corrects from 90 ) B-PER E-PER O O O O O O O O
+Zahoor Elahi b Cairns 86 ( corrects from 87 ) B-PER E-PER O S-PER O O O O O O
+Ijaz Ahmad c Spearman b Vaughan 59 B-PER E-PER O S-PER O S-PER O
+Inzamamul Haq st Germon b Astle 2 B-PER E-PER O S-PER O S-PER O
+Wasim Akram b Harris 4 B-PER E-PER O S-PER O
+Shahid Afridi b Harris 2 B-PER E-PER O S-PER O
+Moin Khan c Astle b Harris 1 B-PER E-PER O S-PER O S-PER O
+Waqar Younis st Germon b Harris 0 B-PER E-PER O S-PER O S-PER O
+Saqlain Mushtaq b Harris 2 B-PER E-PER O S-PER O
+Mushtaq Ahmad not out 5 B-PER E-PER O O O
+Salim Malik not out 1 B-PER E-PER O O O
+Extras ( lb-8 nb-2 w-14 ) 24 O O O O O O O
+Total ( for 9 wickets in 47 overs ) 277 O O O O O O O O O O
+Fall of wicket : 1-177 ( corrects from 1-178 ) 2-225 3-240 4-247 5-252 6-260 7-261 8-269 9-276 O O O O O O O O O O O O O O O O O O
+Bowling : Doull 8-1-60-0 ( w-3 ) , Kennedy 3-0-24-0 ( w-7 nb-1 ) , O O S-PER O O O O O S-PER O O O O O O
+Cairns 8-1-35-1 ( w-2 ) , Vaughan 9-1-55-1 , Harris 10-0-42-5 ( w-1 ) , S-PER O O O O O S-PER O O S-PER O O O O O
+Astle 9-0-53-1 ( w-1 nb-1 ) S-PER O O O O O
+New Zealand innings B-LOC E-LOC O
+B. Young c Moin Khan b Waqar 5 B-PER E-PER O B-PER E-PER O S-PER O
+C. Spearman c Moin Khan b Wasim 0 B-PER E-PER O B-PER E-PER O S-PER O
+A. Parore c Ijaz Ahmad b Saqlain 37 B-PER E-PER O B-PER E-PER O S-PER O
+S. Fleming c and b Afridi 88 B-PER E-PER O O O S-PER O
+C. Cairns b Saqlain 10 B-PER E-PER O S-PER O
+N. Astle c Ijaz Ahmad b Salim Malik 20 B-PER E-PER O B-PER E-PER O B-PER E-PER O
+C. Harris lbw b Wasim 22 B-PER E-PER O O S-PER O
+L. Germon lbw b Afridi 2 B-PER E-PER O O S-PER O
+J. Vaughan c Moin Khan b Wasim 13 B-PER E-PER O B-PER E-PER O S-PER O
+S. Doull c subs ( M. Wasim ) b Waqar 1 B-PER E-PER O O O B-PER E-PER O O S-PER O
+R. Kennedy not out 7 B-PER E-PER O O O
+Extras ( b-9 lb-3 w-12 nb-2 ) 26 O O O O O O O O
+Total ( all out in 42.1 overs ) 231 O O O O O O O O O
+Fall of wickets : 1-3 2-7 3-125 4-146 5-170 6-190 7-195 O O O O O O O O O O O
+8-213 9-216 . O O O
+Bowling : Wasim Akram 8.1-0-43-3 ( 9w , 1nb ) , Waqar Younis O O B-PER E-PER O O O O O O O B-PER E-PER
+6-0-32-2 ( 2w , 1nb ) , Saqlain Mushtaq 8-0-54-2 , Mushtaq Ahmad O O O O O O O B-PER E-PER O O B-PER E-PER
+10-0-42-0 ( 1w ) , Shahid Afridi 7-0-40-2 , Salim Malik 2.5-0-8-1 , O O O O O B-PER E-PER O O B-PER E-PER O O
+Ijaz Ahmad 0.1-0-0-0 . B-PER E-PER O O
+Result : Pakistan won by 46 runs . O O S-LOC O O O O O
+Third one-day match : December 8 , in Karachi . O O O O O O O O S-LOC O
+-DOCSTART- O
+SOCCER - ENGLISH F.A. CUP SECOND ROUND RESULT . O O B-MISC I-MISC E-MISC O O O O
+LONDON 1996-12-06 S-LOC O
+Result of an English F.A. Challenge O O O B-MISC I-MISC E-MISC
+Cup second round match on Friday : S-MISC O O O O O O
+Plymouth 4 Exeter 1 S-ORG O S-ORG O
+-DOCSTART- O
+SOCCER - BLINKER BAN LIFTED . O O S-PER O O O
+LONDON 1996-12-06 S-LOC O
+Dutch forward Reggie Blinker had his indefinite suspension lifted by FIFA on Friday and was set to make his Sheffield Wednesday comeback against Liverpool on Saturday . S-MISC O B-PER E-PER O O O O O O S-ORG O O O O O O O O B-ORG E-ORG O O S-ORG O O O
+Blinker missed his club 's last two games after FIFA slapped a worldwide ban on him for appearing to sign contracts for both Wednesday and Udinese while he was playing for Feyenoord . S-PER O O O O O O O O S-ORG O O O O O O O O O O O O O S-ORG O S-ORG O O O O O S-ORG O
+FIFA 's players ' status committee , meeting in Barcelona , decided that although the Udinese document was basically valid , it could not be legally protected . S-ORG O O O O O O O O S-LOC O O O O O S-ORG O O O O O O O O O O O O
+The committee said the Italian club had violated regulations by failing to inform Feyenoord , with whom the player was under contract . O O O O S-MISC O O O O O O O O S-ORG O O O O O O O O O
+Blinker was fined 75,000 Swiss francs ( $ 57,600 ) for failing to inform the Engllsh club of his previous commitment to Udinese . S-PER O O O S-MISC O O O O O O O O O O S-MISC O O O O O O S-ORG O
+-DOCSTART- O
+SOCCER - LEEDS ' BOWYER FINED FOR PART IN FAST-FOOD FRACAS . O O S-ORG O S-PER O O O O O O O
+LONDON 1996-12-06 S-LOC O
+Leeds ' England under-21 striker Lee Bowyer was fined 4,500 pounds ( $ 7,400 ) on Friday for hurling chairs at restaurant staff during a disturbance at a McDonald 's fast-food restaurant . S-ORG O S-LOC O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O B-ORG E-ORG O O O
+Bowyer , 19 , who was caught in the act by security cameras , pleaded guilty to a charge of affray at a court in London . S-PER O O O O O O O O O O O O O O O O O O O O O O O O S-LOC O
+He was fined and ordered to pay a total of 175 pounds to two members of staff injured in the fracas in an east London restaurant in October . O O O O O O O O O O O O O O O O O O O O O O O O S-LOC O O O O
+Leeds had already fined Bowyer 4,000 pounds ( $ 6,600 ) and warned him a repeat of his criminal behaviour could cost him his place in the side . S-ORG O O O S-PER O O O O O O O O O O O O O O O O O O O O O O O O
+Bowyer , who moved to the Yorkshire club in August for 3.5 million pounds ( $ 5.8 million ) , was expected to play against Middlesbrough on Saturday . S-PER O O O O O S-LOC O O O O O O O O O O O O O O O O O O S-ORG O O O
+-DOCSTART- O
+BASKETBALL - EUROLEAGUE STANDINGS . O O S-MISC O O
+LONDON 1996-12-06 S-LOC O
+Standings in the men 's EuroLeague O O O O O S-MISC
+basketball championship after Thursday 's matches ( tabulate under O O O O O O O O O
+played , won , lost , points ) : O O O O O O O O O
+Group A O O
+CSKA Moscow ( Russia 9 6 3 15 B-ORG E-ORG O S-LOC O O O O
+Stefanel Milan ( Italy ) 9 6 3 15 B-ORG E-ORG O S-LOC O O O O O
+Maccabi Tel Aviv ( Israel ) 9 5 4 14 B-ORG I-ORG E-ORG O S-LOC O O O O O
+Ulker Spor ( Turkey ) 9 4 5 13 B-ORG E-ORG O S-LOC O O O O O
+Limoges ( France ) 9 3 6 12 S-ORG O S-LOC O O O O O
+Panionios ( Greece ) 9 3 6 12 S-ORG O S-LOC O O O O O
+Group B O O
+Teamsystem Bologna ( Italy ) 9 7 2 16 B-ORG E-ORG O S-LOC O O O O O
+Olympiakos ( Greece ) 9 5 4 14 S-ORG O S-LOC O O O O O
+Cibona Zagreb ( Croatia ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
+Alba Berlin ( Germany ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
+Estudiantes Madrid ( Spain ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
+Charleroi ( Belgium ) 9 0 9 9 S-ORG O S-LOC O O O O O
+Group C O O
+Panathinaikos ( Greece ) 9 7 2 16 S-ORG O S-LOC O O O O O
+Ljubljana ( Slovenia ) 9 6 3 15 S-ORG O S-LOC O O O O O
+Villeurbanne ( France ) 9 6 3 15 S-ORG O S-LOC O O O O O
+Barcelona ( Spain ) 9 4 5 13 S-ORG O S-LOC O O O O O
+Split ( Croatia ) 9 4 5 13 S-ORG O S-LOC O O O O O
+Bayer Leverkusen ( Germany ) 9 0 9 9 B-ORG E-ORG O S-LOC O O O O O
+Group D O O
+Efes Pilsen ( Turkey ) 9 7 2 16 B-ORG E-ORG O S-LOC O O O O O
+Pau-Orthez ( France ) 9 5 4 14 S-ORG O S-LOC O O O O O
+Partizan Belgrade ( Yugoslavia ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
+Kinder Bologna ( Italy ) 9 4 5 13 B-ORG E-ORG O S-LOC O O O O O
+Sevilla ( Spain ) 9 4 5 13 S-ORG O S-LOC O O O O O
+Dynamo Moscow ( Russia ) 9 2 7 11 B-ORG E-ORG O S-LOC O O O O O
+-DOCSTART- O
+RUGBY UNION - LITTLE TO MISS CAMPESE FAREWELL . B-ORG E-ORG O S-PER O O S-PER O O
+Robert Kitson B-PER E-PER
+LONDON 1996-12-06 S-LOC O
+Centre Jason Little will miss Australia 's end-of-tour fixture against the Barbarians at Twickenham on Saturday . O B-PER E-PER O O S-LOC O O O O O S-ORG O S-LOC O O O
+Little has opted not to risk aggravating the knee injury which ruled him out of a large chunk of the tour and is replaced by fellow Queenslander Daniel Herbert . S-PER O O O O O O O O O O O O O O O O O O O O O O O O O S-MISC B-PER E-PER O
+Owen Finegan has recovered from the knocks he took in last weekend 's test against Wales and retains his place in the back-row ahead of Daniel Manu . B-PER E-PER O O O O O O O O O O O O O S-LOC O O O O O O O O O B-PER E-PER O
+The Wallabies have their sights set on a 13th successive victory to end their European tour with a 100 percent record but also want to turn on the style and provide David Campese with a fitting send-off in his final match in Australian colours . O S-ORG O O O O O O O O O O O O S-MISC O O O O O O O O O O O O O O O O B-PER E-PER O O O O O O O O O S-MISC O O
+The Wallabies currently have no plans to make any special presentation to the 34-year-old winger but a full house of 75,000 spectators will still gather in the hope of witnessing one last moment of magic . O S-ORG O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+Campese will be up against a familiar foe in the shape of Barbarians captain Rob Andrew , the man who kicked Australia to defeat with a last-ditch drop-goal in the World Cup quarter-final in Cape Town . S-PER O O O O O O O O O O O S-ORG O B-PER E-PER O O O O O S-LOC O O O O O O O O B-MISC E-MISC O O B-LOC E-LOC O
+" Campo has a massive following in this country and has had the public with him ever since he first played here in 1984 , " said Andrew , also likely to be making his final Twickenham appearance . O S-PER O O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O O O O O O S-LOC O O
+On tour , Australia have won all four tests against Italy , Scotland , Ireland and Wales , and scored 414 points at an average of almost 35 points a game . O O O S-LOC O O O O O O S-LOC O S-LOC O S-LOC O S-LOC O O O O O O O O O O O O O O O
+League duties restricted the Barbarians ' selectorial options but they still boast 13 internationals including England full-back Tim Stimpson and recalled wing Tony Underwood , plus All Black forwards Ian Jones and Norm Hewitt . O O O O S-ORG O O O O O O O O O O S-LOC O B-PER E-PER O O O B-PER E-PER O O B-ORG E-ORG O B-PER E-PER O B-PER E-PER O
+Teams : O O
+Barbarians - 15 - Tim Stimpson ( England ) ; 14 - Nigel Walker ( Wales ) , 13 - Allan Bateman ( Wales ) , 12 - Gregor Townsend ( Scotland ) , 11 - Tony Underwood ( England ) ; 10 - Rob Andrew ( England ) , 9 - Rob Howley ( Wales ) ; 8 - Scott Quinnell ( Wales ) , 7 - Neil Back ( England ) , 6 - Dale McIntosh ( Pontypridd ) , 5 - Ian Jones ( New Zealand ) , 4 - Craig Quinnell ( Wales ) , 3 - Darren Garforth ( Leicester ) , 2 - Norm Hewitt ( New Zealand ) , 1 - Nick Popplewell ( Ireland ) . S-ORG O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O B-LOC E-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O B-LOC E-LOC O O O O B-PER E-PER O S-LOC O O
+Australia - 15 - Matthew Burke ; 14 - Joe Roff , 13 - Daniel Herbert , 12 - Tim Horan ( captain ) , 11 - David Campese ; 10 - Pat Howard , 9 - Sam Payne ; 8 - Michael Brial , 7 - David Wilson , 6 - Owen Finegan , 5 - David Giffin , 4 - Tim Gavin , 3 - Andrew Blades , 2 - Marco Caputo , 1 - Dan Crowley . S-LOC O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O
+-DOCSTART- O
+GOLF - ZIMBABWE OPEN SECOND ROUND SCORES . O O B-MISC E-MISC O O O O
+HARARE 1996-12-06 S-LOC O
+Leading second round scores in the Zimbabwe Open at the par-72 Chapman Golf Club on Friday ( South African unless stated ) : 132 Des Terblanche 65 67 133 Mark McNulty ( Zimbabwe ) 72 61 134 Steve van Vuuren 65 69 136 Nick Price ( Zimbabwe ) 68 68 , Justin Hobday 71 65 , O O O O O O B-MISC E-MISC O O O B-LOC I-LOC E-LOC O O O B-MISC E-MISC O O O O O B-PER E-PER O O O B-PER E-PER O S-LOC O O O O B-PER I-PER E-PER O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O O O
+Andrew Pitts ( U.S. ) 69 67 138 Mark Cayeux ( Zimbabwe ) 69 69 , Mark Murless 71 67 139 Hennie Swart 75 64 , Andrew Park 72 67 140 Schalk van der Merwe ( Namibia ) 67 73 , Desvonde B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER I-PER I-PER E-PER O S-LOC O O O O S-PER
+Botes 72 68 , Greg Reid 72 68 , Clinton Whitelaw 70 S-PER O O O B-PER E-PER O O O B-PER E-PER O
+70 , Brett Liddle 75 65 , Hugh Baiocchi 73 67 141 Adilson da Silva ( Brazil ) 72 69 , Sammy Daniels 73 O O B-PER E-PER O O O B-PER E-PER O O O B-PER I-PER E-PER O S-LOC O O O O B-PER E-PER O
+68 , Trevor Dodds ( Namibia ) 72 69 142 Don Robertson ( U.S. ) 73 69 , Dion Fourie 69 73 , O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O O O
+Steve Waltman 72 70 , Ian Dougan 73 69 B-PER E-PER O O O B-PER E-PER O O
+-DOCSTART- O
+SOCCER - UNCAPPED PLAYERS CALLED TO FACE MACEDONIA . O O O O O O O S-LOC O
+BUCHAREST 1996-12-06 S-LOC O
+Romania trainer Anghel Iordanescu called up three uncapped players on Friday in his squad to face Macedonia next week in a World Cup qualifier . S-LOC O B-PER E-PER O O O O O O O O O O O O S-LOC O O O O B-MISC E-MISC O O
+Midfielder Valentin Stefan and striker Viorel Ion of Otelul Galati and defender Liviu Ciobotariu of National Bucharest are the newcomers for the European group eight clash in Macedonia on December 14 . O B-PER E-PER O O B-PER E-PER O B-ORG E-ORG O O B-PER E-PER O B-ORG E-ORG O O O O O S-MISC O O O O S-LOC O O O O
+Iordanescu said he had picked them because of their good performances in the domestic championship in which National Bucharest are top and Otelul Galati third . " S-PER O O O O O O O O O O O O O O O O B-ORG E-ORG O O O B-ORG E-ORG O O O
+I think it 's fair to give them a chance , " he told reporters . O O O O O O O O O O O O O O O O
+League title-holders Steaua Bucharest , who finished bottom of their Champions ' League group in the European Cup , have only two players in the squad . O O B-ORG E-ORG O O O O O O B-MISC I-MISC E-MISC O O O B-MISC E-MISC O O O O O O O O O
+Attacking midfielder Adrian Ilie , who recently moved from Steaua to Turkish club Galatasaray , is ruled out after two yellow-card offences . O O B-PER E-PER O O O O O S-ORG O S-MISC O S-ORG O O O O O O O O O
+Squad : O O
+Goalkeepers - Bogdan Stelea , Florin Prunea . O O B-PER E-PER O B-PER E-PER O
+Defenders - Dan Petrescu , Daniel Prodan , Anton Dobos , Cornel Papura , Liviu Ciobotariu , Tibor Selymess , Iulian Filipescu . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Midfielders - Gheorghe Hagi , Gheorghe Popescu , Constantin Galca , Valentin Stefan , Basarab Panduru , Dorinel Munteanu , Ovidiu Stinga . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Forwards - Ioan Vladoiu , Gheorghe Craioveanu , Ionel Danciulescu , Viorel Ion . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+REUTER S-ORG
+-DOCSTART- O
+SOCCER - BRAZILIAN CHAMPIONSHIP RESULTS . O O S-MISC O O O
+RIO DE JANEIRO 1996-12-05 B-LOC I-LOC E-LOC O
+Results of Brazilian O O S-MISC
+soccer championship semifinal , first leg matches on Thursday . O O O O O O O O O O
+Goias 1 Gremio 3 S-ORG O S-ORG O
+Portuguesa 1 Atletico Mineiro 0 S-ORG O B-ORG E-ORG O
+-DOCSTART- O
+CRICKET - LARA ENDURES ANOTHER MISERABLE DAY . O O S-PER O O O O O
+Robert Galvin B-PER E-PER
+MELBOURNE 1996-12-06 S-LOC O
+Australia gave Brian Lara another reason to be miserable when they beat West Indies by five wickets in the opening World Series limited overs match on Friday . S-LOC O B-PER E-PER O O O O O O O O B-LOC E-LOC O O O O O O B-MISC E-MISC O O O O O O
+Lara , disciplined for misconduct on Wednesday , was dismissed for five to extend a disappointing run of form on tour . S-PER O O O O O O O O O O O O O O O O O O O O O
+Australia , who hold a 2-0 lead in the five-match test series , overhauled West Indies ' total of 172 all out with eight balls to spare to end a run of six successive one-day defeats . S-LOC O O O O O O O O O O O O O B-LOC E-LOC O O O O O O O O O O O O O O O O O O O O O
+All-rounder Greg Blewett steered his side to a comfortable victory with an unbeaten 57 in 90 balls to the delight of the 42,442 crowd . O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O
+Man-of-the match Blewett came to the wicket with the total on 70 for two and hit three fours during an untroubled innings lasting 129 minutes . O O S-PER O O O O O O O O O O O O O O O O O O O O O O O
+His crucial fifth-wicket partnership with fellow all-rounder Stuart Law , who scored 21 , added 71 off 85 balls . O O O O O O O B-PER E-PER O O O O O O O O O O O
+Lara looked out of touch during his brief stay at the crease before chipping a simple catch to Shane Warne at mid-wicket . S-PER O O O O O O O O O O O O O O O O O B-PER E-PER O O O
+West Indies tour manager Clive Lloyd has apologised for Lara 's behaviour on Tuesday . B-LOC E-LOC O O B-PER E-PER O O O S-PER O O O O O
+He ( Lara ) had told Australia coach Geoff Marsh that wicketkeeper Ian Healy was unwelcome in the visitors ' dressing room . O O S-PER O O O S-LOC O B-PER E-PER O O B-PER E-PER O O O O O O O O O
+The Melbourne crowd were clearly angered by the incident , loudly jeering the West Indies vice-captain as he walked to the middle . O S-LOC O O O O O O O O O O O B-LOC E-LOC O O O O O O O O
+It was left to fellow left-hander Shivnarine Chanderpaul to hold the innings together with a gritty 54 despite the handicap of an injured groin . O O O O O O B-PER E-PER O O O O O O O O O O O O O O O O O
+Chanderpaul was forced to rely on a runner for most of his innings after hurting himself as he scurried back to his crease to avoid being run out . S-PER O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+Pakistan , who arrive in Australia later this month , are the other team competing in the World Series tournament . S-LOC O O O O S-LOC O O O O O O O O O O O B-MISC E-MISC O O
+-DOCSTART- O
+CRICKET - AUSTRALIA V WEST INDIES WORLD SERIES SCOREBOARD . O O S-LOC O B-LOC E-LOC B-MISC E-MISC O O
+MELBOURNE 1996-12-06 S-LOC O
+Scoreboard in the World Series O O O B-MISC E-MISC
+limited overs match between Australia and West Indies on Friday : O O O O S-LOC O B-LOC E-LOC O O O
+West Indies B-LOC E-LOC
+S. Campbell c Healy b Gillespie 31 B-PER E-PER O S-PER O S-PER O
+R. Samuels c M. Waugh b Gillespie 7 B-PER E-PER O B-PER E-PER O S-PER O
+B. Lara c Warne b Moody 5 B-PER E-PER O S-PER O S-PER O
+S. Chanderpaul c Healy b Blewett 54 B-PER E-PER O S-PER O S-PER O
+C. Hooper run out 7 B-PER E-PER O O O
+J. Adams lbw b Moody 5 B-PER E-PER O O S-PER O
+J. Murray c Blewett b Warne 24 B-PER E-PER O S-PER O S-PER O
+N. McLean c and b M. Waugh 7 B-PER E-PER O O O B-PER E-PER O
+K. Benjamin b Warne 8 B-PER E-PER O S-PER O
+C. Ambrose run out 2 B-PER E-PER O O O
+C. Walsh not out 8 B-PER E-PER O O O
+Extras ( lb-10 w-1 nb-3 ) 14 O O O O O O O
+Total ( 49.2 overs ) 172 O O O O O O
+Fall of wickets : 1-11 2-38 3-64 4-73 5-81 6-120 7-135 8-150 O O O O O O O O O O O O
+9-153 . O O
+Bowling : Reiffel 10-2-26-0 ( nb-3 ) , Gillespie 10-0-39-2 , O O S-PER O O O O O S-PER O O
+Moody 10-1-25-2 , Blewett 6.2-0-27-1 , Warne 10-0-34-2 ( w-1 ) , S-PER O O S-PER O O S-PER O O O O O
+M. Waugh 3-0-11-1 . B-PER E-PER O O
+Australia S-LOC
+M. Taylor b McLean 29 B-PER E-PER O S-PER O
+M. Waugh c Murray b Benjamin 27 B-PER E-PER O S-PER O S-PER O
+R. Ponting lbw McLean 5 B-PER E-PER O S-PER O
+G. Blewett not out 57 B-PER E-PER O O O
+M. Bevan st Murray b Hooper 3 B-PER E-PER O S-PER O S-PER O
+S. Law b Hooper 21 B-PER E-PER O S-PER O
+T. Moody not out 3 B-PER E-PER O O O
+Extras ( lb-17 nb-8 w-3 ) 28 O O O O O O O
+Total ( for five wickets , 48.4 overs ) 173 O O O O O O O O O O
+Fall of wickets : 1-59 2-70 3-78 4-90 5-160 . O O O O O O O O O O
+Did not bat : I. Healy , P. Reiffel , S. Warne , J. Gillespie . O O O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Bowling : Ambrose 10-3-19-0 ( 2nb 1w ) , Walsh 9-0-34-0 ( 4nb ) , O O S-PER O O O O O O S-PER O O O O O
+Benjamin 9.4-0-43-1 ( 1nb 1w ) , Hooper 10-0-27-2 ( 1nb ) , McLean S-PER O O O O O O S-PER O O O O O S-PER
+10-1-33-2 ( 1w ) . O O O O O
+Result : Australia won by five wickets . O O S-LOC O O O O O
+-DOCSTART- O
+CRICKET - AUSTRALIA BEAT WEST INDIES BY FIVE WICKETS . O O S-LOC O B-LOC E-LOC O O O O
+MELBOURNE 1996-12-06 S-LOC O
+Australia beat West Indies by five wickets in a World Series limited overs match at the Melbourne Cricket Ground on Friday . S-LOC O B-LOC E-LOC O O O O O B-MISC E-MISC O O O O O B-LOC I-LOC E-LOC O O O
+Scores : West Indies 172 all out in 49.2 overs ( Shivnarine Chanderpaul 54 ) ; Australia 173-5 in 48.4 overs ( Greg Blewett 57 not out ) . O O B-LOC E-LOC O O O O O O O B-PER E-PER O O O S-LOC O O O O O B-PER E-PER O O O O O
+-DOCSTART- O
+CRICKET - WEST INDIES 172 ALL OUT IN 49.2 OVERS V AUSTRALIA . O O B-LOC E-LOC O O O O O O O S-LOC O
+MELBOURNE 1996-12-06 S-LOC O
+West Indies were all out for 172 off 49.2 overs in the World Series limited overs match against Australia on Friday . B-LOC E-LOC O O O O O O O O O O B-MISC E-MISC O O O O S-LOC O O O
+-DOCSTART- O
+CRICKET - SHEFFIELD SHIELD SCORE . O O B-MISC E-MISC O O
+HOBART , Australia 1996-12-06 S-LOC O S-LOC O
+Score on the first day of the four-day Sheffield Shield match between Tasmania and Victoria at Bellerive Oval on Friday : O O O O O O O O B-MISC E-MISC O O S-LOC O S-LOC O B-LOC E-LOC O O O
+Tasmania 352 for three ( David Boon 106 not out , Shaun Young 86 not out , Michael DiVenuto 119 ) v Victoria . S-LOC O O O O B-PER E-PER O O O O B-PER E-PER O O O O B-PER E-PER O O O S-ORG O
+-DOCSTART- O
+CRICKET - LARA SUFFERS MORE AUSTRALIAN TOUR MISERY . O O S-PER O O O O O O
+MELBOURNE 1996-12-06 S-LOC O
+West Indies batsman Brian Lara suffered another blow to his Australian tour , after already being disciplined for misconduct , when he was dismissed cheaply in the first limited overs match against Australia on Friday . B-LOC E-LOC O B-PER E-PER O O O O O S-MISC O O O O O O O O O O O O O O O O O O O O O S-LOC O O O
+Lara , who earned a stern rebuke from his own tour management after an angry outburst against Australia wicketkeeper Ian Healy , scored five to prolong a run of poor form with the bat . S-PER O O O O O O O O O O O O O O O O S-LOC O B-PER E-PER O O O O O O O O O O O O O O
+The West Indies vice-captain struggled for timing during his 36-minute stay at the crease before chipping a ball from medium pacer Tom Moody straight to Shane Warne at mid-wicket . O B-LOC E-LOC O O O O O O O O O O O O O O O O O O B-PER E-PER O O B-PER E-PER O O O
+West Indies were 53 for two in 15 overs when rain stopped play at the Melbourne Cricket Ground after captain Courtney Walsh won the toss and elected to bat . B-LOC E-LOC O O O O O O O O O O O O O B-LOC I-LOC E-LOC O O B-PER E-PER O O O O O O O O
+Lara 's outburst three days ago has clearly turned some of the Australian public against him . S-PER O O O O O O O O O O O S-MISC O O O O
+As he walked to the wicket he was greeted by loud jeers from sections of the crowd . O O O O O O O O O O O O O O O O O O
+On several occasions during his innings , the crowd joined together in a series of obscene chants against him . O O O O O O O O O O O O O O O O O O O O
+Tour manager Clive Lloyd on Wednesday apologised for Lara 's behaviour in confronting Australia coach Geoff Marsh in the opposition dressing room to protest against his dismissal in the second test on Tuesday . O O B-PER E-PER O O O O S-PER O O O O S-LOC O B-PER E-PER O O O O O O O O O O O O O O O O O
+Lloyd did not say what form the discipline would take . S-PER O O O O O O O O O O
+Lara , who holds the record for the highest score in test and first-class cricket , was unhappy about Healy 's role in the incident and questioned whether the ball had carried to the Australia keeper . S-PER O O O O O O O O O O O O O O O O O O S-PER O O O O O O O O O O O O O O S-LOC O O
+Australia went on to win the match at the Sydney Cricket Ground by 124 runs to take a two-nil lead in the five-test series after Lara failed in both innings . S-LOC O O O O O O O O B-LOC I-LOC E-LOC O O O O O O O O O O O O O S-PER O O O O O
+Lara has yet to score a century since West Indies arrived in Australia five weeks ago . S-PER O O O O O O O B-LOC E-LOC O O S-LOC O O O O
+Both West Indies and Australia team management have played down the incident , stressing that relations between the two sides have not been adversely affected . O B-LOC E-LOC O S-LOC O O O O O O O O O O O O O O O O O O O O O
+Pakistan , who arrive next week , are the third team in the triangular World Series tournament . S-LOC O O O O O O O O O O O O O B-MISC E-MISC O O
+-DOCSTART- O
+CRICKET - WEST INDIES TO BAT AFTER WINNING THE TOSS . O O B-LOC E-LOC O O O O O O O
+MELBOURNE 1996-12-06 S-LOC O
+West Indies captain Courtney Walsh elected to bat after winning the toss in the first match in the World Series limited overs competition against Australia at the Melbourne Cricket Ground on Friday . B-LOC E-LOC O B-PER E-PER O O O O O O O O O O O O O B-MISC E-MISC O O O O S-LOC O O S-LOC O O O O O
+Teams : O O
+Australia - Mark Taylor ( captain ) , Mark Waugh , Ricky Ponting , Greg Blewett , Michael Bevan , Stuart Law , Tom Moody , Ian Healy , Paul Reiffel , Shane Warne , Jason Gillespie , Glenn McGrath 12th man . S-LOC O B-PER E-PER O O O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O O O
+West Indies - Sherwin Campbell , Robert Samuels , Brian Lara , Shivnarine Chanderpaul , Carl Hooper , Jimmy Adams , Junior Murray , Nixon McLean , Kenneth Benjamin , Curtly Ambrose , Courtney Walsh ( captain ) , Roland Holder 12th man . B-LOC E-LOC O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O O O O B-PER E-PER O O O
+-DOCSTART- O
+BADMINTON - WORLD GRAND PRIX RESULTS . O O B-MISC I-MISC E-MISC O O
+BALI 1996-12-06 S-LOC O
+Results in last of the group matches at the World Grand Prix badminton finals on Friday : O O O O O O O O O B-MISC I-MISC E-MISC O O O O O
+Men 's singles O O O
+Group B O O
+Chen Gang ( China ) beat Martin Londgaard Hansen ( Denmark ) 15-12 15-6 B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O O
+Dong Jiong ( China ) beat Thomas Stuer-Lauridsen ( Denmark ) 15-10 15-6 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Indra Wijaya ( Indonesia ) beat Ong Ewe Hock ( Malaysia ) 5-15 15-11 15-11 B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O O O
+Group C O O
+Sun Jun ( China ) beat Rashid Sidek ( Malaysia ) 15-12 17-14 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Hermawan Susanto ( Indonesia ) beat Soren B. Nielsen ( Denmark ) 15-8 15-2 B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O O
+Group D O O
+Allan Budi Kuksuma ( Indonesia ) beat Poul-Erik Hoyer-Larsen ( Denmark ) 15-7 15-4 B-PER I-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Budi Santoso ( Indonesia ) beat Hu Zhilan ( China ) 15-4 15-5 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Semifinals ( on Saturday ) : Fung Permadi ( Taiwan ) v Indra O O O O O O B-PER E-PER O S-LOC O O S-PER
+Wijaya ( Indonesia ) ; Sun Jun ( China ) v Allan Budi Kusuma S-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER I-PER E-PER
+( Indonesia ) O S-LOC O
+Women 's singles O O O
+Group A O O
+Gong Zhichao ( China ) beat Mia Audina ( Indonesia ) 11-2 12-10 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Group B O O
+Ye Zhaoying ( China ) beat Meiluawati ( Indonesia ) 11-6 12-10 B-PER E-PER O S-LOC O O S-PER O S-LOC O O O
+Group C O O
+Camilla Martin ( Denmark ) beat Wang Chen ( China ) 11-0 12-10 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Group D O O
+Susi Susanti ( Indonesia ) beat Han Jingna ( China ) 11-5 11-4 . B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O
+Semifinals ( on Saturday ) : Susi Susanti ( Indonesia ) v Camilla Martin ( Denmark ) ; Ye Zhaoying ( China ) v Gong Zichao ( China ) . O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O
+-DOCSTART- O
+SOCCER - ARAB CONTRACTORS WIN AFRICAN CUP WINNERS ' CUP . O O S-MISC O O B-MISC I-MISC I-MISC I-MISC E-MISC O
+CAIRO 1996-12-06 S-LOC O
+Result of the second leg of the African Cup Winners ' Cup final at the National stadium on Friday : Arab Contractors ( Egypt ) 4 Sodigraf ( Zaire ) 0 ( halftime 2-0 ) O O O O O O O B-MISC I-MISC I-MISC I-MISC E-MISC O O O B-LOC E-LOC O O O B-ORG E-ORG O S-LOC O O S-ORG O S-LOC O O O O O O
+Scorers : O O
+Aly Ashour 7 , 56 penalty , Mohamed Ouda 24 , 73 B-PER E-PER O O O O O B-PER E-PER O O O
+Contractors won 4-0 on aggregate . O O O O O O
+-DOCSTART- O
+NHL ICE HOCKEY - STANDINGS AFTER THURSDAY 'S GAMES . S-ORG O O O O O O O O O
+NEW YORK 1996-12-06 B-LOC E-LOC O
+Standings of National Hockey O O B-ORG E-ORG
+League teams after games played on Thursday ( tabulate under S-ORG O O O O O O O O O
+won , lost , tied , goals for , goals against , points ) : O O O O O O O O O O O O O O O
+EASTERN CONFERENCE O O
+NORTHEAST DIVISION O O
+W L T GF GA PTS O O O O O O
+HARTFORD 12 7 6 77 76 30 S-ORG O O O O O O
+BUFFALO 13 12 1 77 76 27 S-ORG O O O O O O
+BOSTON 10 11 4 74 84 24 S-ORG O O O O O O
+MONTREAL 10 14 4 96 103 24 S-ORG O O O O O O
+PITTSBURGH 9 13 3 81 91 21 S-ORG O O O O O O
+OTTAWA 7 11 6 62 72 20 S-ORG O O O O O O
+ATLANTIC DIVISION S-LOC O
+W L T GF GA PTS O O O O O O
+FLORIDA 17 4 6 83 53 40 S-ORG O O O O O O
+PHILADELPHIA 14 12 2 75 75 30 S-ORG O O O O O O
+NEW JERSEY 14 10 1 61 61 29 B-ORG E-ORG O O O O O O
+WASHINGTON 13 12 1 69 66 27 S-ORG O O O O O O
+NY RANGERS 10 13 5 91 81 25 B-ORG E-ORG O O O O O O
+NY ISLANDERS 7 11 8 65 72 22 B-ORG E-ORG O O O O O O
+TAMPA BAY 8 15 2 69 81 18 B-ORG E-ORG O O O O O O
+WESTERN CONFERENCE O O
+CENTRAL DIVISION B-MISC E-MISC
+W L T GF GA PTS O O O O O O
+DETROIT 15 9 4 81 53 34 S-ORG O O O O O O
+DALLAS 16 9 1 74 60 33 S-ORG O O O O O O
+CHICAGO 12 12 3 71 67 27 S-ORG O O O O O O
+ST LOUIS 13 14 0 78 81 26 B-ORG E-ORG O O O O O O
+TORONTO 11 15 0 76 89 22 S-ORG O O O O O O
+PHOENIX 9 13 4 61 74 22 S-ORG O O O O O O
+PACIFIC DIVISION S-LOC O
+W L T GF GA PTS O O O O O O
+COLORADO 17 6 4 97 56 38 S-ORG O O O O O O
+VANCOUVER 14 11 1 84 83 29 S-ORG O O O O O O
+EDMONTON 13 14 1 94 88 27 S-ORG O O O O O O
+LOS ANGELES 11 13 3 72 83 25 B-ORG E-ORG O O O O O O
+SAN JOSE 10 13 4 69 87 24 B-ORG E-ORG O O O O O O
+CALGARY 10 16 2 65 77 22 S-ORG O O O O O O
+ANAHEIM 9 14 4 73 86 22 S-ORG O O O O O O
+FRIDAY , DECEMBER 6 O O O O
+ANAHEIM AT BUFFALO S-ORG O S-LOC
+TORONTO AT NY RANGERS S-ORG O B-ORG E-ORG
+PITTSBURGH AT WASHINGTON S-ORG O S-LOC
+MONTREAL AT CHICAGO S-ORG O S-LOC
+PHILADELPHIA AT DALLAS S-ORG O S-LOC
+ST LOUIS AT COLORADO B-ORG E-ORG O S-LOC
+OTTAWA AT EDMONTON S-ORG O S-LOC
+-DOCSTART- O
+NHL ICE HOCKEY - THURSDAY 'S RESULTS . S-ORG O O O O O O O
+[ CORRECTED 08:40 GMT ] O O O S-MISC O
+NEW YORK 1996-12-06 B-LOC E-LOC O
+( Corrects headline from NBA to NHL and corrects team name in second result from La Clippers to Ny Islanders . O O O O S-ORG O S-ORG O O O O O O O O B-ORG E-ORG O B-ORG E-ORG O
+) O
+Results of National Hockey O O B-ORG E-ORG
+League games on Thursday ( home team in CAPS ) : S-ORG O O O O O O O O O O
+Hartford 4 BOSTON 2 S-ORG O S-ORG O
+FLORIDA 4 Ny Islanders 2 S-ORG O B-ORG E-ORG O
+NEW JERSEY 2 Calgary 1 B-ORG E-ORG O S-ORG O
+Phoenix 3 ST LOUIS 0 S-ORG O B-ORG E-ORG O
+Tampa Bay 2 LOS ANGELES 1 B-ORG E-ORG O B-ORG E-ORG O
+-DOCSTART- O
+NFL AMERICAN FOOTBALL-COLTS CLOBBER EAGLES TO STAY IN PLAYOFF HUNT . S-ORG O O O S-ORG O O O O O O
+INDIANAPOLIS 1996-12-06 S-LOC O
+The injury-plagued Indianapolis Colts lost another quarterback on Thursday but last year 's AFC finalists rallied together to shoot down the Philadelphia Eagles 37-10 in a showdown of playoff contenders . O O B-ORG E-ORG O O O O O O O O O O O O O O O O O B-ORG E-ORG O O O O O O O O
+Marshall Faulk rushed for 101 yards and two touchdowns and Jason Belser returned an interception 44 yards for a score as the Colts improved to 8-6 , the same mark as the Eagles , who lost for the fourth time in five games . B-PER E-PER O O O O O O O O B-PER E-PER O O O O O O O O O O S-ORG O O O O O O O O O S-ORG O O O O O O O O O O O
+Paul Justin , starting for the sidelined Jim Harbaugh , was 14-of-23 for 144 yards and a touchdown for the the Colts , who played their last home game of the season . B-PER E-PER O O O O O B-PER E-PER O O O O O O O O O O O O S-ORG O O O O O O O O O O O
+Indianapolis closes with games at Kansas City and Cincinnati . S-LOC O O O O B-LOC E-LOC O S-LOC O
+The Eagles were held without a touchdown until the final five seconds . O S-ORG O O O O O O O O O O O
+Philadelphia , which fell from an NFC East tie with the Dallas Cowboys and Washington Redskins , go on the road against the New York Jets and then entertain Arizona . S-LOC O O O O O O O O O O B-ORG E-ORG O B-ORG E-ORG O O O O O O O B-ORG I-ORG E-ORG O O O S-ORG O
+The loss by Philadelphia allowed the idle Green Bay Packers ( 10-3 ) to clinch the first NFC playoff berth . O O O S-ORG O O O B-ORG I-ORG E-ORG O O O O O O O O O O O
+The Colts won despite the absence of injured starting defensive tackle Tony Siragusa , cornerback Ray Buchanan and linebacker Quentin Coryatt . O S-ORG O O O O O O O O O B-PER E-PER O O B-PER E-PER O O B-PER E-PER O
+Faulk carried 16 times , including a 13-yard TD run in the first quarter and a seven-yard score early in the final period . S-PER O O O O O O O O O O O O O O O O O O O O O O O
+Justin made his second straight start for Harbaugh , who has a knee injury . S-PER O O O O O O S-PER O O O O O O O
+Justin suffered a sprained right shoulder in the third quarter and did not return . S-PER O O O O O O O O O O O O O O
+Third-stringer Kerwin Bell , a 1988 draft choice of the Miami Dolphins , made his NFL debut and was 5-of-5 for 75 yards , including a 20-yard scoring strike to Marvin Harrison in the third period . O B-PER E-PER O O O O O O O B-ORG E-ORG O O O S-ORG O O O O O O O O O O O O O O B-PER E-PER O O O O O
+A 39-yard interference penalty against Philadelphia 's Troy Vincent set up Faulk 's first score around left end that capped an 80-yard march 5:17 into the game and the rout was on . O O O O O S-LOC O B-PER E-PER O O S-PER O O O O O O O O O O O O O O O O O O O O O
+Eagles quarterback Ty Detmer was 17-of-34 for 182 yards before he was benched . S-ORG O B-PER E-PER O O O O O O O O O O
+Ricky Watters , who leads the NFC in rushing , left the game after getting kneed to the helmet after gaining 33 yards on seven carries . B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+NBA BASKETBALL - STANDINGS AFTER THURSDAY 'S GAMES . S-ORG O O O O O O O O
+NEW YORK 1996-12-06 B-LOC E-LOC O
+Standings of National O O S-ORG
+Basketball Association teams after games played on Thursday B-ORG E-ORG O O O O O O
+( tabulate under won , lost , percentage , games behind ) : O O O O O O O O O O O O O
+EASTERN CONFERENCE O O
+ATLANTIC DIVISION S-LOC O
+W L PCT GB O O O O
+MIAMI 14 4 .778 - S-ORG O O O O
+NEW YORK 10 6 .625 3 B-ORG E-ORG O O O O
+ORLANDO 8 6 .571 4 S-ORG O O O O
+WASHINGTON 7 9 .438 6 S-ORG O O O O
+PHILADELPHIA 7 10 .412 6 1/2 S-ORG O O O O O
+BOSTON 4 12 .250 9 S-ORG O O O O
+NEW JERSEY 3 10 .231 8 1/2 B-ORG E-ORG O O O O O
+CENTRAL DIVISION O O
+W L PCT GB O O O O
+CHICAGO 17 1 .944 - S-ORG O O O O
+DETROIT 13 3 .813 3 S-ORG O O O O
+CLEVELAND 11 5 .688 5 S-ORG O O O O
+ATLANTA 10 8 .556 7 S-ORG O O O O
+CHARLOTTE 8 8 .500 8 S-ORG O O O O
+MILWAUKEE 8 8 .500 8 S-ORG O O O O
+INDIANA 7 8 .467 8 1/2 S-ORG O O O O O
+TORONTO 6 11 .353 10 1/2 S-ORG O O O O O
+WESTERN CONFERENCE O O
+MIDWEST DIVISION O O
+W L PCT GB O O O O
+HOUSTON 16 2 .889 - S-ORG O O O O
+UTAH 14 2 .875 1 S-ORG O O O O
+MINNESOTA 7 10 .412 8 1/2 S-ORG O O O O O
+DALLAS 6 11 .353 9 1/2 S-ORG O O O O O
+DENVER 5 14 .263 11 1/2 S-ORG O O O O O
+SAN ANTONIO 3 13 .188 12 B-ORG E-ORG O O O O
+VANCOUVER 2 16 .111 14 S-ORG O O O O
+PACIFIC DIVISION S-LOC O
+W L PCT GB O O O O
+SEATTLE 15 5 .750 - S-ORG O O O O
+LA LAKERS 13 7 .650 2 B-ORG E-ORG O O O O
+PORTLAND 11 8 .579 3 1/2 S-ORG O O O O O
+LA CLIPPERS 7 11 .389 7 B-ORG E-ORG O O O O
+GOLDEN STATE 6 12 .333 8 B-ORG E-ORG O O O O
diff --git a/dataset/slot_tagging/conll_2003/eng.train.tsv b/dataset/slot_tagging/conll_2003/eng.train.tsv
new file mode 100644
index 0000000..d7a8640
--- /dev/null
+++ b/dataset/slot_tagging/conll_2003/eng.train.tsv
@@ -0,0 +1,1000 @@
+-DOCSTART- O
+EU rejects German call to boycott British lamb . S-ORG O S-MISC O O O S-MISC O O
+Peter Blackburn B-PER E-PER
+BRUSSELS 1996-08-22 S-LOC O
+The European Commission said on Thursday it disagreed with German advice to consumers to shun British lamb until scientists determine whether mad cow disease can be transmitted to sheep . O B-ORG E-ORG O O O O O O S-MISC O O O O O S-MISC O O O O O O O O O O O O O O
+Germany 's representative to the European Union 's veterinary committee Werner Zwingmann said on Wednesday consumers should buy sheepmeat from countries other than Britain until the scientific advice was clearer . S-LOC O O O O B-ORG E-ORG O O O B-PER E-PER O O O O O O O O O O O S-LOC O O O O O O O
+" We do n't support any such recommendation because we do n't see any grounds for it , " the Commission 's chief spokesman Nikolaus van der Pas told a news briefing . O O O O O O O O O O O O O O O O O O O O S-ORG O O O B-PER I-PER I-PER E-PER O O O O O
+He said further scientific study was required and if it was found that action was needed it should be taken by the European Union . O O O O O O O O O O O O O O O O O O O O O O B-ORG E-ORG O
+He said a proposal last month by EU Farm Commissioner Franz Fischler to ban sheep brains , spleens and spinal cords from the human and animal food chains was a highly specific and precautionary move to protect human health . O O O O O O O S-ORG O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+Fischler proposed EU-wide measures after reports from Britain and France that under laboratory conditions sheep could contract Bovine Spongiform Encephalopathy ( BSE ) -- mad cow disease . S-PER O S-MISC O O O O S-LOC O S-LOC O O O O O O O B-MISC I-MISC E-MISC O S-MISC O O O O O O
+But Fischler agreed to review his proposal after the EU 's standing veterinary committee , mational animal health officials , questioned if such action was justified as there was only a slight risk to human health . O S-PER O O O O O O O S-ORG O O O O O O O O O O O O O O O O O O O O O O O O O O O
+Spanish Farm Minister Loyola de Palacio had earlier accused Fischler at an EU farm ministers ' meeting of causing unjustified alarm through " dangerous generalisation . " S-MISC O O B-PER I-PER E-PER O O O S-PER O O S-ORG O O O O O O O O O O O O O O
+. O
+Only France and Britain backed Fischler 's proposal . O S-LOC O S-LOC O S-PER O O O
+The EU 's scientific veterinary and multidisciplinary committees are due to re-examine the issue early next month and make recommendations to the senior veterinary officials . O S-ORG O O O O O O O O O O O O O O O O O O O O O O O O
+Sheep have long been known to contract scrapie , a brain-wasting disease similar to BSE which is believed to have been transferred to cattle through feed containing animal waste . O O O O O O O O O O O O O O S-MISC O O O O O O O O O O O O O O O
+British farmers denied on Thursday there was any danger to human health from their sheep , but expressed concern that German government advice to consumers to avoid British lamb might influence consumers across Europe . S-MISC O O O O O O O O O O O O O O O O O O O S-MISC O O O O O O S-MISC O O O O O S-LOC O
+" What we have to be extremely careful of is how other countries are going to take Germany 's lead , " Welsh National Farmers ' Union ( NFU ) chairman John Lloyd Jones said on BBC radio . O O O O O O O O O O O O O O O O O S-LOC O O O O B-ORG I-ORG I-ORG I-ORG E-ORG O S-ORG O O B-PER I-PER E-PER O O B-ORG E-ORG O
+Bonn has led efforts to protect public health after consumer confidence collapsed in March after a British report suggested humans could contract an illness similar to mad cow disease by eating contaminated beef . S-LOC O O O O O O O O O O O O O O O S-MISC O O O O O O O O O O O O O O O O O
+Germany imported 47,600 sheep from Britain last year , nearly half of total imports . S-LOC O O O O S-LOC O O O O O O O O O
+It brought in 4,275 tonnes of British mutton , some 10 percent of overall imports . O O O O O O S-MISC O O O O O O O O O
+-DOCSTART- O
+Rare Hendrix song draft sells for almost $ 17,000 . O S-PER O O O O O O O O
+LONDON 1996-08-22 S-LOC O
+A rare early handwritten draft of a song by U.S. guitar legend Jimi Hendrix was sold for almost $ 17,000 on Thursday at an auction of some of the late musician 's favourite possessions . O O O O O O O O O S-LOC O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O
+A Florida restaurant paid 10,925 pounds ( $ 16,935 ) for the draft of " Ai n't no telling " , which Hendrix penned on a piece of London hotel stationery in late 1966 . O S-LOC O O O O O O O O O O O O O B-MISC I-MISC I-MISC E-MISC O O O S-PER O O O O O S-LOC O O O O O O
+At the end of a January 1967 concert in the English city of Nottingham he threw the sheet of paper into the audience , where it was retrieved by a fan . O O O O O O O O O O S-MISC O O S-LOC O O O O O O O O O O O O O O O O O O
+Buyers also snapped up 16 other items that were put up for auction by Hendrix 's former girlfriend Kathy Etchingham , who lived with him from 1966 to 1969 . O O O O O O O O O O O O O O S-PER O O O B-PER E-PER O O O O O O O O O O
+They included a black lacquer and mother of pearl inlaid box used by Hendrix to store his drugs , which an anonymous Australian purchaser bought for 5,060 pounds ( $ 7,845 ) . O O O O O O O O O O O O O S-PER O O O O O O O O S-MISC O O O O O O O O O O
+The guitarist died of a drugs overdose in 1970 aged 27 . O O O O O O O O O O O O
+-DOCSTART- O
+China says Taiwan spoils atmosphere for talks . S-LOC O S-LOC O O O O O
+BEIJING 1996-08-22 S-LOC O
+China on Thursday accused Taipei of spoiling the atmosphere for a resumption of talks across the Taiwan Strait with a visit to Ukraine by Taiwanese Vice President Lien Chan this week that infuriated Beijing . S-LOC O O O S-LOC O O O O O O O O O O O B-LOC E-LOC O O O O S-LOC O S-MISC O O B-PER E-PER O O O O S-LOC O
+Speaking only hours after Chinese state media said the time was right to engage in political talks with Taiwan , Foreign Ministry spokesman Shen Guofang told Reuters : " The necessary atmosphere for the opening of the talks has been disrupted by the Taiwan authorities . " O O O O S-MISC O O O O O O O O O O O O O S-LOC O B-ORG E-ORG O B-PER E-PER O S-ORG O O O O O O O O O O O O O O O O S-LOC O O O
+State media quoted China 's top negotiator with Taipei , Tang Shubei , as telling a visiting group from Taiwan on Wednesday that it was time for the rivals to hold political talks . O O O S-LOC O O O O S-LOC O B-PER E-PER O O O O O O O S-LOC O O O O O O O O O O O O O O
+" Now is the time for the two sides to engage in political talks ... O O O O O O O O O O O O O O O
+that is to end the state of hostility , " Thursday 's overseas edition of the People 's Daily quoted Tang as saying . O O O O O O O O O O O O O O O O B-ORG I-ORG E-ORG O S-PER O O O
+The foreign ministry 's Shen told Reuters Television in an interview he had read reports of Tang 's comments but gave no details of why the negotiator had considered the time right for talks with Taiwan , which Beijing considers a renegade province . O O O O S-ORG O B-ORG E-ORG O O O O O O O O S-PER O O O O O O O O O O O O O O O O O O S-LOC O O S-LOC O O O O O
+China , which has long opposed all Taipei efforts to gain greater international recognition , was infuriated by a visit to Ukraine this week by Taiwanese Vice President Lien . S-LOC O O O O O O S-LOC O O O O O O O O O O O O O S-LOC O O O S-MISC O O S-PER O
+-DOCSTART- O
+China says time right for Taiwan talks . S-LOC O O O O S-LOC O O
+BEIJING 1996-08-22 S-LOC O
+China has said it was time for political talks with Taiwan and that the rival island should take practical steps towards that goal . S-LOC O O O O O O O O O S-LOC O O O O O O O O O O O O O
+Consultations should be held to set the time and format of the talks , the official Xinhua news agency quoted Tang Shubei , executive vice chairman of the Association for Relations Across the Taiwan Straits , as saying late on Wednesday . O O O O O O O O O O O O O O O O S-ORG O O O B-PER E-PER O O O O O O B-ORG I-ORG I-ORG I-ORG I-ORG I-ORG E-ORG O O O O O O O
+-DOCSTART- O
+German July car registrations up 14.2 pct yr / yr . S-MISC O O O O O O O O O O
+FRANKFURT 1996-08-22 S-LOC O
+German first-time registrations of motor vehicles jumped 14.2 percent in July this year from the year-earlier period , the Federal office for motor vehicles said on Thursday . S-MISC O O O O O O O O O O O O O O O O O O B-ORG I-ORG I-ORG I-ORG E-ORG O O O O
+The office said 356,725 new cars were registered in July 1996 -- 304,850 passenger cars and 15,613 trucks . O O O O O O O O O O O O O O O O O O O
+The figures represent a 13.6 percent increase for passenger cars and a 2.2 percent decline for trucks from July 1995 . O O O O O O O O O O O O O O O O O O O O O
+Motor-bike registration rose 32.7 percent in the period . O O O O O O O O O
+The growth was partly due to an increased number of Germans buying German cars abroad , while manufacturers said that domestic demand was weak , the federal office said . O O O O O O O O O O S-MISC O S-MISC O O O O O O O O O O O O O O O O O
+Almost all German car manufacturers posted gains in registration numbers in the period . O O S-MISC O O O O O O O O O O O
+Volkswagen AG won 77,719 registrations , slightly more than a quarter of the total . B-ORG E-ORG O O O O O O O O O O O O O
+Opel AG together with General Motors came in second place with 49,269 registrations , 16.4 percent of the overall figure . B-ORG E-ORG O O B-ORG E-ORG O O O O O O O O O O O O O O O
+Third was Ford with 35,563 registrations , or 11.7 percent . O O S-ORG O O O O O O O O
+Only Seat and Porsche had fewer registrations in July 1996 compared to last year 's July . O S-ORG O S-ORG O O O O O O O O O O O O O
+Seat posted 3,420 registrations compared with 5522 registrations in July a year earlier . S-ORG O O O O O O O O O O O O O
+Porsche 's registrations fell to 554 from 643 . S-ORG O O O O O O O O
+-DOCSTART- O
+GREEK SOCIALISTS GIVE GREEN LIGHT TO PM FOR ELECTIONS . S-MISC O O O O O O O O O
+ATHENS 1996-08-22 S-LOC O
+The Greek socialist party 's executive bureau gave the green light to Prime Minister Costas Simitis to call snap elections , its general secretary Costas Skandalidis told reporters . O S-MISC O O O O O O O O O O O O B-PER E-PER O O O O O O O O B-PER E-PER O O O
+Prime Minister Costas Simitis is going to make an official announcement after a cabinet meeting later on Thursday , said Skandalidis . O O B-PER E-PER O O O O O O O O O O O O O O O O S-PER O
+-- Dimitris Kontogiannis , Athens Newsroom +301 3311812-4 O B-PER E-PER O B-ORG E-ORG O O
+-DOCSTART- O
+BayerVB sets C$ 100 million six-year bond . S-ORG O S-MISC O O O O O
+LONDON 1996-08-22 S-LOC O
+The following bond was announced by lead manager Toronto Dominion . O O O O O O O O B-PER E-PER O
+BORROWER BAYERISCHE VEREINSBANK O B-ORG E-ORG
+AMT C$ 100 MLN COUPON 6.625 MATURITY 24.SEP.02 O S-MISC O O O O O O
+TYPE STRAIGHT ISS PRICE 100.92 PAY DATE 24.SEP.96 O O O O O O O O
+FULL FEES 1.875 REOFFER 99.32 SPREAD +20 BP O O O O O O O O
+MOODY AA1 LISTING LUX PAY FREQ = O O O O O O O
+S&P = DENOMS ( K ) 1-10-100 SALE LIMITS US / UK / CA S-ORG O O O O O O O O S-LOC O S-LOC O S-LOC
+NEG PLG NO CRS DEFLT NO FORCE MAJ = O O O O O O O O O
+GOV LAW GERMAN HOME CTRY = TAX PROVS STANDARD O O S-MISC O O O O O O
+MGT / UND 0.275 SELL CONC 1.60 PRAECIP = O O O O O O O O O
+UNDERLYING GOVT BOND 7.0 PCT SEPT 2001 O O O O O O O
+NOTES BAYERISCHE VEREINSBANK IS JOINT LEAD MANAGER O B-ORG E-ORG O O O O
+-- London Newsroom +44 171 542 7658 O B-ORG E-ORG O O O O
+-DOCSTART- O
+Venantius sets $ 300 million January 1999 FRN . S-ORG O O O O O O O O
+LONDON 1996-08-22 S-LOC O
+The following floating-rate issue was announced by lead manager Lehman Brothers International . O O O O O O O O O B-ORG I-ORG E-ORG O
+BORROWER VENANTIUS AB ( SWEDISH NATIONAL MORTGAGE AGENCY ) O B-ORG E-ORG O S-MISC O O O O
+AMT $ 300 MLN SPREAD - 12.5 BP MATURITY 21.JAN.99 O O O O O O O O O O
+TYPE FRN BASE 3M LIBOR PAY DATE S23.SEP.96 O O O S-ORG O O O O
+LAST MOODY AA3 ISS PRICE 99.956 FULL FEES 10 BP O O O O O O O O O O
+LAST S&P AA+ REOFFER = O S-ORG O O O
+NOTES S SHORT FIRST COUPON O O O O O
+LISTING LONDON DENOMS ( K ) 1-10-100 SALE LIMITS US / UK / JP / FR O S-LOC O O O O O O O S-LOC O S-LOC O S-LOC O S-LOC
+NEG PLG YES CRS DEFLT NO FORCE MAJ IPMA 2 O O O O O O O O O O
+GOV LAW ENGLISH HOME CTRY SWEDEN TAX PROVS STANDARD O O S-MISC O O S-LOC O O O
+MGT / UND 5 BP SELL CONC 5 BP PRAECIP = O O O O O O O O O O O
+NOTES ISSUED OFF EMTN PROGRAMME O O O O O
+-- London Newsroom +44 171 542 8863 O B-ORG E-ORG O O O O
+-DOCSTART- O
+Port conditions update - Syria - Lloyds Shipping . O O O O S-LOC O B-ORG E-ORG O
+Port conditions from Lloyds Shipping Intelligence Service -- O O O B-ORG I-ORG I-ORG E-ORG O
+LATTAKIA , Aug 10 - waiting time at Lattakia and Tartous presently 24 hours . S-LOC O O O O O O O S-LOC O S-LOC O O O O
+-DOCSTART- O
+Israel plays down fears of war with Syria . S-LOC O O O O O O S-LOC O
+Colleen Siegel B-PER E-PER
+JERUSALEM 1996-08-22 S-LOC O
+Israel 's outgoing peace negotiator with Syria said on Thursday current tensions between the two countries appeared to be a storm in a teacup . S-LOC O O O O O S-LOC O O O O O O O O O O O O O O O O O O
+Itamar Rabinovich , who as Israel 's ambassador to Washington conducted unfruitful negotiations with Syria , told Israel Radio it looked like Damascus wanted to talk rather than fight . B-PER E-PER O O O S-LOC O O O S-LOC O O O O S-LOC O O B-ORG E-ORG O O O S-LOC O O O O O O O
+" It appears to me the Syrian priority is still to negotiate . O O O O O O S-MISC O O O O O O
+The Syrians are confused , they are definitely tense , but the general assessment here in Washington is that this is essentially a storm in a teacup , " he said . O S-MISC O O O O O O O O O O O O O O S-LOC O O O O O O O O O O O O O O O
+Rabinovich is winding up his term as ambassador . S-PER O O O O O O O O
+He will be replaced by Eliahu Ben-Elissar , a former Israeli envoy to Egypt and right-wing Likud party politician . O O O O O B-PER E-PER O O O S-MISC O O S-LOC O O S-ORG O O O
+Israel on Wednesday sent Syria a message , via Washington , saying it was committed to peace and wanted to open negotiations without preconditions . S-LOC O O O S-LOC O O O O S-LOC O O O O O O O O O O O O O O O
+But it slammed Damascus for creating what it called a dangerous atmosphere . O O O S-LOC O O O O O O O O O
+Syria accused Israel on Wednesday of launching a hysterical campaign against it after Israeli television reported that Damascus had recently test fired a missile . S-LOC O S-LOC O O O O O O O O O O S-MISC O O O S-LOC O O O O O O O
+It said its arms purchases were for defensive purposes . O O O O O O O O O O
+" The message that we sent to ( Syrian President Hafez al- ) Assad is that Israel is ready at any time without preconditions to enter peace negotiations , " Israeli Foreign Minister David Levy told Israel Radio in an interview . O O O O O O O O S-MISC O B-PER E-PER O S-PER O O S-LOC O O O O O O O O O O O O O S-MISC O O B-PER E-PER O B-ORG E-ORG O O O O
+Tension has mounted since Israeli Prime Minister Benjamin Netanyahu took office in June vowing to retain the Golan Heights Israel captured from Syria in the 1967 Middle East war . O O O O S-MISC O O B-PER E-PER O O O O O O O O B-LOC E-LOC S-LOC O O S-LOC O O O B-LOC E-LOC O O
+Israeli-Syrian peace talks have been deadlocked over the Golan since 1991 despite the previous government 's willingness to make Golan concessions . S-MISC O O O O O O O S-LOC O O O O O O O O O O S-LOC O O
+Peace talks between the two sides were last held in February . O O O O O O O O O O O O
+" The voices coming out of Damascus are bad , not good . O O O O O O S-LOC O O O O O O
+The media ... O O O
+are full of expressions and declarations that must be worrying ... O O O O O O O O O O O
+this artificial atmosphere is very dangerous because those who spread it could become its prisoners , " Levy said . O O O O O O O O O O O O O O O O O S-PER O O
+" We expect from Syria , if its face is to peace , that it will answer Israel 's message to enter peace negotiations because that is our goal , " he said . " O O O O S-LOC O O O O O O O O O O O O S-LOC O O O O O O O O O O O O O O O O O
+We do not want a war , God forbid . O O O O O O O S-PER O O
+No one benefits from wars . " O O O O O O O
+Israel 's Channel Two television said Damascus had sent a " calming signal " to Israel . S-LOC O B-ORG E-ORG O O S-LOC O O O O O O O O S-LOC O
+It gave no source for the report . O O O O O O O O
+Netanyahu and Levy 's spokesmen said they could not confirm it . S-PER O S-PER O O O O O O O O O
+The television also said that Netanyahu had sent messages to reassure Syria via Cairo , the United States and Moscow . O O O O O S-PER O O O O O S-LOC O S-LOC O O B-LOC E-LOC O S-LOC O
+-DOCSTART- O
+Polish diplomat denies nurses stranded in Libya . S-MISC O O O O O S-LOC O
+TUNIS 1996-08-22 S-LOC O
+A Polish diplomat on Thursday denied a Polish tabloid report this week that Libya was refusing exit visas to 100 Polish nurses trying to return home after working in the North African country . O S-MISC O O O O O S-MISC O O O O O S-LOC O O O O O O S-MISC O O O O O O O O O B-MISC E-MISC O O
+" This is not true . O O O O O O
+Up to today , we have no knowledge of any nurse stranded or kept in Libya without her will , and we have not received any complaint , " the Polish embassy 's charge d'affaires in Tripoli , Tadeusz Awdankiewicz , told Reuters by telephone . O O O O O O O O O O O O O O O S-LOC O O O O O O O O O O O O O O S-MISC O O O O O S-LOC O B-PER E-PER O O S-ORG O O O
+Poland 's labour ministry said this week it would send a team to Libya to investigate , but Awdankiewicz said the probe was prompted by some nurses complaining about their work conditions such as non-payment of their salaries . S-LOC O O O O O O O O O O O O S-LOC O O O O S-PER O O O O O O O O O O O O O O O O O O O O
+He said that there are an estimated 800 Polish nurses working in Libya . O O O O O O O O S-MISC O O O S-LOC O
+-DOCSTART- O
+Two Iranian opposition leaders meet in Baghdad . O S-MISC O O O O S-LOC O
+Hassan Hafidh B-PER E-PER
+BAGHDAD 1996-08-22 S-LOC O
+An Iranian exile group based in Iraq vowed on Thursday to extend support to Iran 's Kurdish rebels after they were attacked by Iranian troops deep inside Iraq last month . O S-MISC O O O O S-LOC O O O O O O O S-LOC O S-MISC O O O O O O S-MISC O O O S-LOC O O O
+A Mujahideen Khalq statement said its leader Massoud Rajavi met in Baghdad the Secretary-General of the Kurdistan Democratic Party of Iran ( KDPI ) Hassan Rastegar on Wednesday and voiced his support to Iran 's rebel Kurds . O B-ORG E-ORG O O O O B-PER E-PER O O S-LOC O O O O B-ORG I-ORG I-ORG I-ORG E-ORG O S-ORG O B-PER E-PER O O O O O O O S-LOC O O S-MISC O
+" Rajavi emphasised that the Iranian Resistance would continue to stand side by side with their Kurdish compatriots and the resistance movement in Iranian Kurdistan , " it said . O S-MISC O O O S-MISC S-ORG O O O O O O O O O S-MISC O O O O O O B-LOC E-LOC O O O O O
+A spokesman for the group said the meeting " signals a new level of cooperation between Mujahideen Khalq and the Iranian Kurdish oppositions " . O O O O O O O O O O O O O O O O B-ORG E-ORG O O B-MISC E-MISC O O O
+Iran heavily bombarded targets in northern Iraq in July in pursuit of KDPI guerrillas based in Iraqi Kurdish areas outside the control of the government in Baghdad . S-LOC O O O O O S-LOC O O O O O S-ORG O O O B-MISC E-MISC O O O O O O O O S-LOC O
+Iraqi Kurdish areas bordering Iran are under the control of guerrillas of the Iraqi Kurdish Patriotic Union of Kurdistan ( PUK ) group . B-MISC E-MISC O O S-LOC O O O O O O O O B-ORG I-ORG I-ORG I-ORG I-ORG E-ORG O S-ORG O O O
+PUK and Iraq 's Kurdistan Democratic Party ( KDP ) the two main Iraqi Kurdish factions , have had northern Iraq under their control since Iraqi forces were ousted from Kuwait in the 1991 Gulf War . S-ORG O S-LOC O B-ORG I-ORG E-ORG O S-ORG O O O O B-MISC E-MISC O O O O O S-LOC O O O O S-MISC O O O O S-LOC O O O B-MISC E-MISC O
+Clashes between the two parties broke out at the weekend in the most serious fighting since a U.S.-sponsored ceasefire last year . O O O O O O O O O O O O O O O O O S-MISC O O O O
+Mujahideen Khalq said Iranian troops had also been shelling KDP positions in Qasri region in Suleimaniya province near the Iranian border over the last two days . B-ORG E-ORG O S-MISC O O O O O S-ORG O O S-LOC O O S-LOC O O O S-MISC O O O O O O O
+It said about 100 Iraqi Kurds were killed or wounded in the attack . O O O O B-MISC E-MISC O O O O O O O O
+Both Iran and Turkey mount air and land strikes at targets in northern Iraq in pursuit of their own Kurdish rebels . O S-LOC O S-LOC O O O O O O O O O S-LOC O O O O O S-MISC O O
+A U.S.-led air force in southern Turkey protects Iraqi Kurds from possible attacks by Baghdad troops . O S-MISC O O O O S-LOC O B-MISC E-MISC O O O O S-LOC O O
+-DOCSTART- O
+Saudi riyal rates steady in quiet summer trade . S-MISC O O O O O O O O
+MANAMA 1996-08-22 S-LOC O
+The spot Saudi riyal against the dollar and riyal interbank deposit rates were mainly steady this week in quiet summer trade , dealers in the kingdom said . O O S-MISC O O O O O O O O O O O O O O O O O O O O O O O O O
+" There were no changes in Saudi riyal rates . O O O O O O S-MISC O O O
+The market was very quiet because of summer holidays , " one dealer said . O O O O O O O O O O O O O O O
+The spot riyal was put at 3.7504 / 06 to the dollar . O O O O O O O O O O O O O
+One-month interbank deposits were at 5-1/2 , 3/8 percent , three months were 5-5/8 , 1/2 percent and six months were 5-3/4 , 5/8 percent . S-MISC O O O O O O O O O O O O O O O O O O O O O O O O O
+One-year funds were at six , 5-7/8 percent . S-MISC O O O O O O O O
+-DOCSTART- O
+Israel approves Arafat 's flight to West Bank . S-LOC O S-PER O O O B-LOC E-LOC O
+JERUSALEM 1996-08-22 S-LOC O
+Israel gave Palestinian President Yasser Arafat permission on Thursday to fly over its territory to the West Bank , ending a brief Israeli-PLO crisis , an Arafat adviser said . S-LOC O S-MISC O B-PER E-PER O O O O O O O O O O B-LOC E-LOC O O O O S-MISC O O O S-PER O O O
+" The problem is over . O O O O O O
+The president 's aircraft has received permission to pass through Israeli airspace but the president is not expected to travel to the West Bank before Monday , " Nabil Abu Rdainah told Reuters . O O O O O O O O O O S-MISC O O O O O O O O O O O B-LOC E-LOC O O O O B-PER I-PER E-PER O S-ORG O
+Arafat had been scheduled to meet former Israeli prime minister Shimon Peres in the West Bank town of Ramallah on Thursday but the venue was changed to Gaza after Israel denied flight clearance to the Palestinian leader 's helicopters . S-PER O O O O O O S-MISC O O B-PER E-PER O O B-LOC E-LOC O O S-LOC O O O O O O O O S-LOC O S-LOC O O O O O S-MISC O O O O
+Palestinian officials accused right-wing Prime Minister Benjamin Netanyahu of trying to stop the Ramallah meeting by keeping Arafat grounded . S-MISC O O O O O B-PER E-PER O O O O O S-LOC O O O S-PER O O
+Arafat subsequently cancelled a meeting between Israeli and PLO officials , on civilian affairs , at the Allenby Bridge crossing between Jordan and the West Bank . S-PER O O O O O S-MISC O S-ORG O O O O O O O O B-LOC E-LOC O O S-LOC O O B-LOC E-LOC O
+Abu Rdainah said Arafat had decided against flying to the West Bank on Thursday , after Israel lifted the ban , because he had a busy schedule in Gaza and would not be free until Monday . B-PER E-PER O S-PER O O O O O O B-LOC E-LOC O O O O S-LOC O O O O O O O O O O O S-LOC O O O O O O O O
+-DOCSTART- O
+Arafat to meet Peres in Gaza after flight ban . S-PER O O S-PER O S-LOC O O O O
+JERUSALEM 1996-08-22 S-LOC O
+Yasser Arafat will meet Shimon Peres in Gaza on Thursday after Palestinians said the right-wing Israeli government had barred the Palestinian leader from flying to the West Bank for talks with the former prime minister . B-PER E-PER O O B-PER E-PER O S-LOC O O O S-MISC O O O S-MISC O O O O S-MISC O O O O O B-LOC E-LOC O O O O O O O O
+" The meeting between Peres and Arafat will take place at Erez checkpoint in Gaza and not in Ramallah as planned , " Peres ' office said . O O O O S-PER O S-PER O O O O S-LOC O O S-LOC O O O S-LOC O O O O S-PER O O O O
+Palestinian officials said the Israeli government had barred Arafat from overflying Israel in a Palestinian helicopter to the West Bank in an attempt to bar the meeting with Peres . S-MISC O O O S-MISC O O O S-PER O O S-LOC O O S-MISC O O O B-LOC E-LOC O O O O O O O O S-PER O
+Israeli Prime Minister Benjamin Netanyahu has accused opposition leader Peres , who he defeated in May elections , of trying to undermine his Likud government 's authority to conduct peace talks . S-MISC O O B-PER E-PER O O O O S-PER O O O O O O O O O O O O O S-ORG O O O O O O O O
+-DOCSTART- O
+Afghan UAE embassy says Taleban guards going home . S-MISC S-LOC O O S-MISC O O O O
+Hilary Gush B-PER E-PER
+DUBAI 1996-08-22 S-LOC O
+Three Afghan guards brought to the United Arab Emirates last week by Russian hostages who escaped from the Taleban militia will return to Afghanistan in a few days , the Afghan embassy in Abu Dhabi said on Thursday . O S-MISC O O O O B-LOC I-LOC E-LOC O O O S-MISC O O O O O S-MISC O O O O S-LOC O O O O O O S-MISC O O B-LOC E-LOC O O O O
+" Our ambassador is in touch with the UAE foreign ministry . O O O O O O O O S-LOC O O O
+Their return to Afghanistan will take place in two or three days , " an embassy official said . O O O S-LOC O O O O O O O O O O O O O O O
+" The embassy is issuing them travel documents for their return to their homeland . O O O O O O O O O O O O O O O
+There is no objection to their travel , " he added . O O O O O O O O O O O O
+The three Islamic Taleban guards were overpowered by seven Russian aircrew who escaped to UAE state Sharjah last Friday on board their own aircraft after a year in the captivity of Taleban militia in Kandahar in southern Afghanistan . O O B-MISC E-MISC O O O O O S-MISC O O O O S-LOC O S-LOC O O O O O O O O O O O O O O S-MISC O O S-LOC O O S-LOC O
+The UAE said on Monday it would hand over the three to the International Red Crescent , possibly last Tuesday . O S-LOC O O O O O O O O O O O B-ORG I-ORG E-ORG O O O O O
+It has since been silent on the issue . O O O O O O O O O
+When asked whether the three guards would travel back to Kandahar or the Afghan capital Kabul , the embassy official said : " That has not been decided , but possibly Kandahar . " O O O O O O O O O O S-LOC O O S-MISC O S-LOC O O O O O O O O O O O O O O O S-LOC O O
+Kandahar is the headquarters of the opposition Taleban militia . S-LOC O O O O O O S-MISC O O
+Kabul is controlled by President Burhanuddin Rabbani 's government , which Taleban is fighting to overthrow . S-LOC O O O O B-PER E-PER O O O O S-MISC O O O O O
+The embassy official said the three men , believed to be in their 20s , were currently in Abu Dhabi . O O O O O O O O O O O O O O O O O O B-LOC E-LOC O
+He did not elaborate . O O O O O
+The Russians , working for the Aerostan firm in the Russian republic of Tatarstan , were taken hostage after a Taleban MiG-19 fighter forced their cargo plane to land in August 1995 . O S-MISC O O O O S-ORG O O O S-MISC O O S-LOC O O O O O O S-MISC S-MISC O O O O O O O O O O O
+Taleban said its shipment of ammunition from Albania was evidence of Russian military support for Rabbani 's government . S-MISC O O O O O O S-LOC O O O S-MISC O O O S-PER O O O
+Moscow said the crew 's nationality was coincidental . S-LOC O O O O O O O O
+Numerous diplomatic attempts to free the seven failed . O O O O O O O O O
+The Russians , who said they overpowered the guards -- two armed with Kalashnikov automatic rifles -- while doing regular maintenance work on their Ilyushin 76 cargo plane last Friday , left the UAE capital Abu Dhabi for home on Sunday . O S-MISC O O O O O O O O O O O S-MISC O O O O O O O O O O B-MISC E-MISC O O O O O O O S-LOC O B-LOC E-LOC O O O O O
+-DOCSTART- O
+Iraq 's Saddam meets Russia 's Zhirinovsky . S-LOC O S-PER O S-LOC O S-PER O
+BAGHDAD 1996-08-22 S-LOC O
+Iraqi President Saddam Hussein has told visiting Russian ultra-nationalist Vladimir Zhirinovsky that Baghdad wanted to maintain " friendship and cooperation " with Moscow , official Iraqi newspapers said on Thursday . S-MISC O B-PER E-PER O O O S-MISC O B-PER E-PER O S-LOC O O O O O O O O O S-LOC O O S-MISC O O O O O
+" President Saddam Hussein stressed during the meeting Iraq 's keenness to maintain friendship and cooperation with Russia , " the papers said . O O B-PER E-PER O O O O S-LOC O O O O O O O O S-LOC O O O O O O
+They said Zhirinovsky told Saddam before he left Baghdad on Wednesday that his Liberal Democratic party and the Russian Duma ( parliament ) " are calling for an immediate lifting of the embargo " imposed on Iraq after its 1990 invasion of Kuwait . O O S-PER O S-PER O O O S-LOC O O O O B-ORG I-ORG E-ORG O O S-MISC S-ORG O O O O O O O O O O O O O O O O S-LOC O O O O O S-LOC O
+Zhirinovsky said on Tuesday he would press the Russian government to help end U.N. trade sanctions on Iraq and blamed Moscow for delaying establishment of good ties with Baghdad . S-PER O O O O O O O S-MISC O O O O S-ORG O O O S-LOC O O S-LOC O O O O O O O S-LOC O
+" Our stand is firm , namely we are calling on ( the Russian ) government to end the economic embargo on Iraq and resume trade ties between Russia and Iraq , " he told reporters . O O O O O O O O O O O O O S-MISC O O O O O O O O S-LOC O O O O O S-LOC O S-LOC O O O O O O
+Zhirinovsky visited Iraq twice in 1995 . S-PER O S-LOC O O O O
+Last October he was invited to attend the referendum held on Iraq 's presidency , which extended Saddam 's term for seven more years . O O O O O O O O O O O S-LOC O O O O O S-PER O O O O O O O
+-DOCSTART- O
+PRESS DIGEST - Iraq - Aug 22 . O O O S-LOC O O O O
+BAGHDAD 1996-08-22 S-LOC O
+These are some of the leading stories in the official Iraqi press on Thursday . O O O O O O O O O O S-MISC O O O O
+Reuters has not verified these stories and does not vouch for their accuracy . S-ORG O O O O O O O O O O O O O
+THAWRA S-ORG
+- Iraq 's President Saddam Hussein meets with chairman of the Russian liberal democratic party Vladimir Zhirinovsky . O S-LOC O O B-PER E-PER O O O O O S-MISC O O O B-PER E-PER O
+- Turkish foreign minister says Turkey will take part in the Baghdad trade fair that will be held in November . O S-MISC O O O S-LOC O O O O O S-LOC O O O O O O O O O
+IRAQ S-LOC
+- A shipload of 12 tonnes of rice arrives in Umm Qasr port in the Gulf . O O O O O O O O O O B-LOC E-LOC O O O S-LOC O
+-DOCSTART- O
+PRESS DIGEST - Lebanon - Aug 22 . O O O S-LOC O O O O
+BEIRUT 1996-08-22 S-LOC O
+These are the leading stories in the Beirut press on Thursday . O O O O O O O S-LOC O O O O
+Reuters has not verified these stories and does not vouch for their accuracy . S-ORG O O O O O O O O O O O O O
+AN-NAHAR S-ORG
+- Confrontation is escalating between Hizbollah and the government . O O O O O S-ORG O O O O
+- Prime Minister Hariri : Israeli threats do no serve peace . O O O S-PER O S-MISC O O O O O O
+AS-SAFIR S-ORG
+- Parliament Speaker Berri : Israel is preparing for war against Syria and Lebanon . O O O S-PER O S-LOC O O O O O S-LOC O S-LOC O
+- Parliamentary battle in Beirut .. O O O O S-LOC O
+The three main lists have been prepared . O O O O O O O O
+AL-ANWAR S-ORG
+- Continued criticism of law violation incidents -- which occurred in the Mount Lebanon elections last Sunday . O O O O O O O O O O O O B-LOC E-LOC O O O O
+AD-DIYAR S-ORG
+- Financial negotiations between Lebanon and Pakistan . O O O O S-LOC O S-LOC O
+- Hariri to step into the election battle with an incomplete list . O S-PER O O O O O O O O O O O
+NIDA'A AL-WATAN B-ORG E-ORG
+- Maronite Patriarch Sfeir expressed sorrow over the violations in Sunday ' elections . O S-ORG O S-PER O O O O O O O O O O
+-DOCSTART- O
+CME live and feeder cattle calls range mixed . S-ORG O O O O O O O O
+CHICAGO 1996-08-22 S-LOC O
+Early calls on CME live and feeder cattle futures ranged from 0.200 cent higher to 0.100 lower , livestock analysts said . O O O S-ORG O O O O O O O O O O O O O O O O O O
+The continued strong tone to cash cattle and beef markets should prompt further support . O O O O O O O O O O O O O O O
+Outlook for a bullish cattle-on-feed report is also expected to lend support and prompt some bull spreading , analysts said . O O O O O O O O O O O O O O O O O O O O O
+However , trade will likely be light and prices could drift on evening up ahead of the report . O O O O O O O O O O O O O O O O O O O
+Cash markets are also expected to be quiet after the record amount of feedlot cattle traded this week , they said . O O O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+Kindercare says debt buy to hit Q1 results . O O O O O O O O O
+MONTGOMERY , Ala . S-LOC O S-LOC O
+1996-08-22 O
+KinderCare Learning Centers Inc said on Thursday that a debt buyback would mean an extraordinary loss of $ 1.2 million in its fiscal 1997 first quarter . B-ORG I-ORG I-ORG E-ORG O O O O O O O O O O O O O O O O O O O O O O O
+The company said that during the quarter , which began June 1 , it bought $ 30 million par value of its outstanding 10-3/8 percent senior notes due 2001 . O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+The notes were bought for $ 31.5 million . O O O O O O O O O
+Philip Maslowe , chief financial officer of the preschool and child care company , said the buyback " offered an opportunity to reduce the company 's weighted average interest costs and improve future cash flows and earnings . " B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+RESEARCH ALERT - Lehman starts SNET . O O O S-ORG O S-ORG O
+-- Lehman analyst Blake Bath started Southern New England Telecommunciations Corp with an outperform rating , his office said . O S-ORG O B-PER E-PER O B-ORG I-ORG I-ORG I-ORG E-ORG O O O O O O O O O
+-- The analyst set a 12-month price target of $ 45 and a fiscal 1996 year earnings estimate of $ 3.09 per share , his office said . O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+-- The analyst also set an earnings estimate for the 1997 year , but the figure was not immediately available . O O O O O O O O O O O O O O O O O O O O O
+-- Southern New England closed at 38-1/2 Wednesday . O B-ORG I-ORG E-ORG O O O O O
+-- E. Auchard , Wall Street bureau , 212-859-1736 O B-PER E-PER O B-ORG I-ORG E-ORG O O
+-DOCSTART- O
+Gateway Data Sciences Q2 net rises . B-ORG I-ORG E-ORG O O O O
+PHOENIX 1996-08-22 S-LOC O
+Summary of Consolidated Financial Data O O B-ORG I-ORG E-ORG
+( In Thousands , except per share data ) O O O O O O O O O
+Six Months Ended Quarter Ended O O O O O
+Jul 31 , Jul 31 , Jul 31 , Jul 31 , O O O O O O O O O O O O
+1996 1995 1996 1995 O O O O
+Income Statement Data : O O O O
+Total Revenue $ 10,756 $ 13,102 $ 7,961 $ 5,507 O O O O O O O O O O
+Software Revenue 2,383 1,558 1,086 1,074 O O O O O O
+Services Revenue 1,154 692 624 465 O O O O O O
+Operating Income 906 962 599 515 O O O O O O
+Net Income 821 512 565 301 O O O O O O
+Earnings Per Share 0.31 0.34 0.19 0.20 O O O O O O O
+Jul 31 , 1996 Jan 31 , 1996 O O O O O O O O
+Balance Sheet Data : O O O O
+Working Capital $ 5,755 ( $ 881 ) O O O O O O O O
+Cash and Cash Equivalents 2,386 93 O O O O O O
+Total Assets 14,196 7,138 O O O O
+Shareholders ' Equity 5,951 ( 1,461 ) O O O O O O O
+-DOCSTART- O
+Greek socialists give PM green light for election . S-MISC O O O O O O O O
+ATHENS 1996-08-22 S-LOC O
+The Greek socialist party 's executive bureau gave Prime Minister Costas Simitis its backing if he chooses to call snap elections , its general secretary Costas Skandalidis told reporters on Thursday . O S-MISC O O O O O O O O B-PER E-PER O O O O O O O O O O O O O B-PER E-PER O O O O O
+Prime Minister Costas Simitis will make an official announcement after a cabinet meeting later on Thursday , said Skandalidis . O O B-PER E-PER O O O O O O O O O O O O O O S-PER O
+-- Dimitris Kontogiannis , Athens Newsroom +301 3311812-4 O B-PER E-PER O B-ORG E-ORG O O
+-DOCSTART- O
+PRESS DIGEST - France - Le Monde Aug 22 . O O O S-LOC O B-ORG E-ORG O O O
+PARIS 1996-08-22 S-LOC O
+These are leading stories in Thursday 's afternoon daily Le Monde , dated Aug 23 . O O O O O O O O O B-ORG E-ORG O O O O O
+FRONT PAGE O O
+-- Africans seeking to renew or obtain work and residence rights say Prime Minister Alain Juppe 's proposals are insufficient as hunger strike enters 49th day in Paris church and Wednesday rally attracts 8,000 sympathisers . O S-MISC O O O O O O O O O O O O B-PER E-PER O O O O O O O O O O O S-LOC O O O O O O O O
+-- FLNC Corsican nationalist movement announces end of truce after last night 's attacks . O S-ORG S-MISC O O O O O O O O O O O O
+BUSINESS PAGES O O
+-- Shutdown of Bally 's French factories points up shoe industry crisis , with French manufacturers undercut by low-wage country competition and failure to keep abreast of trends . O O O S-ORG O S-MISC O O O O O O O O S-MISC O O O O O O O O O O O O O O
+-- Secretary general of the Sud-PTT trade union at France Telecom all the elements are in place for social unrest in the next few weeks . O O O O O S-MISC O O O B-ORG E-ORG O O O O O O O O O O O O O O O
+-- Paris Newsroom +33 1 42 21 53 81 O B-ORG E-ORG O O O O O O
+-DOCSTART- O
+Well repairs to lift Heidrun oil output - Statoil . O O O O S-LOC O O O S-ORG O
+OSLO 1996-08-22 S-LOC O
+Three plugged water injection wells on the Heidrun oilfield off mid-Norway will be reopened over the next month , operator Den Norske Stats Oljeselskap AS ( Statoil ) said on Thursday . O O O O O O O S-LOC O O S-MISC O O O O O O O O O B-ORG I-ORG I-ORG I-ORG E-ORG O S-ORG O O O O O
+The plugged wells have accounted for a dip of 30,000 barrels per day ( bpd ) in Heidrun output to roughly 220,000 bpd , according to the company 's Status Weekly newsletter . O O O O O O O O O O O O O O O O O S-LOC O O O O O O O O O O O B-ORG E-ORG O O
+The wells will be reperforated and gravel will be pumped into the reservoir through one of the wells to avoid plugging problems in the future , it said . O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+-- Oslo newsroom +47 22 42 50 41 O S-LOC O O O O O O
+-DOCSTART- O
+Finnish April trade surplus 3.8 billion markka - NCB . S-MISC O O O O O O O S-ORG O
+HELSINKI 1996-08-22 S-LOC O
+Finland 's trade surplus rose to 3.83 billion markka in April from 3.43 billion in March , the National Customs Board ( NCB ) said in a statement on Thursday . S-LOC O O O O O O O O O O O O O O O O O B-ORG I-ORG E-ORG O S-ORG O O O O O O O O
+The value of exports fell one percent year-on-year in April and the value of imports fell two percent , NCB said . O O O O O O O O O O O O O O O O O O O S-ORG O O
+Trade balance ( million markka ) : O O O O O O O
+April ' 96 March ' 96 Jan-April ' 96 Jan-April ' 95 O O O O O O O O O O O O
+Imports 10,663 10,725 43,430 40,989 O O O O O
+Exports 14,494 14,153 56,126 56,261 O O O O O
+Balance +3,831 +3,428 +12,696 +15,272 O O O O O
+The January-April 1995 import figure was revised from 39,584 million markka and the export figure from 55,627 million markka . O O O O O O O O O O O O O O O O O O O O
+The Bank of Finland earlier estimated the April trade surplus at 3.2 billion markka with exports projected at 14.5 billion and imports at 11.3 billion . O B-ORG I-ORG E-ORG O O O O O O O O O O O O O O O O O O O O O O
+The NCB 's official monthly trade statistics are lagging behind due to changes in customs procedures when Finland joined the European Union at the start of 1995 . O S-ORG O O O O O O O O O O O O O O O S-LOC O O B-ORG E-ORG O O O O O O
+-- Helsinki Newsroom +358 - 0 - 680 50 245 O B-ORG E-ORG O O O O O O O
+-DOCSTART- O
+Dutch state raises tap sale price to 99.95 . S-MISC O O O O O O O O
+AMSTERDAM 1996-08-22 S-LOC O
+The Finance Ministry raised the price for tap sales of the Dutch government 's new 5.75 percent bond due September 2002 to 99.95 from 99.90 . O B-ORG E-ORG O O O O O O O O S-MISC O O O O O O O O O O O O O O
+Tap sales began on Monday and are being held daily from 07.00 GMT to 15.00 GMT until further notice . O O O O O O O O O O O O S-MISC O O S-MISC O O O O
+The ministry had raised 2.3 billion guilders from sales of the new bond by the close of trade on Wednesday . O O O O O O O O O O O O O O O O O O O O O
+-- Amsterdam newsroom +31 20 504 5000 O S-LOC O O O O O
+-DOCSTART- O
+German farm ministry tells consumers to avoid British mutton . S-MISC O O O O O O S-MISC O O
+BONN 1996-08-22 S-LOC O
+Germany 's Agriculture Ministry suggested on Wednesday that consumers avoid eating meat from British sheep until scientists determine whether mad cow disease can be transmitted to the animals . S-LOC O B-ORG E-ORG O O O O O O O O O S-MISC O O O O O O O O O O O O O O O
+" Until this is cleared up by the European Union 's scientific panels -- and we have asked this to be done as quickly as possible -- ( consumers ) should if at all possible give preference to sheepmeat from other countries , " ministry official Werner Zwingmann told ZDF television . O O O O O O O O B-ORG E-ORG O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-PER E-PER O S-ORG O O
+" I do not want to say that there is a concrete danger for consumers , " he added . " O O O O O O O O O O O O O O O O O O O O O
+There are too many holes in what we know , and these must be filled very quickly . " O O O O O O O O O O O O O O O O O O O
+Bonn has led efforts to ensure consumer protection tops the list of priorities in dealing with the mad cow crisis , which erupted in March when Britain acknowledged humans could contract a similar illness by eating contaminated beef . S-LOC O O O O O O O O O O O O O O O O O O O O O O O O O S-LOC O O O O O O O O O O O O
+The European Commission agreed this month to rethink a proposal to ban the use of suspect sheep tissue after some EU veterinary experts questioned whether it was justified . O B-ORG E-ORG O O O O O O O O O O O O O O O O O S-ORG O O O O O O O O
+EU Farm Commissioner Franz Fischler had proposed banning sheep brains , spleens and spinal cords from the human and animal food chains after reports from Britain and France that under laboratory conditions sheep could contract Bovine Spongiform Encephalopathy ( BSE ) -- mad cow disease . S-ORG O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O S-LOC O S-LOC O O O O O O O B-MISC I-MISC E-MISC O S-MISC O O O O O O
+But some members of the EU 's standing veterinary committee questioned whether the action was necessary given the slight risk to human health . O O O O O S-ORG O O O O O O O O O O O O O O O O O O
+The question is being studied separately by two EU scientific committees . O O O O O O O O S-ORG O O O
+Sheep have long been known to contract scrapie , a similar brain-wasting disease to BSE which is believed to have been transferred to cattle through feed containing animal waste . O O O O O O O O O O O O O O S-MISC O O O O O O O O O O O O O O O
+British officials say sheep meat is perfectly safe to eat . S-MISC O O O O O O O O O O
+ZDF said Germany imported 47,600 sheep from Britain last year , nearly half of total imports . S-ORG O S-LOC O O O O S-LOC O O O O O O O O O
+It brought in 4,275 tonnes of British mutton , some 10 percent of overall imports . O O O O O O S-MISC O O O O O O O O O
+After the British government admitted a possible link between mad cow disease and its fatal human equivalent , the EU imposed a worldwide ban on British beef exports . O O S-MISC O O O O O O O O O O O O O O O O S-ORG O O O O O S-MISC O O O
+EU leaders agreed at a summit in June to a progressive lifting of the ban as Britain takes parallel measures to eradicate the disease . S-ORG O O O O O O O O O O O O O O O S-LOC O O O O O O O O
+-DOCSTART- O
+GOLF - SCORES AT WORLD SERIES OF GOLF . O O O O B-MISC I-MISC I-MISC E-MISC O
+AKRON , Ohio 1996-08-22 S-LOC O S-LOC O
+Scores from the $ 2.1 O O O O O
+million NEC World Series of Golf after the first round O B-MISC I-MISC I-MISC I-MISC E-MISC O O O O
+Thursday at the 7,149 yard , par 70 Firestone C.C course O O O O O O O O B-LOC E-LOC O
+( players U.S. unless stated ) : O O S-LOC O O O O
+66 Paul Goydos , Billy Mayfair , Hidemichi Tanaka ( Japan ) O B-PER E-PER O B-PER E-PER O B-PER E-PER O S-LOC O
+68 Steve Stricker O B-PER E-PER
+69 Justin Leonard , Mark Brooks O B-PER E-PER O B-PER E-PER
+70 Tim Herron , Duffy Waldorf , Davis Love , Anders Forsbrand O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER
+( Sweden ) , Nick Faldo ( Britain ) , John Cook , Steve Jones , Phil O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O B-PER E-PER O S-PER
+Mickelson , Greg Norman ( Australia ) S-PER O B-PER E-PER O S-LOC O
+71 Ernie Els ( South Africa ) , Scott Hoch O B-PER E-PER O B-LOC E-LOC O O B-PER E-PER
+72 Clarence Rose , Loren Roberts , Fred Funk , Sven Struver O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER
+( Germany ) , Alexander Cejka ( Germany ) , Hal Sutton , Tom Lehman O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O B-PER E-PER
+73 D.A. Weibring , Brad Bryant , Craig Parry ( Australia ) , O B-PER E-PER O B-PER E-PER O B-PER E-PER O S-LOC O O
+Stewart Ginn ( Australia ) , Corey Pavin , Craig Stadler , Mark B-PER E-PER O S-LOC O O B-PER E-PER O B-PER E-PER O S-PER
+O'Meara , Fred Couples S-PER O B-PER E-PER
+74 Paul Stankowski , Costantino Rocca ( Italy ) O B-PER E-PER O B-PER E-PER O S-LOC O
+75 Jim Furyk , Satoshi Higashi ( Japan ) , Willie Wood , Shigeki O B-PER E-PER O B-PER E-PER O S-LOC O O B-PER E-PER O S-PER
+Maruyama ( Japan ) S-PER O S-LOC O
+76 Scott McCarron O B-PER E-PER
+77 Wayne Westner ( South Africa ) , Steve Schneiter O B-PER E-PER O B-LOC E-LOC O O B-PER E-PER
+79 Tom Watson O B-PER E-PER
+81 Seiki Okuda ( Japan ) O B-PER E-PER O S-LOC O
+-DOCSTART- O
+SOCCER - GLORIA BISTRITA BEAT 2-1 F.C. VALLETTA . O O B-ORG E-ORG O O B-ORG E-ORG O
+BISTRITA 1996-08-22 S-LOC O
+Gloria Bistrita ( Romania ) beat 2-1 ( halftime 1-1 ) F.C. Valletta ( Malta ) in their Cup winners Cup match , second leg of the preliminary round , on Thursday . B-ORG E-ORG O S-LOC O O O O O O O B-ORG E-ORG O S-LOC O O O B-MISC I-MISC E-MISC O O O O O O O O O O O O
+Scorers : O O
+Gloria Bistrita - Ilie Lazar ( 32nd ) , Eugen Voica ( 84th ) B-ORG E-ORG O B-PER E-PER O O O O B-PER E-PER O O O
+F.C. La Valletta - Gilbert Agius ( 24th ) B-ORG I-ORG E-ORG O B-PER E-PER O O O
+Attendance : 8,000 O O O
+Gloria Bistrita won 4-2 on aggregate and qualified for the first round of the Cup winners Cup . B-ORG E-ORG O O O O O O O O O O O O B-MISC I-MISC E-MISC O
+REUTER S-PER
+-DOCSTART- O
+HORSE RACING - PIVOTAL ENDS 25-YEAR WAIT FOR TRAINER PRESCOTT . O O O S-PER O O O O O S-PER O
+YORK , England 1996-08-22 S-LOC O S-LOC O
+Sir Mark Prescott landed his first group one victory in 25 years as a trainer when his top sprinter Pivotal , a 100-30 chance , won the Nunthorpe Stakes on Thursday . O B-PER E-PER O O O O O O O O O O O O O O O O S-PER O O O O O O O B-MISC E-MISC O O O
+The three-year-old , partnered by veteran George Duffield , snatched a short head verdict in the last stride to deny Eveningperformance ( 16-1 ) , trained by Henry Candy and ridden by Chris Rutter . O O O O O O B-PER E-PER O O O O O O O O O O O O S-PER O O O O O O B-PER E-PER O O O B-PER E-PER O
+Hever Golf Rose ( 11-4 ) , last year 's Prix de l ' Abbaye winner at Longchamp , finished third , a further one and a quarter lengths away with the 7-4 favourite Mind Games in fourth . B-PER I-PER E-PER O O O O O O O B-MISC I-MISC I-MISC I-MISC E-MISC O O S-LOC O O O O O O O O O O O O O O O O B-PER E-PER O O O
+Pivotal , a Royal Ascot winner in June , may now be aimed at this season 's Abbaye , Europe 's top sprint race . S-PER O O B-PER E-PER O O O O O O O O O O O O S-MISC O S-LOC O O O O O
+Prescott , reluctant to go into the winner 's enclosure until the result of the photo-finish was announced , said : " Twenty-five years and I have never been there so I thought I had better wait a bit longer . " S-PER O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+He added : " It 's very sad to beat Henry Candy because I am godfather to his daughter . " O O O O O O O O O O B-PER E-PER O O O O O O O O O
+Like Prescott , Jack Berry , trainer of Mind Games , had gone into Thursday 's race in search of a first group one success after many years around the top of his profession . O S-PER O B-PER E-PER O O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O O
+Berry said : " I`m disappointed but I do n't feel suicidal . S-PER O O O O O O O O O O O O
+He ( Mind Games ) was going as well as any of them one and a half furlongs ( 300 metres ) out but he just did n't quicken . " O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+HORSE RACING - NUNTHORPE STAKES RESULTS . O O O O O O O
+YORK , England 1996-08-22 S-LOC O S-LOC O
+Result of the Nunthorpe Stakes , a group one race for two-year-olds and upwards , run over five furlongs ( 1 km ) on Thursday : O O O B-MISC E-MISC O O O O O O O O O O O O O O O O O O O O O
+1. Pivotal 100-30 ( ridden by George Duffield ) O S-PER O O O O B-PER E-PER O
+2. Eveningperformance 16-1 ( Chris Rutter ) O S-PER O O B-PER E-PER O
+3. Hever Golf Rose 11-4 ( Jason Weaver ) O B-PER I-PER E-PER O O B-PER E-PER O
+Eight ran . O O O
+Favourite : Mind Games ( 7-4 ) finished 4th O O B-PER E-PER O O O O O
+Distances : a short head , 1-1/4 lengths . O O O O O O O O O
+Winner owned by the Cheveley Park Stud and trained by Sir O O O O B-ORG I-ORG E-ORG O O O O
+Mark Prescott at Newmarket . B-PER E-PER O S-LOC O
+Value to winner : 72,464 pounds sterling ( $ 112,200 ) O O O O O O O O O O O
+-DOCSTART- O
+TENNIS - RESULTS AT TOSHIBA CLASSIC . O O O O B-MISC E-MISC O
+CARLSBAD , California 1996-08-21 S-LOC O S-LOC O
+Results from the O O O
+$ 450,000 Toshiba Classic tennis tournament on Wednesday O O B-MISC E-MISC O O O O
+( prefix number denotes seeding ) : O O O O O O O
+Second round O O
+1 - Arantxa Sanchez Vicario ( Spain ) beat Naoko Kijimuta ( Japan ) O O B-PER I-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+1-6 6-4 6-3 O O O
+4 - Kimiko Date ( Japan ) beat Yone Kamio ( Japan ) 6-2 7-5 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Sandrine Testud ( France ) beat 7 - Ai Sugiyama ( Japan ) 6-3 4-6 B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O
+6-4 O
+8 - Nathalie Tauziat ( France ) beat Shi-Ting Wang ( Taiwan ) 6-4 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O
+6-2 O
+-DOCSTART- O
+TENNIS - RESULTS AT HAMLET CUP . O O O O B-MISC E-MISC O
+COMMACK , New York 1996-08-21 S-LOC O B-LOC E-LOC O
+Results from the O O O
+Waldbaum Hamlet Cup tennis tournament on Wednesday ( prefix B-MISC I-MISC E-MISC O O O O O O
+number denotes seeding ) : O O O O O
+Second round O O
+1 - Michael Chang ( U.S. ) beat Sergi Bruguera ( Spain ) 6-3 6-2 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Michael Joyce ( U.S. ) beat 3 - Richey Reneberg ( U.S. ) 3-6 6-4 B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O
+6-3 O
+Martin Damm ( Czech Republic ) beat 6 - Younes El Aynaoui B-PER E-PER O B-LOC E-LOC O O O O B-PER I-PER E-PER
+( Morocco ) 5-7 6-3 3-0 retired O S-LOC O O O O O
+Karol Kucera ( Slovakia ) beat Hicham Arazi ( Morocco ) 7-6 ( 7-4 ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O
+7-5 O
+-DOCSTART- O
+SOCCER - DALGLISH SAD OVER BLACKBURN PARTING . O O S-PER O O S-ORG O O
+LONDON 1996-08-22 S-LOC O
+Kenny Dalglish spoke on Thursday of his sadness at leaving Blackburn , the club he led to the English premier league title in 1994-95 . B-PER E-PER O O O O O O O O S-ORG O O O O O O O S-MISC O O O O O O
+Blackburn announced on Wednesday they and Dalglish had parted by mutual consent . S-ORG O O O O O S-PER O O O O O O
+But the ex-manager confessed on Thursday to being " sad " at leaving after taking Blackburn from the second division to the premier league title inside three and a half years . O O O O O O O O O O O O O O O S-ORG O O O O O O O O O O O O O O O O
+In a telephone call to a local newspaper from his holiday home in Spain , Dalglish said : " We came to the same opinion , albeit the club came to it a little bit earlier than me . " O O O O O O O O O O O O O S-LOC O S-PER O O O O O O O O O O O O O O O O O O O O O O O O
+He added : " If no one asked , I never opened my mouth . O O O O O O O O O O O O O O O
+I have stayed out of the way and let them get on with the job . O O O O O O O O O O O O O O O O
+The club thought it ( the job ) had run its course and I came to the same conclusion . " O O O O O O O O O O O O O O O O O O O O O
+Dalglish had been with Blackburn for nearly five years , first as manager and then , for the past 15 months , as director of football . S-PER O O O S-ORG O O O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+CRICKET - ENGLISH COUNTY CHAMPIONSHIP SCORES . O O B-MISC I-MISC E-MISC O O
+LONDON 1996-08-22 S-LOC O
+Close of play scores in four-day O O O O O O
+English County Championship cricket matches on Thursday : S-MISC B-MISC E-MISC O O O O O
+Second day O O
+At Weston-super-Mare : Durham 326 ( D. Cox 95 not out , O S-LOC O S-ORG O O B-PER E-PER O O O O
+S. Campbell 69 ; G. Rose 7-73 ) . B-PER E-PER O O B-PER E-PER O O O
+Somerset 236-4 ( M. Lathwell 85 ) . S-ORG O O B-PER E-PER O O O
+Firsy day O O
+At Colchester : Gloucestershire 280 ( J. Russell 63 , A. Symonds O S-LOC O S-ORG O O B-PER E-PER O O B-PER E-PER
+52 ; A. Cowan 5-68 ) . O O B-PER E-PER O O O
+Essex 72-0 . S-ORG O O
+At Cardiff : Kent 128-1 ( M. Walker 59 , D. Fulton 53 not out ) v O S-LOC O S-ORG O O B-PER E-PER O O B-PER E-PER O O O O O
+Glamorgan . S-ORG O
+At Leicester : Leicestershire 343-8 ( P. Simmons 108 , P. Nixon O S-LOC O S-ORG O O B-PER E-PER O O B-PER E-PER
+67 not out ) v Hampshire . O O O O O S-ORG O
+At Northampton : Sussex 368-7 ( N. Lenham 145 , V. Drakes 59 not O S-LOC O S-ORG O O B-PER E-PER O O B-PER E-PER O O
+out , A. Wells 51 ) v Northamptonshire . O O B-PER E-PER O O O S-ORG O
+At Trent Bridge : Nottinghamshire 392-6 ( G. Archer 143 not O B-LOC E-LOC O S-ORG O O B-PER E-PER O O
+out , M. Dowman 107 ) v Surrey . O O B-PER E-PER O O O S-ORG O
+At Worcester : Warwickshire 255-9 ( A. Giles 57 not out , W. Khan O S-LOC O S-ORG O O B-PER E-PER O O O O B-PER E-PER
+52 ) v Worcestershire . O O O S-ORG O
+At Headingley : Yorkshire 305-5 ( C. White 66 not out , M. Moxon O S-LOC O S-ORG O O B-PER E-PER O O O O B-PER E-PER
+66 , M. Vaughan 57 ) v Lancashire . O O B-PER E-PER O O O S-ORG O
+-DOCSTART- O
+CRICKET - ENGLAND V PAKISTAN FINAL TEST SCOREBOARD . O O S-LOC O S-LOC O O O O
+LONDON 1996-08-22 S-LOC O
+Scoreboard on the first day of the O O O O O O O
+third and final test between England and Pakistan at The Oval on O O O O O S-LOC O S-LOC O B-LOC E-LOC O
+Thursday : O O
+England first innings S-LOC O O
+M. Atherton b Waqar Younis 31 B-PER E-PER O B-PER E-PER O
+A. Stewart b Mushtaq Ahmed 44 B-PER E-PER O B-PER E-PER O
+N. Hussain c Saeed Anwar b Waqar Younis 12 B-PER E-PER O B-PER E-PER O B-PER E-PER O
+G. Thorpe lbw b Mohammad Akram 54 B-PER E-PER O O B-PER E-PER O
+J. Crawley not out 94 B-PER E-PER O O O
+N. Knight b Mushtaq Ahmed 17 B-PER E-PER O B-PER E-PER O
+C. Lewis b Wasim Akram 5 B-PER E-PER O B-PER E-PER O
+I. Salisbury not out 1 B-PER E-PER O O O
+Extras ( lb-11 w-1 nb-8 ) 20 O O O O O O O
+Total ( for six wickets ) 278 O O O O O O O
+Fall of wickets : 1-64 2-85 3-116 4-205 5-248 6-273 O O O O O O O O O O
+To bat : R. Croft , D. Cork , A. Mullally O O O B-PER E-PER O B-PER E-PER O B-PER E-PER
+Bowling ( to date ) : Wasim Akram 25-8-61-1 , Waqar Younis O O O O O O B-PER E-PER O O B-PER E-PER
+20-6-70-2 , Mohammad Akram 12-1-41-1 , Mushtaq Ahmed 27-5-78-2 , O O B-PER E-PER O O B-PER E-PER O O
+Aamir Sohail 6-1-17-0 B-PER E-PER O
+Pakistan : Aamir Sohail , Saeed Anwar , Ijaz Ahmed , S-LOC O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Inzamam-ul-Haq , Salim Malik , Asif Mujtaba , Wasim Akram , Moin S-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O S-PER
+Khan , Mushtaq Ahmed , Waqar Younis , Mohammad Akam S-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER
+-DOCSTART- O
+SOCCER - FERGUSON BACK IN SCOTTISH SQUAD AFTER 20 MONTHS . O O S-PER O O S-MISC O O O O O
+GLASGOW 1996-08-22 S-LOC O
+Everton 's Duncan Ferguson , who scored twice against Manchester United on Wednesday , was picked on Thursday for the Scottish squad after a 20-month exile . S-ORG O B-PER E-PER O O O O O B-ORG E-ORG O O O O O O O O O S-MISC O O O O O O
+Glasgow Rangers striker Ally McCoist , another man in form after two hat-tricks in four days , was also named for the August 31 World Cup qualifier against Austria in Vienna . B-ORG E-ORG O B-PER E-PER O O O O O O O O O O O O O O O O O O O B-MISC E-MISC O O S-LOC O S-LOC O
+Ferguson , who served six weeks in jail in late 1995 for head-butting an opponent , won the last of his five Scotland caps in December 1994 . S-PER O O O O O O O O O O O O O O O O O O O O O S-LOC O O O O O
+Scotland manager Craig Brown said on Thursday : " I 've watched Duncan Ferguson in action twice recently and he 's bang in form . S-LOC O B-PER E-PER O O O O O O O O B-PER E-PER O O O O O O O O O O O
+Ally McCoist is also in great scoring form at the moment . " B-PER E-PER O O O O O O O O O O O
+Celtic 's Jackie McNamara , who did well with last season 's successful under-21 team , earns a call-up to the senior squad . S-ORG O B-PER E-PER O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+CRICKET - ENGLAND 100-2 AT LUNCH ON FIRST DAY OF THIRD TEST . O O S-LOC O O O O O O O O O O
+LONDON 1996-08-22 S-LOC O
+England were 100 for two at lunch on the first day of the third and final test against Pakistan at The Oval on Thursday . S-LOC O O O O O O O O O O O O O O O O O S-LOC O B-LOC E-LOC O O O
+-DOCSTART- O
+SOCCER - KEANE SIGNS FOUR-YEAR CONTRACT WITH MANCHESTER UNITED . O O S-PER O O O O B-LOC E-LOC O
+LONDON 1996-08-22 S-LOC O
+Ireland midfielder Roy Keane has signed a new four-year contract with English league and F.A. Cup champions Manchester United . S-LOC O B-PER E-PER O O O O O O O S-MISC O O B-MISC E-MISC O B-ORG E-ORG O
+" Roy agreed a new deal before last night 's game against Everton and we are delighted , " said United manager Alex Ferguson on Thursday . O S-PER O O O O O O O O O O S-ORG O O O O O O O S-ORG O B-PER E-PER O O O
+-DOCSTART- O
+TENNIS - RESULTS AT CANADIAN OPEN . O O O O B-MISC E-MISC O
+TORONTO 1996-08-21 S-LOC O
+Results from the Canadian Open O O O B-MISC E-MISC
+tennis tournament on Wednesday ( prefix number denotes O O O O O O O O
+seeding ) : O O O
+Second round O O
+Daniel Nestor ( Canada ) beat 1 - Thomas Muster ( Austria ) 6-3 7-5 B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O
+Mikael Tillstrom ( Sweden ) beat 2 - Goran Ivanisevic ( Croatia ) B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O
+6-7 ( 3-7 ) 6-4 6-4 O O O O O O
+3 - Wayne Ferreira ( South Africa ) beat Jiri Novak ( Czech O O B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC
+Republic ) 7-5 6-3 S-LOC O O O
+4 - Marcelo Rios ( Chile ) beat Kenneth Carlsen ( Denmark ) 6-3 6-2 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+6 - MaliVai Washington ( U.S. ) beat Alex Corretja ( Spain ) 6-4 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O
+6-2 O
+7 - Todd Martin ( U.S. ) beat Renzo Furlan ( Italy ) 7-6 ( 7-3 ) 6-3 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O
+Mark Philippoussis ( Australia ) beat 8 - Marc Rosset B-PER E-PER O S-LOC O O O O B-PER E-PER
+( Switzerland ) 6-3 3-6 7-6 ( 8-6 ) O S-LOC O O O O O O O
+9 - Cedric Pioline ( France ) beat Gregory Carraz ( France ) 7-6 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O
+( 7-1 ) 6-4 O O O O
+Patrick Rafter ( Australia ) beat 11 - Alberto Berasategui B-PER E-PER O S-LOC O O O O B-PER E-PER
+( Spain ) 6-1 6-2 O S-LOC O O O
+Petr Korda ( Czech Republic ) beat 12 - Francisco Clavet ( Spain ) B-PER E-PER O B-LOC E-LOC O O O O B-PER E-PER O S-LOC O
+6-3 6-4 O O
+Daniel Vacek ( Czech Republic ) beat 13 - Jason Stoltenberg B-PER E-PER O B-LOC E-LOC O O O O B-PER E-PER
+( Australia ) 5-7 7-6 ( 7-1 ) 7-6 ( 13-11 ) O S-LOC O O O O O O O O O O
+Todd Woodbridge ( Australia beat Sebastien Lareau ( Canada ) 6-3 B-PER E-PER O S-LOC O B-PER E-PER O S-LOC O O
+1-6 6-3 O O
+Alex O'Brien ( U.S. ) beat Byron Black ( Zimbabwe ) 7-6 ( 7-2 ) 6-2 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O O O
+Bohdan Ulihrach ( Czech Republic ) beat Andrea Gaudenzi ( Italy ) B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC O
+6-3 4-6 6-1 O O O
+Tim Henman ( Britain ) beat Chris Woodruff ( U.S. ) , walkover B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+-DOCSTART- O
+CRICKET - MILLNS SIGNS FOR BOLAND . O O S-PER O O S-ORG O
+CAPE TOWN 1996-08-22 B-LOC E-LOC O
+South African provincial side Boland said on Thursday they had signed Leicestershire fast bowler David Millns on a one year contract . B-MISC E-MISC O O S-ORG O O O O O O S-ORG O O B-PER E-PER O O O O O O
+Millns , who toured Australia with England A in 1992/93 , replaces former England all-rounder Phillip DeFreitas as Boland 's overseas professional . S-MISC O O O S-LOC O S-LOC O O O O O O S-LOC O B-PER E-PER O S-ORG O O O O
+-DOCSTART- O
+SOCCER - EUROPEAN CUP WINNERS ' CUP RESULTS . O O B-MISC I-MISC I-MISC I-MISC E-MISC O O
+TIRANA 1996-08-22 S-LOC O
+Results of European Cup Winners ' O O B-MISC I-MISC I-MISC E-MISC
+Cup qualifying round , second leg soccer matches on Thursday : S-MISC O O O O O O O O O O
+In Tirana : Flamurtari Vlore ( Albania ) 0 Chemlon Humenne O S-LOC O B-ORG E-ORG O S-LOC O O B-ORG E-ORG
+( Slovakia ) 2 ( halftime 0-0 ) O S-LOC O O O O O O
+Scorers : Lubarskij ( 50th minute ) , Valkucak ( 54th ) O O S-PER O O O O O S-PER O O O
+Attendance : 5,000 O O O
+Chemlon Humenne win 3-0 on aggregate B-ORG E-ORG O O O O
+In Bistrita : Gloria Bistrita ( Romania ) 2 Valletta ( Malta ) 1 O S-LOC O B-ORG E-ORG O S-LOC O O S-LOC O S-LOC O O
+( 1-1 ) O O O
+Scorers : O O
+Gloria Bistrita - Ilie Lazar ( 32nd ) , Eugen Voica ( 84th ) B-ORG E-ORG O B-PER E-PER O O O O B-PER E-PER O O O
+Valletta - Gilbert Agius ( 24th ) S-LOC O B-PER E-PER O O O
+Attendance : 8,000 O O O
+Gloria Bistrita win 4-2 on aggregate . B-ORG E-ORG O O O O O
+In Chorzow : Ruch Chorzow ( Poland ) 5 Llansantffraid ( Wales ) 0 O S-LOC O B-ORG E-ORG O S-LOC O O S-ORG O S-LOC O O
+( 1-0 ) O O O
+Scorers : Arkadiusz Bak ( 1st and 55th ) , Arwel Jones ( 47th , O O B-PER E-PER O O O O O O B-PER E-PER O O O
+own goal ) , Miroslav Bak ( 62nd and 63rd ) O O O O B-PER E-PER O O O O O
+Attendance : 6,500 O O O
+Ruch Chorzow win 6-1 on aggregate B-ORG E-ORG O O O O
+In Larnaca : AEK Larnaca ( Cyprus ) 5 Kotaik Abovyan ( Armenia ) O S-LOC O B-ORG E-ORG O S-LOC O O B-ORG E-ORG O S-LOC O
+0 ( 2-0 ) O O O O
+Scorers : Zoran Kundic ( 28th ) , Klimis Alexandrou ( 41st ) , O O B-PER E-PER O O O O B-PER E-PER O O O O
+Milenko Kovasevic ( 60th , penalty ) , Goran Koprinovic ( 82nd ) , B-PER E-PER O O O O O O B-PER E-PER O O O O
+Pavlos Markou ( 84th ) B-PER E-PER O O O
+Attendance : 5,000 O O O
+AEK Larnaca win 5-1 on aggregate B-ORG E-ORG O O O O
+In Siauliai : Kareda Siauliai ( Lithuania ) 0 Sion O S-LOC O B-ORG E-ORG O S-LOC O O S-ORG
+( Switzerland ) 0 O S-LOC O O
+Attendance : 5,000 O O O
+Sion win 4-2 on agrregate . S-ORG O O O O O
+In Vinnytsya : O S-LOC O
+Nyva Vinnytsya ( Ukraine ) 1 Tallinna Sadam ( Estonia ) 0 ( 0-0 ) B-ORG E-ORG O S-LOC O O B-ORG E-ORG O S-LOC O O O O O
+Attendance : 3,000 O O O
+Aggregate score 2-2 . O O O O
+Nyva qualified on away goals rule . S-ORG O O O O O O
+In Bergen : Brann ( Norway ) 2 Shelbourne ( Ireland ) 1 ( 1-1 ) O S-LOC O S-ORG O S-LOC O O S-ORG O S-LOC O O O O O
+Scorers : O O
+Brann - Mons Ivar Mjelde ( 10th ) , Jan Ove Pedersen ( 72nd ) S-ORG O B-PER I-PER E-PER O O O O B-PER I-PER E-PER O O O
+Shelbourne - Mark Rutherford ( 5th ) S-ORG O B-PER E-PER O O O
+Attendance : 2,189 O O O
+Brann win 5-2 on aggregate S-ORG O O O O
+In Sofia : Levski Sofia ( Bulgaria ) 1 Olimpija ( Slovenia ) 0 O S-LOC O B-ORG E-ORG O S-LOC O O S-ORG O S-LOC O O
+( 0-0 ) O O O
+Scorer : Ilian Simeonov ( 58th ) O O B-PER E-PER O O O
+Attendance : 25,000 O O O
+Aggregate 1-1 . O O O
+Olimpija won 4-3 on penalties . S-ORG O O O O O
+In Vaduz : Vaduz ( Liechtenstein ) 1 RAF Riga ( Latvia ) 1 ( 0-0 ) O S-LOC O S-LOC O S-LOC O O B-ORG E-ORG O S-LOC O O O O O
+Scorers : O O
+Vaduz - Daniele Polverino ( 90th ) S-LOC O B-PER E-PER O O O
+RAF Riga - Agrins Zarins ( 47th ) B-ORG E-ORG O B-PER E-PER O O O
+Aggregate 2-2 . O O O
+Vaduz won 4-2 on penalties . S-LOC O O O O O
+In Luxembourg : US Luxembourg ( Luxembourg ) 0 Varteks Varazdin O S-LOC O B-ORG E-ORG O S-LOC O O B-ORG E-ORG
+( Croatia ) 3 ( 0-0 ) O S-LOC O O O O O
+Scorers : Drazen Beser ( 63rd ) , Miljenko Mumler ( penalty , O O B-PER E-PER O O O O B-PER E-PER O O O
+78th ) , Jamir Cvetko ( 87th ) O O O B-PER E-PER O O O
+Attendance : 800 O O O
+Varteks Varazdin win 5-1 on aggregate . B-ORG E-ORG O O O O O
+In Torshavn : Havnar Boltfelag ( Faroe Islands ) 0 Dynamo O S-LOC O B-ORG E-ORG O B-LOC E-LOC O O S-ORG
+Batumi ( Georgia ) 3 ( 0-2 ) S-ORG O S-LOC O O O O O
+Dynamo Batumi win 9-0 on aggregate . B-ORG E-ORG O O O O O
+In Prague : Sparta Prague ( Czech Republic ) 8 Glentoran O S-LOC O B-ORG E-ORG O B-LOC E-LOC O O S-ORG
+( Northern Ireland ) 0 ( 4-0 ) O B-LOC E-LOC O O O O O
+Scorers : Petr Gunda ( 1st and 26th ) , Lumir Mistr ( 19th ) , O O B-PER E-PER O O O O O O B-PER E-PER O O O O
+Horst Siegl ( 24th , 48th , 80th ) , Zdenek Svoboda ( 76th ) , Petr B-PER E-PER O O O O O O O O B-PER E-PER O O O O S-PER
+Gabriel ( 86th ) S-PER O O O
+Sparta win 10-1 on aggregate . S-ORG O O O O O
+In Edinburgh : Hearts ( Scotland ) 1 Red Star Belgrade O S-LOC O S-ORG O S-LOC O O B-ORG I-ORG E-ORG
+( Yugoslavia ) 1 ( 1-0 ) O S-LOC O O O O O
+Scorers : O O
+Hearts - Dave McPherson ( 44th ) S-ORG O B-PER E-PER O O O
+Red Star - Vinko Marinovic ( 59th ) B-ORG E-ORG O B-MISC E-MISC O O O
+Attendance : 15,062 O O O
+Aggregate 1-1 . O O O
+Red Star win on away goals rule . B-ORG E-ORG O O O O O O
+In Rishon-Lezion : Hapoel Ironi ( Israel ) 3 Constructorul O S-LOC O B-ORG E-ORG O S-LOC O O S-ORG
+Chisinau ( Moldova ) 2 ( 2-1 ) S-ORG O S-LOC O O O O O
+Aggregate 3-3 . O O O
+Constructorul win on away goals rule . S-ORG O O O O O O
+In Anjalonkoski : MyPa-47 ( Finland ) 1 Karabach Agdam O S-MISC O S-ORG O S-LOC O O B-ORG E-ORG
+( Azerbaijan ) 1 ( 0-0 ) O S-LOC O O O O O
+Mypa-47 win 2-1 on aggregate . S-ORG O O O O O
+In Skopje : Sloga Jugomagnat ( Macedonia ) 0 Kispest Honved O S-LOC O B-ORG E-ORG O S-LOC O O B-ORG E-ORG
+( Hungary 1 ( 0-0 ) O S-LOC O O O O
+Kispest Honved win 2-0 on aggregate . B-ORG E-ORG O O O O O
+Add Hapoel Ironi v Constructorul Chisinau B-ORG I-ORG E-ORG O B-ORG E-ORG
+Scorers : O O
+Rishon - Moshe Sabag ( 10th minute ) , Nissan Kapeta ( 26th ) , S-ORG O B-PER E-PER O O O O O B-PER E-PER O O O O
+Tomas Cibola ( 58th ) . B-PER E-PER O O O O
+Constructorol - Sergei Rogachev ( 42nd ) , Gennadi Skidan S-ORG O B-PER E-PER O O O O B-PER E-PER
+( 87th ) . O O O O
+Attendance : 1,500 . O O O O
+-DOCSTART- O
+SOCCER - GOTHENBURG PUT FERENCVAROS OUT OF EURO CUP . O O S-LOC O S-ORG O O B-MISC E-MISC O
+BUDAPEST 1996-08-21 S-LOC O
+IFK Gothenburg of Sweden drew 1-1 ( 1-0 ) with Ferencvaros of Hungary in the second leg of their European Champions Cup preliminary round tie played on Wednesday . B-ORG E-ORG O S-LOC O O O O O O S-ORG O S-LOC O O O O O O B-MISC I-MISC E-MISC O O O O O O O
+Gothenburg go through 4-1 on aggregate . S-LOC O O O O O O
+Scorers : O O
+Ferencvaros : S-ORG O
+Ferenc Horvath ( 15th ) B-PER E-PER O O O
+IFK Gothenburg : B-ORG E-ORG O
+Andreas Andersson ( 87th ) B-PER E-PER O O O
+Attendance : 9,000 O O O
+-DOCSTART- O
+SOCCER - BRAZILIAN CHAMPIONSHIP RESULTS . O O S-MISC O O O
+RIO DE JANEIRO 1996-08-22 B-LOC I-LOC E-LOC O
+Results of midweek O O O
+matches in the Brazilian soccer championship . O O O S-MISC O O O
+Bahia 2 Atletico Paranaense 0 S-ORG O B-ORG E-ORG O
+Corinthians 1 Guarani 0 S-ORG O S-ORG O
+Coritiba 1 Atletico Mineiro 0 S-ORG O B-ORG E-ORG O
+Cruzeiro 2 Vitoria 1 S-ORG O S-ORG O
+Flamengo 0 Juventude 1 S-ORG O S-ORG O
+Goias 3 Sport Recife 1 S-ORG O B-ORG E-ORG O
+Gremio 6 Bragantino 1 S-ORG O S-ORG O
+Palmeiras 3 Vasco da Gama 1 S-ORG O B-ORG I-ORG E-ORG O
+Portuguesa 2 Parana 0 S-ORG O S-ORG O
+-DOCSTART- O
+TENNIS - NEWCOMBE PONDERS HIS DAVIS CUP FUTURE . O O S-PER O O B-MISC E-MISC O O
+SYDNEY 1996-08-22 S-LOC O
+Australian Davis Cup captain John Newcombe on Thursday signalled his possible resignation if his team loses an away tie against Croatia next month . S-MISC B-MISC E-MISC O B-PER E-PER O O O O O O O O O O O O O O S-LOC O O O
+The former Wimbledon champion said the immediate future of Australia 's Davis Cup coach Tony Roche could also be determined by events in Split . O O S-MISC O O O O O O S-LOC O B-MISC E-MISC O B-PER E-PER O O O O O O O S-LOC O
+" If we lose this one , Tony and I will have to have a good look at giving someone else a go , " Newcombe was quoted as saying in Sydney 's Daily Telegraph newspaper . O O O O O O O S-PER O O O O O O O O O O O O O O O O O S-PER O O O O O S-LOC O B-ORG E-ORG O O
+Australia face Croatia in the world group qualifying tie on clay from September 20-22 . S-LOC O S-LOC O O O O O O O O O O O O
+Under Newcombe 's leadership , Australia were relegated from the elite world group last year , the first time the 26-time Davis Cup winners had slipped from the top rank . O S-PER O O O S-LOC O O O O O O O O O O O O O O O B-MISC E-MISC O O O O O O O O
+Since taking over as captain from Neale Fraser in 1994 , Newcombe 's record in tandem with Roche , his former doubles partner , has been three wins and three losses . O O O O O O B-PER E-PER O O O S-PER O O O O O S-PER O O O O O O O O O O O O O O
+Newcombe has selected Wimbledon semifinalist Jason Stoltenberg , Patrick Rafter , Mark Philippoussis , and Olympic doubles champions Todd Woodbridge and Mark Woodforde to face the Croatians . S-PER O O S-MISC O B-PER E-PER O B-PER E-PER O B-PER E-PER O O S-MISC O O B-PER E-PER O B-PER E-PER O O O S-MISC O
+The home side boasts world number six Goran Ivanisevic , and Newcombe conceded his players would be hard-pressed to beat the Croatian number one . O O O O O O O B-PER E-PER O O S-PER O O O O O O O O O S-MISC O O O
+" We are ready to fight to our last breath -- Australia must play at its absolute best to win , " said Newcombe , who described the tie as the toughest he has faced as captain . O O O O O O O O O O O S-LOC O O O O O O O O O O O S-PER O O O O O O O O O O O O O O
+Australia last won the Davis Cup in 1986 , but they were beaten finalists against Germany three years ago under Fraser 's guidance . S-LOC O O O B-MISC E-MISC O O O O O O O O O S-LOC O O O O S-PER O O O
+-DOCSTART- O
+BADMINTON - MALAYSIAN OPEN RESULTS . O O B-MISC E-MISC O O
+KUALA LUMPUR 1996-08-22 B-LOC E-LOC O
+Results in the Malaysian O O O S-MISC
+Open badminton tournament on Thursday ( prefix number denotes S-MISC O O O O O O O O
+seeding ) : O O O
+Men 's singles , third round O O O O O O
+9/16 - Luo Yigang ( China ) beat Hwang Sun-ho ( South Korea ) 15-3 O O B-PER E-PER O S-LOC O O S-PER S-MISC O B-LOC E-LOC O O
+15-7 O
+Jason Wong ( Malaysia ) beat Abdul Samad Ismail ( Malaysia ) 16-18 B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O
+15-2 17-14 O O
+P. Kantharoopan ( Malaysia ) beat 3/4 - Jeroen Van Dijk B-PER E-PER O S-LOC O O O O B-PER I-PER E-PER
+( Netherlands ) 15-11 18-14 O S-LOC O O O
+Wijaya Indra ( Indonesia ) beat 5/8 - Pang Chen ( Malaysia ) 15-6 B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O
+6-15 15-7 O O
+3/4 - Hu Zhilan ( China ) beat Nunung Subandoro ( Indonesia ) 5-15 O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O
+18-15 15-6 O O
+9/16 - Hermawan Susanto ( Indonesia ) beat 1 - Fung Permadi ( Taiwan ) O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O
+15-8 15-12 O O
+Women 's singles 2nd round O O O O O
+1 - Wang Chen ( China ) beat Cindana ( Indonesia ) 11-3 1ama ( Japan ) beat Margit Borg ( Sweden ) 11-6 11-6 O O B-PER E-PER O S-LOC O O S-PER O S-LOC O O S-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+Sun Jian ( China ) beat Marina Andrievskaqya ( Sweden ) 11-8 11-2 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
+5/8 - Meluawati ( Indonesia ) beat Chan Chia Fong ( Malaysia ) 11-6 O O S-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O
+11-1 O
+Gong Zhichao ( China ) beat Liu Lufung ( China ) 6-11 11-7 11-3 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O
+Zeng Yaqiong ( China ) beat Li Feng ( New Zealand ) 11-9 11-6 B-PER E-PER O S-LOC O O B-PER E-PER O B-LOC E-LOC O O O
+5/8 - Christine Magnusson ( Sweden ) beat Ishwari Boopathy O O B-PER E-PER O S-LOC O O B-PER E-PER
+( Malaysia ) 11-1 10-12 11-4 O S-LOC O O O O
+2 - Zhang Ning ( China ) beat Olivia ( Indonesia ) 11-8 11-6 O O B-PER E-PER O S-LOC O O S-PER O S-LOC O O O
+-DOCSTART- O
+TENNIS - REVISED MEN 'S DRAW FOR U.S. OPEN . O O O O O O O B-MISC E-MISC O
+NEW YORK 1996-08-22 B-LOC E-LOC O
+Revised singles draw for the O O O O O
+U.S. Open tennis championships beginning Monday at the U.S . B-MISC E-MISC O O O O O O S-LOC O
+National Tennis Centre ( prefix denotes seeding ) : B-LOC I-LOC E-LOC O O O O O O
+Men 's Draw O O O
+1 - Pete Sampras ( U.S. ) vs. Adrian Voinea ( Romania ) O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Jiri Novak ( Czech Republic ) vs. qualifier B-PER E-PER O B-LOC E-LOC O O O
+Magnus Larsson ( Sweden ) vs. Alexander Volkov ( Russia ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Mikael Tillstrom ( Sweden ) vs qualifier B-PER E-PER O S-LOC O O O
+Qualifier vs. Andrei Olhovskiy ( Russia ) O O B-PER E-PER O S-LOC O
+Mark Woodforde ( Australia ) vs. Mark Philippoussis ( Australia ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Roberto Carretero ( Spain ) vs. Jordi Burillo ( Spain ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Francisco Clavet ( Spain ) vs. 16 - Cedric Pioline ( France ) B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O
+------------------------ O
+9 - Wayne Ferreira ( South Africa ) vs. qualifier O O B-PER E-PER O B-LOC E-LOC O O O
+Karol Kucera ( Slovakia ) vs. Jonas Bjorkman ( Sweden ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Qualifier vs. Christian Rudd ( Norway ) O O B-PER E-PER O S-LOC O
+Alex Corretja ( Spain ) vs. Byron Black ( Zimbabwe ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+David Rikl ( Czech Republic ) vs. Hicham Arazi ( Morocco ) B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC O
+Sjeng Schalken ( Netherlands ) vs. Gilbert Schaller ( Austria ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Grant Stafford ( South Africa ) vs. Guy Forget ( France ) B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC O
+Fernando Meligeni ( Brazil ) vs. 7 - Yevgeny Kafelnikov ( Russia ) B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O
+------------------------ O
+4 - Goran Ivanisevic ( Croatia ) vs. Andrei Chesnokov ( Russia ) O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Scott Draper ( Australia ) vs. Galo Blanco ( Spain ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Renzo Furlan ( Italy ) vs. Thomas Johansson ( Sweden ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Hendrik Dreekman ( Germany ) vs. Greg Rusedski ( Britain ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Andrei Medvedev ( Ukraine ) vs. Jean-Philippe Fleurian ( France ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Jan Kroslak ( Slovakia ) vs. Chris Woodruff ( U.S. ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Qualifier vs. Petr Korda ( Czech Republic ) O O B-PER E-PER O B-LOC E-LOC O
+Bohdan Ulihrach ( Czech Republic ) vs. 14 - Alberto Costa B-PER E-PER O B-LOC E-LOC O O O O B-PER E-PER
+( Spain ) O S-LOC O
+------------------------ O
+12 - Todd Martin ( U.S. ) vs. Younnes El Aynaoui ( Morocco ) O O B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O
+Andrea Gaudenzi ( Italy ) vs. Shuzo Matsuoka ( Japan ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Doug Flach ( U.S. ) vs. qualifier B-PER E-PER O S-LOC O O O
+Mats Wilander ( Sweden ) vs. Tim Henman ( Britain ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Paul Haarhuis ( Netherlands ) vs. Michael Joyce ( U.S. ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Michael Tebbutt ( Australia ) vs. Richey Reneberg ( U.S. ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Jonathan Stark ( U.S. ) vs. Bernd Karbacher ( Germany ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Stefan Edberg ( Sweden ) vs. 5 - Richard Krajicek ( Netherlands ) B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O
+------------------------ O
+6 - Andre Agassi ( U.S. ) vs. Mauricio Hadad ( Colombia ) O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Marcos Ondruska ( South Africa ) vs. Felix Mantilla ( Spain ) B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC O
+Carlos Moya ( Spain ) vs. Scott Humphries ( U.S. ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Jan Siemerink ( Netherlands ) vs. Carl-Uwe Steeb ( Germany ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Qualifier vs. qualifier O O O
+David Wheaton ( U.S. ) vs. Kevin Kim ( U.S. ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Nicolas Lapentti ( Ecuador ) vs. Alex O'Brien ( U.S. ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Karim Alami ( Morocco ) vs. 11 - MaliVai Washington ( U.S. ) B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O
+------------------------ O
+13 - Thomas Enqvist ( Sweden ) vs. Stephane Simian ( France ) O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Guillaume Raoux ( France ) vs. Filip Dewulf ( Belgium ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Mark Knowles ( Bahamas ) vs. Marcelo Filippini ( Uruguay ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Todd Woodbridge ( Australia ) vs. qualifier B-PER E-PER O S-LOC O O O
+Kris Goossens ( Belgium ) vs. Sergi Bruguera ( Spain ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Qualifier vs. Michael Stich ( Germany ) O O B-PER E-PER O S-LOC O
+Qualifier vs. Chuck Adams ( U.S. ) O O B-PER E-PER O S-LOC O
+Javier Frana ( Argentina ) vs. 3 - Thomas Muster ( Austria ) B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O
+------------------------ O
+8 - Jim Courier ( U.S. ) vs. Javier Sanchez ( Spain ) O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Jim Grabb ( U.S. ) vs. Sandon Stolle ( Australia ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Patrick Rafter ( Australia ) vs. Kenneth Carlsen ( Denmark ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Jason Stoltenberg ( Australia ) vs. Stefano Pescosolido ( Italy ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Arnaud Boetsch ( France ) vs. Nicolas Pereira ( Venezuela ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Carlos Costa ( Spain ) vs. Magnus Gustafsson ( Sweden ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Jeff Tarango ( U.S. ) vs. Alex Radulescu ( Germany ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Qualifier vs. 10 - Marcelo Rios ( Chile ) O O O O B-PER E-PER O S-LOC O
+------------------------ O
+15 - Marc Rosset ( Switzerland vs. Jared Palmer ( U.S. ) O O B-PER E-PER O S-LOC O B-PER E-PER O S-LOC O
+Martin Damm ( Czech Republic ) vs. Hernan Gumy ( Argentina ) B-PER E-PER O B-LOC E-LOC O O B-PER E-PER O S-LOC O
+Nicklas Kulti ( Sweden ) vs. Jakob Hlasek ( Switzerland ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Cecil Mamiit ( U.S. ) vs. Alberto Berasategui ( Spain ) B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O
+Vince Spadea ( U.S. ) vs. Daniel Vacek ( Czech Republic ) B-PER E-PER O S-LOC O O B-PER E-PER O B-LOC E-LOC O
+David Prinosil ( Germany ) vs. qualifier B-PER E-PER O S-LOC O O O
+Qualifier vs. Tomas Carbonell ( Spain ) O O B-PER E-PER O S-LOC O
+Qualifier vs. 2 - Michael Chang ( U.S. ) O O O O B-PER E-PER O S-LOC O
+-DOCSTART- O
+BASEBALL - ORIOLES ' MANAGER DAVEY JOHNSON HOSPITALIZED . O O S-ORG O O B-PER E-PER O O
+BALTIMORE 1996-08-22 S-LOC O
+Baltimore Orioles manager Davey Johnson will miss Thursday night 's game against the Seattle Mariners after being admitted to a hospital with an irregular heartbeat . B-ORG E-ORG O B-PER E-PER O O O O O O O O B-ORG E-ORG O O O O O O O O O O O
+The 53-year-old Johnson was hospitalized after experiencing dizziness . O O S-PER O O O O O O
+" He is in no danger and will be treated and observed this evening , " said Orioles team physician Dr. William Goldiner , adding that Johnson is expected to be released on Friday . O O O O O O O O O O O O O O O O O S-ORG O O O B-PER E-PER O O O S-PER O O O O O O O O
+Orioles ' bench coach Andy Etchebarren will manage the club in Johnson 's absence . S-ORG O O O B-PER E-PER O O O O O S-PER O O O
+Johnson is the second manager to be hospitalized this week after California Angels skipper John McNamara was admitted to New York 's Columbia Presbyterian Hospital on Wednesday with a blood clot in his left calf . S-PER O O O O O O O O O O B-ORG E-ORG O B-PER E-PER O O O B-LOC E-LOC O B-LOC I-LOC E-LOC O O O O O O O O O O O
+Johnson , who played eight seasons in Baltimore , was named Orioles manager in the off-season replacing Phil Regan . S-PER O O O O O O S-LOC O O O S-ORG O O O O O B-PER E-PER O
+He led the Cincinnati Reds to the National League Championship Series last year and guided the New York Mets to a World Series championship in 1986 . O O O B-ORG E-ORG O O B-MISC I-MISC I-MISC E-MISC O O O O O B-ORG I-ORG E-ORG O O B-MISC E-MISC O O O O
+Baltimore has won 16 of its last 22 games to pull within five games of the slumping New York Yankees in the American League East Division . S-ORG O O O O O O O O O O O O O O O O B-ORG I-ORG E-ORG O O B-MISC I-MISC I-MISC E-MISC O
+-DOCSTART- O
+BASEBALL - MAJOR LEAGUE STANDINGS AFTER WEDNESDAY 'S GAMES . O O B-MISC E-MISC O O O O O O
+NEW YORK 1996-08-22 B-LOC E-LOC O
+Major League Baseball B-MISC I-MISC E-MISC
+standings after games played on Wednesday ( tabulate under won , O O O O O O O O O O O
+lost , winning percentage and games behind ) : O O O O O O O O O
+AMERICAN LEAGUE B-MISC E-MISC
+EASTERN DIVISION B-MISC E-MISC
+W L PCT GB O O O O
+NEW YORK 72 53 .576 - B-ORG E-ORG O O O O
+BALTIMORE 67 58 .536 5 S-ORG O O O O
+BOSTON 63 64 .496 10 S-ORG O O O O
+TORONTO 58 69 .457 15 S-ORG O O O O
+DETROIT 44 82 .349 28 1/2 S-ORG O O O O O
+CENTRAL DIVISION B-MISC E-MISC
+CLEVELAND 76 51 .598 - S-ORG O O O O
+CHICAGO 69 59 .539 7 1/2 S-ORG O O O O O
+MINNESOTA 63 63 .500 12 1/2 S-ORG O O O O O
+MILWAUKEE 60 68 .469 16 1/2 S-ORG O O O O O
+KANSAS CITY 58 70 .453 18 1/2 B-ORG E-ORG O O O O O
+WESTERN DIVISION B-MISC E-MISC
+TEXAS 73 54 .575 - S-ORG O O O O
+SEATTLE 64 61 .512 8 S-ORG O O O O
+OAKLAND 62 67 .481 12 S-ORG O O O O
+CALIFORNIA 58 68 .460 14 1/2 S-ORG O O O O O
+THURSDAY , AUGUST 22 SCHEDULE O O O O O
+OAKLAND AT BOSTON S-ORG O S-LOC
+SEATTLE AT BALTIMORE S-ORG O S-LOC
+CALIFORNIA AT NEW YORK S-ORG O B-LOC E-LOC
+TORONTO AT CHICAGO S-ORG O S-LOC
+DETROIT AT KANSAS CITY S-ORG O B-LOC E-LOC
+TEXAS AT MINNESOTA S-ORG O S-LOC
+NATIONAL LEAGUE B-MISC E-MISC
+EASTERN DIVISION B-MISC E-MISC
+W L PCT GB O O O O
+ATLANTA 79 46 .632 - S-ORG O O O O
+MONTREAL 67 58 .536 12 S-ORG O O O O
+NEW YORK 59 69 .461 21 1/2 B-ORG E-ORG O O O O O
+FLORIDA 58 69 .457 22 S-ORG O O O O
+PHILADELPHIA 52 75 .409 28 S-ORG O O O O
+CENTRAL DIVISION B-MISC E-MISC
+HOUSTON 68 59 .535 - S-ORG O O O O
+ST LOUIS 67 59 .532 1/2 B-ORG E-ORG O O O O
+CHICAGO 63 62 .504 4 S-ORG O O O O
+CINCINNATI 62 62 .500 4 1/2 S-ORG O O O O O
+PITTSBURGH 53 73 .421 14 1/2 S-ORG O O O O O
+WESTERN DIVISION B-MISC E-MISC
+SAN DIEGO 70 59 .543 - B-ORG E-ORG O O O O
+LOS ANGELES 66 60 .524 2 1/2 B-ORG E-ORG O O O O O
+COLORADO 65 62 .512 4 S-ORG O O O O
+SAN FRANCISCO 54 70 .435 13 1/2 B-ORG E-ORG O O O O O
+THURSDAY , AUGUST 22 SCHEDULE O O O O O
+ST LOUIS AT COLORADO B-ORG E-ORG O S-LOC
+CINCINNATI AT ATLANTA S-ORG O S-LOC
+PITTSBURGH AT HOUSTON S-ORG O S-LOC
+PHILADELPHIA AT LOS ANGELES S-ORG O B-LOC E-LOC
+MONTREAL AT SAN FRANCISCO S-ORG O B-LOC E-LOC
+-DOCSTART- O
+BASEBALL - MAJOR LEAGUE RESULTS WEDNESDAY . O O B-MISC E-MISC O O O
+NEW YORK 1996-08-22 B-LOC E-LOC O
+Results of Major League O O B-MISC E-MISC
+Baseball games played on Wednesday ( home team in CAPS ) : O O O O O O O O O O O O
+American League B-MISC E-MISC
+California 7 NEW YORK 1 S-ORG O B-ORG E-ORG O
+DETROIT 7 Chicago 4 S-ORG O S-ORG O
+Milwaukee 10 MINNESOTA 7 S-ORG O S-ORG O
+BOSTON 6 Oakland 4 S-ORG O S-ORG O
+BALTIMORE 10 Seattle 5 S-ORG O S-ORG O
+Texas 10 CLEVELAND 8 ( in 10 ) S-ORG O S-ORG O O O O O
+Toronto 6 KANSAS CITY 2 S-ORG O B-ORG E-ORG O
+National League B-MISC E-MISC
+CHICAGO 8 Florida 3 S-ORG O S-ORG O
+SAN FRANCISCO 12 New York 11 B-ORG E-ORG O B-ORG E-ORG O
+ATLANTA 4 Cincinnati 3 S-ORG O S-ORG O
+Pittsburgh 5 HOUSTON 2 S-ORG O S-ORG O
+COLORADO 10 St Louis 2 S-ORG O B-ORG E-ORG O
+Philadelphia 6 LOS ANGELES 0 S-ORG O B-ORG E-ORG O
+SAN DIEGO 7 Montreal 2 B-ORG E-ORG O S-ORG O
+-DOCSTART- O
+BASEBALL - GREER HOMER IN 10TH LIFTS TEXAS PAST INDIANS . O O S-PER O O O O S-ORG O S-ORG O
+CLEVELAND 1996-08-22 S-LOC O
+Rusty Greer 's two-run homer in the top of the 10th inning rallied the Texas Rangers to a 10-8 victory over the Cleveland Indians Wednesday in the rubber game of a three-game series between division leaders . B-PER E-PER O O O O O O O O O O O O B-ORG E-ORG O O O O O O B-ORG E-ORG O O O O O O O O O O O O O
+With one out , Greer hit a 1-1 pitch from Julian Tavarez ( 4-7 ) over the right-field fence for his 15th home run . O O O O S-PER O O O O O B-PER E-PER O O O O O O O O O O O O O
+" It was an off-speed pitch and I just tried to get a good swing on it and put it in play , " Greer said . " O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O
+This was a big game . O O O O O O
+The crowd was behind him and it was intense . " O O O O O O O O O O O
+The shot brought home Ivan Rodriguez , who had his second double of the game , giving him 42 this season , 41 as a catcher . O O O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O
+He joined Mickey Cochrane , Johnny Bench and Terry Kennedy as the only catchers with 40 doubles in a season . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O O O O O O O O O O O
+The Rangers have won 10 of their last 12 games and six of nine meetings against the Indians this season . O S-ORG O O O O O O O O O O O O O O O S-ORG O O O
+The American League Western leaders have won eight of 15 games at Jacobs Field , joining the Yankees as the only teams with a winning record at the A.L. Central leaders ' home . O B-MISC I-MISC E-MISC O O O O O O O O B-LOC E-LOC O O O S-ORG O O O O O O O O O O B-MISC E-MISC O O O O
+Cleveland lost for just the second time in six games . S-ORG O O O O O O O O O O
+The Indians sent the game into extra innings in the ninth on Kenny Lofton 's two-run single . O S-ORG O O O O O O O O O O B-PER E-PER O O O O
+Ed Vosberg ( 1-0 ) blew his first save opportunity but got the win , allowing three hits with two walks and three strikeouts in 1 2/3 scoreless innings . B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+Dean Palmer hit his 30th homer for the Rangers . B-PER E-PER O O O O O O S-ORG O
+In Baltimore , Cal Ripken had four hits and snapped a fifth-inning tie with a solo homer and Bobby Bonilla added a three-run shot in the seventh to power the surging Orioles to a 10-5 victory over the Seattle Mariners . O S-LOC O B-PER E-PER O O O O O O O O O O O O O B-PER E-PER O O O O O O O O O O O S-ORG O O O O O O B-ORG E-ORG O
+The Mariners scored four runs in the top of the fifth to tie the game 5-5 but Ripken led off the bottom of the inning with his 21st homer off starter Sterling Hitchcock ( 12-6 ) . O S-ORG O O O O O O O O O O O O O O O S-PER O O O O O O O O O O O O O B-PER E-PER O O O O
+Bonilla 's blast was the first time Randy Johnson , last season 's Cy Young winner , allowed a run in five relief appearances since coming off the disabled list on August 6 . S-PER O O O O O O B-PER E-PER O O O O B-PER E-PER O O O O O O O O O O O O O O O O O O O
+Bonilla has 21 RBI and 15 runs in his last 20 games . S-PER O O S-MISC O O O O O O O O O
+Baltimore has won seven of nine and 16 of its last 22 and cut the Yankees ' lead in the A.L. East to five games . S-LOC O O O O O O O O O O O O O O S-ORG O O O O B-MISC E-MISC O O O O
+Scott Erickson ( 8-10 ) laboured to his third straight win . B-PER E-PER O O O O O O O O O O
+Alex Rodriguez had two homers and four RBI for the Mariners , who have dropped three in a row and 11 of 15 . B-PER E-PER O O O O O S-MISC O O S-ORG O O O O O O O O O O O O O
+He became the fifth shortstop in major-league history to hit 30 homers in a season and the first since Ripken hit 34 in 1991 . O O O O O O O O O O O O O O O O O O O S-PER O O O O O
+Chris Hoiles hit his 22nd homer for Baltimore . B-PER E-PER O O O O O S-LOC O
+In New York , Jason Dickson scattered 10 hits over 6 1/3 innings in his major-league debut and Chili Davis belted a homer from each side of the plate as the California Angels defeated the Yankees 7-1 . O B-LOC E-LOC O B-PER E-PER O O O O O O O O O O O O B-PER E-PER O O O O O O O O O O O B-ORG E-ORG O O S-ORG O O
+Dickson allowed a homer to Derek Jeter on his first major-league pitch but settled down . S-PER O O O O B-PER E-PER O O O O O O O O O
+He was the 27th pitcher used by the Angels this season , tying a major-league record . O O O O O O O O S-ORG O O O O O O O O
+Jimmy Key ( 9-10 ) took the loss as the Yankees lost their ninth in 14 games . B-PER E-PER O O O O O O O O S-ORG O O O O O O O
+They stranded 11 baserunners . O O O O O
+California played without interim manager John McNamara , who was admitted to a New York hospital with a blood clot in his right calf . S-LOC O O O O B-PER E-PER O O O O O O B-LOC E-LOC O O O O O O O O O O
+In Boston , Mike Stanley 's bases-loaded two-run single snapped an eighth-inning tie and gave the Red Sox their third straight win , 6-4 over the Oakland Athletics . O S-LOC O B-PER E-PER O O O O O O O O O O O B-ORG E-ORG O O O O O O O O B-ORG E-ORG O
+Stanley owns a .367 career batting average with the bases loaded ( 33-for-90 ) . S-PER O O O O O O O O O O O O O O
+Boston 's Mo Vaughn went 3-for-3 with a walk , stole home for one of his three runs scored and collected his 116th RBI . S-LOC O B-PER E-PER O O O O O O O O O O O O O O O O O O O S-MISC O
+Scott Brosius homered and drove in two runs for the Athletics , who have lost seven of their last nine games . B-PER E-PER O O O O O O O O S-ORG O O O O O O O O O O O
+In Detroit , Brad Ausmus 's three-run homer capped a four-run eighth and lifted the Tigers to a 7-4 victory over the reeling Chicago White Sox . O S-LOC O B-PER E-PER O O O O O O O O O O S-ORG O O O O O O O B-ORG I-ORG E-ORG O
+The Tigers have won consecutive games after dropping eight in a row , but have won nine of their last 12 at home . O S-ORG O O O O O O O O O O O O O O O O O O O O O O
+The White Sox have lost six of their last eight games . O B-ORG E-ORG O O O O O O O O O
+In Kansas City , Juan Guzman tossed a complete-game six-hitter to win for the first time in over a month and lower his league-best ERA as the Toronto Blue Jays won their fourth straight , 6-2 over the Royals . O B-LOC E-LOC O B-PER E-PER O O O O O O O O O O O O O O O O O O S-MISC O O B-ORG I-ORG E-ORG O O O O O O O O S-ORG O
+Guzman ( 10-8 ) won for the first time since July 16 , a span of six starts . S-PER O O O O O O O O O O O O O O O O O O
+He allowed two runs -- one earned -- and lowered his ERA to 2.99 . O O O O O O O O O O O S-MISC O O O
+At Minnesota , John Jaha 's three-run homer , his 26th , capped a five-run eighth inning that rallied the Milwaukee Brewers to a 10-7 victory over the Twins . O S-LOC O B-PER E-PER O O O O O O O O O O O O O O O B-ORG E-ORG O O O O O O S-ORG O
+Jaha added an RBI single in the ninth and had four RBI . S-PER O O S-MISC O O O O O O O S-MISC O
+Jose Valentin hit his 21st homer for Milwaukee . B-PER E-PER O O O O O S-ORG O
+-DOCSTART- O
+SOCCER - COCU DOUBLE EARNS PSV 4-1 WIN . O O S-PER O O S-ORG O O O
+AMSTERDAM 1996-08-22 S-LOC O
+Philip Cocu scored twice in the second half to spur PSV Eindhoven to a 4-1 away win over NEC Nijmegen in the Dutch first division on Thursday . B-PER E-PER O O O O O O O O B-ORG E-ORG O O O O O O B-ORG E-ORG O O S-MISC O O O O O
+He scored from close range in the 54th minute and from a bicycle kick 13 minutes later . O O O O O O O O O O O O O O O O O O
+Arthur Numan and Luc Nilis , Dutch top scorer last season , were PSV 's other marksmen . B-PER E-PER O B-PER E-PER O S-MISC O O O O O O S-ORG O O O O
+Ajax Amsterdam opened their title defence with a 1-0 win over NAC Breda on Wednesday . B-ORG E-ORG O O O O O O O O O B-ORG E-ORG O O O
+-DOCSTART- O
+SOCCER - DUTCH FIRST DIVISION SUMMARY . O O S-MISC O O O O
+AMSTERDAM 1996-08-22 S-LOC O
+Summary of Thursday 's only O O O O O
+Dutch first division match : S-MISC O O O O
+NEC Nijmegen 1 ( Van Eykeren 15th ) PSV Eindhoven 4 ( Numan 11th , B-ORG E-ORG O O O O O O B-ORG E-ORG O O O O O
+Nilis 42nd , Cocu 54th , 67th ) . S-PER O O S-PER O O O O O
+Halftime 1-2 . O O O
+Attendance 8,000 O O
+-DOCSTART- O
+SOCCER - DUTCH FIRST DIVISION RESULT . O O S-MISC O O O O
+AMSTERDAM 1996-08-22 S-LOC O
+Result of a Dutch first O O O S-MISC O
+division match on Thursday : O O O O O
+NEC Nijmegen 1 PSV Eindhoven 4 B-ORG E-ORG O B-ORG E-ORG O
+-DOCSTART- O
+SOCCER - SHARPSHOOTER KNUP BACK IN SWISS SQUAD . O O O S-PER O O S-MISC O O
+GENEVA 1996-08-22 S-LOC O
+Galatasaray striker Adrian Knup , scorer of 26 goals in 45 internationals , has been recalled by Switzerland for the World Cup qualifier against Azerbaijan in Baku on August 31 . S-ORG O B-PER E-PER O O O O O O O O O O O O O S-LOC O O B-MISC E-MISC O O S-LOC O S-LOC O O O O
+Knup was overlooked by Artur Jorge for the European championship finals earlier this year . S-PER O O O B-PER E-PER O O S-MISC O O O O O O
+But new coach Rolf Fringer is clearly a Knup fan and included him in his 19-man squad on Thursday . O O O B-PER E-PER O O O S-PER O O O O O O O O O O O
+Switzerland failed to progress beyond the opening group phase in Euro 96 . S-LOC O O O O O O O O O B-MISC E-MISC O
+Squad : O O
+Goalkeepers - Marco Pascolo ( Cagliari ) , Pascal Zuberbuehler ( Grasshoppers ) . O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O O
+Defenders - Stephane Henchoz ( Hamburg ) , Marc Hottiger ( Everton ) , Yvan Quentin ( Sion ) , Ramon Vega ( Cagliari ) Raphael Wicky ( Sion ) . O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O B-PER E-PER O S-ORG O O
+Midfielders - Alexandre Comisetti ( Grasshoppers ) , Antonio Esposito ( Grasshoppers ) , Sebastien Fournier ( Stuttgart ) , Christophe Ohrel ( Lausanne ) , Patrick Sylvestre ( Sion ) , David Sesa ( Servette ) , Ciriaco Sforza ( Inter Milan ) Murat Yakin ( Grasshoppers ) . O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O O B-PER E-PER O B-ORG E-ORG O B-PER E-PER O S-ORG O O
+Strikers - Kubilay Turkyilmaz ( Grasshoppers ) , Adrian Knup ( Galatasaray ) , Christophe Bonvin ( Sion ) , Stephane Chapuisat ( Borussia Dortmund ) . O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O O B-PER E-PER O S-ORG O O B-PER E-PER O B-ORG E-ORG O O
+-DOCSTART- O
+ATHLETICS - IT 'S A RECORD - 40,000 BEERS ON THE HOUSE . O O O O O O O O O O O O O
+BRUSSELS 1996-08-22 S-LOC O
+Spectators at Friday 's Brussels grand prix meeting have an extra incentive to cheer on the athletes to world record performances -- a free glass of beer . O O O O S-LOC O O O O O O O O O O O O O O O O O O O O O O O
+A Belgian brewery has offered to pay for a free round of drinks for all of the 40,000 crowd if a world record goes at the meeting , organisers said on Thursday . O S-MISC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+It could be one of the most expensive rounds of drinks ever . O O O O O O O O O O O O O
+The meeting is sold out already . O O O O O O O
+Two world records are in serious danger of being broken at the meeting -- the women 's 1,000 metres and the men 's 3,000 metres . O O O O O O O O O O O O O O O O O O O O O O O O O O
+-DOCSTART- O
+GOLF - GERMAN OPEN FIRST ROUND SCORES . O O B-MISC E-MISC O O O O
+STUTTGART , Germany 1996-08-22 S-LOC O S-LOC O
+Leading first round O O O
+scores in the German Open golf championship on Thursday ( Britain O O O B-MISC E-MISC O O O O O S-LOC
+unless stated ) : O O O O
+62 Paul Broadhurst O B-PER E-PER
+63 Raymond Russell O B-PER E-PER
+64 David J. Russell , Michael Campbell ( New Zealand ) , Ian O B-PER I-PER E-PER O B-PER E-PER O B-LOC E-LOC O O S-PER
+Woosnam , Bernhard Langer ( Germany ) , Ronan Rafferty , Mats S-PER O B-PER E-PER O S-LOC O O B-PER E-PER O S-PER
+Lanner ( Sweden ) , Wayne Riley ( Australia ) S-PER O S-LOC O O B-PER E-PER O S-LOC O
+65 Eamonn Darcy ( Ireland ) , Per Nyman ( Sweden ) , Russell Claydon , O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O
+Mark Roe , Retief Goosen ( South Africa ) , Carl Suneson B-PER E-PER O B-PER E-PER O B-LOC E-LOC O O B-PER E-PER
+66 Stephen Field , Paul Lawrie , Ian Pyman , Max Anglert O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER
+( Sweden ) , Miles Tunnicliff , Christian Cevaer ( France ) , O S-LOC O O B-PER E-PER O B-PER E-PER O S-LOC O O
+Des Smyth ( Ireland ) , David Carter , Lee Westwood , Greg B-PER E-PER O S-LOC O O B-PER E-PER O B-PER E-PER O S-PER
+Chalmers ( Australia ) , Miguel Angel Martin ( Spain ) , S-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O
+Thomas Bjorn ( Denmark ) , Fernando Roca ( Spain ) , Derrick B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O S-PER
+Cooper S-PER
+67 Jeff Hawksworth , Padraig Harrington ( Ireland ) , Michael O B-PER E-PER O B-PER E-PER O S-LOC O O S-PER
+Welch , Thomas Gogele ( Germany ) , Paul McGinley ( Ireland ) , S-PER O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O
+Gary Orr , Jose-Maria Canizares ( Spain ) , Michael Jonzon B-PER E-PER O B-PER E-PER O S-LOC O O B-PER E-PER
+( Sweden ) , Paul Eales , David Williams , Andrew Coltart , O S-LOC O O B-PER E-PER O B-PER E-PER O B-PER E-PER O
+Jonathan Lomas , Jose Rivero ( Spain ) , Robert Karlsson B-PER E-PER O B-PER E-PER O S-LOC O O B-PER E-PER
+( Sweden ) , Marcus Wills , Pedro Linhart ( Spain ) , Jamie O S-LOC O O B-PER E-PER O B-PER E-PER O S-LOC O O S-PER
+Spence , Terry Price ( Australia ) , Juan Carlos Pinero ( Spain ) , S-PER O B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O
+Mark Mouland B-PER E-PER
+-DOCSTART- O
diff --git a/losses/CRFLoss.py b/losses/CRFLoss.py
new file mode 100644
index 0000000..718b0b9
--- /dev/null
+++ b/losses/CRFLoss.py
@@ -0,0 +1,71 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+import torch
+import torch.nn as nn
+import torch.autograd as autograd
+
+
+class CRFLoss(nn.Module):
+    """CRFLoss
+    Negative log-likelihood loss for a CRF output layer in sequence tagging tasks.
+
+    The loss is the partition score computed by the CRF layer (``forward_score``)
+    minus the score of the gold tag sequences.
+    """
+    def __init__(self):
+        super(CRFLoss, self).__init__()
+
+    def _score_sentence(self, scores, mask, tags, transitions, crf_layer_conf):
+        """Return the summed score of the gold tag sequences of the whole batch.
+
+        :param scores: Tensor [seq_len, batch_size, tag_size, tag_size]; the last
+            two dimensions are indexed as (previous tag, current tag)
+        :param mask: Tensor [batch_size, seq_len], true at the real (non padding) positions
+        :param tags: Tensor [batch_size, seq_len] holding the gold tag ids
+        :param transitions: Tensor whose column ``target_dict[STOP_TAG]`` holds, for
+            each tag, the score of the transition from that tag to the stop tag
+        :param crf_layer_conf: configuration of the CRF layer; ``use_gpu``,
+            ``target_dict`` and ``STOP_TAG`` are read from it
+        :return: scalar Tensor, sum of the gold path scores within the whole batch
+        """
+        seq_len = scores.size(0)
+        batch_size = scores.size(1)
+        tag_size = scores.size(2)
+
+        # Encode every (previous tag, current tag) bigram as one flat index into
+        # the flattened tag_size * tag_size score matrix of its position. The
+        # tag in front of the first token is the start tag, whose id is tag_size - 2.
+        start_tags = tags.new_full((batch_size, 1), tag_size - 2)
+        prev_tags = torch.cat([start_tags, tags[:, :-1]], dim=1)
+        new_tags = (prev_tags * tag_size + tags).long()
+        if crf_layer_conf.use_gpu:
+            new_tags = new_tags.cuda()
+
+        # transition for label to STOP_TAG
+        stop_id = crf_layer_conf.target_dict[crf_layer_conf.STOP_TAG]
+        end_transition = transitions[:, stop_id].contiguous().view(1, tag_size).expand(batch_size, tag_size)
+        # length for batch, last word position = length - 1
+        length_mask = torch.sum(mask.long(), dim=1).view(batch_size, 1).long()
+        # index the label id of last word
+        end_ids = torch.gather(tags, 1, length_mask - 1)
+        # index the transition score for end_id to STOP_TAG
+        end_energy = torch.gather(end_transition, 1, end_ids)
+
+        # convert tag as (seq_len, batch_size, 1)
+        new_tags = new_tags.transpose(1, 0).contiguous().view(seq_len, batch_size, 1)
+        # look the bigram indices up in the flattened scores -> (seq_len, batch_size)
+        tg_energy = torch.gather(scores.view(seq_len, batch_size, -1), 2, new_tags).view(seq_len, batch_size)
+        # drop the padding positions; mask is transposed to (seq_len, batch_size)
+        tg_energy = tg_energy.masked_select(mask.transpose(1, 0))
+
+        # add all score together
+        gold_score = tg_energy.sum() + end_energy.sum()
+        return gold_score
+
+    def forward(self, forward_score, scores, masks, tags, transitions, crf_layer_conf):
+        """Compute the CRF loss of a batch.
+
+        :param forward_score: scalar Tensor, partition score of the batch
+        :param scores: Tensor [seq_len, batch_size, target_size, target_size]
+        :param masks: Tensor [batch_size, seq_len]
+        :param tags: Tensor [batch_size, seq_len]
+        :param transitions: transition score Tensor of the CRF layer
+        :param crf_layer_conf: configuration of the CRF layer
+        :return: forward_score - gold_score
+        """
+        gold_score = self._score_sentence(scores, masks, tags, transitions, crf_layer_conf)
+        return forward_score - gold_score
\ No newline at end of file
diff --git a/losses/Loss.py b/losses/Loss.py
index c20305a..bf71047 100644
--- a/losses/Loss.py
+++ b/losses/Loss.py
@@ -8,7 +8,7 @@ import logging
sys.path.append('../')
from settings import LossOperationType
from torch.nn import CrossEntropyLoss, L1Loss, MSELoss, NLLLoss, PoissonNLLLoss, NLLLoss2d, KLDivLoss, BCELoss, BCEWithLogitsLoss, MarginRankingLoss, HingeEmbeddingLoss, MultiLabelMarginLoss, SmoothL1Loss, SoftMarginLoss, MultiLabelSoftMarginLoss, CosineEmbeddingLoss, MultiMarginLoss, TripletMarginLoss
-
+from .CRFLoss import CRFLoss
class Loss(nn.Module):
'''
diff --git a/losses/__init__.py b/losses/__init__.py
index fd6bac2..f74b0b7 100644
--- a/losses/__init__.py
+++ b/losses/__init__.py
@@ -1,5 +1,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
from .FocalLoss import FocalLoss
+from .CRFLoss import CRFLoss
from .Loss import Loss
from torch.nn import CrossEntropyLoss, L1Loss, MSELoss, NLLLoss, PoissonNLLLoss, NLLLoss2d, KLDivLoss, BCELoss, BCEWithLogitsLoss, MarginRankingLoss, HingeEmbeddingLoss, MultiLabelMarginLoss, SmoothL1Loss, SoftMarginLoss, MultiLabelSoftMarginLoss, CosineEmbeddingLoss, MultiMarginLoss, TripletMarginLoss
\ No newline at end of file
diff --git a/metrics/Evaluator.py b/metrics/Evaluator.py
index 11d520d..2a9a61a 100644
--- a/metrics/Evaluator.py
+++ b/metrics/Evaluator.py
@@ -4,6 +4,7 @@
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from .conlleval import countChunks, evaluate, to_conll_format
+from .slot_tagging_metrics import get_ner_BIOES, get_ner_BIO
from settings import TaggingSchemes
import numpy as np
import re
@@ -138,24 +139,72 @@ class Evaluator(object):
def accuracy(self, y_true, y_pred):
return metrics.accuracy_score(y_true, y_pred)
- def seq_tag_f1(self, y_true, y_pred):
- """ For sequence tagging task, calculate F1-score(e.g. CONLL 2000)
+ def seq_tag_f1(self, y_ture, y_pred):
+ '''Chunk-level F1 for sequence tagging, computed over the chunks of all sentences pooled together.
- Args:
- y_true:
- y_pred:
-
- Returns:
-
- """
+ :param y_ture: gold label sequences, one list of tag strings per sentence
+ :param y_pred: predicted label sequences, aligned with y_ture by sentence index
+ :return: 2 * P * R / (P + R), or -1 when there is no predicted chunk, no gold chunk or no correct chunk
+ :raises Exception: if the tagging scheme is not "BMES", "BIOES" or "BIO"
+ '''
assert self.__tagging_scheme is not None, "Please define tagging scheme!"
- if TaggingSchemes[self.__tagging_scheme] == TaggingSchemes.BIO:
- result_conll_format = to_conll_format(y_true, y_pred)
- correctChunk, foundGuessed, foundCorrect, correctTags, tokenCounter = countChunks(result_conll_format)
- overall_precision, overall_recall, overall_FB1 = evaluate(correctChunk, foundGuessed, foundCorrect, correctTags, tokenCounter)
+ sent_num = len(y_pred)
+ golden_full = []
+ predict_full = []
+ right_full = []
+ for idx in range(0, sent_num):
+ golden_list = y_ture[idx]
+ predict_list = y_pred[idx]
+ if self.__tagging_scheme == "BMES" or self.__tagging_scheme == "BIOES":
+ gold_matrix = get_ner_BIOES(golden_list)
+ pred_matrix = get_ner_BIOES(predict_list)
+ elif self.__tagging_scheme == "BIO":
+ gold_matrix = get_ner_BIO(golden_list)
+ pred_matrix = get_ner_BIO(predict_list)
+ else:
+ # raise Exception("DETECT UNKNOWN TAGGING SCHEMES! YOU CAN USE OUR SCRIPT TO CONVERT TAG SCHEME!")
+ raise Exception("DETECT UNKNOWN TAGGING SCHEMES!")
+ # a chunk is correct only when the identical chunk string occurs in both gold and prediction
+ right_ner = list(set(gold_matrix).intersection(set(pred_matrix)))
+ golden_full += gold_matrix
+ predict_full += pred_matrix
+ right_full += right_ner
+ right_num = len(right_full)
+ golden_num = len(golden_full)
+ predict_num = len(predict_full)
+ # -1 marks a precision / recall / F1 that is undefined
+ if predict_num == 0:
+ precision = -1
else:
- raise Exception("TO DO: SUPPORT MORE TAGGING SCHEMES")
- return overall_FB1
+ precision = (right_num + 0.0) / predict_num
+ if golden_num == 0:
+ recall = -1
+ else:
+ recall = (right_num + 0.0) / golden_num
+ if (precision == -1) or (recall == -1) or (precision + recall) <= 0.:
+ f_measure = -1
+ else:
+ f_measure = 2 * precision * recall / (precision + recall)
+ return f_measure
+
+
+    def seq_tag_accuracy(self, y_ture, y_pred):
+        '''Token-level accuracy for sequence tagging.
+
+        :param y_ture: gold label sequences, one sequence of tags per sentence
+        :param y_pred: predicted label sequences, aligned with y_ture by sentence index
+        :return: fraction of gold tokens whose predicted tag equals the gold tag;
+            0.0 when there is no gold token to score
+        '''
+        right_tag = 0
+        all_tag = 0
+        # Sentences and tokens are paired by position. Every gold token of a
+        # scored sentence is counted, so a missing prediction counts as an error.
+        for golden_list, predict_list in zip(y_ture, y_pred):
+            right_tag += sum(1 for gold, pred in zip(golden_list, predict_list) if gold == pred)
+            all_tag += len(golden_list)
+        if all_tag == 0:
+            # nothing to score: avoid a ZeroDivisionError on empty input
+            return 0.0
+        return (right_tag + 0.0) / all_tag
+
def macro_f1(self, y_true, y_pred):
""" For classification task, calculate f1-score for each label, and find their unweighted mean. This does not take label imbalance into account.
diff --git a/metrics/slot_tagging_metrics.py b/metrics/slot_tagging_metrics.py
new file mode 100644
index 0000000..d37a740
--- /dev/null
+++ b/metrics/slot_tagging_metrics.py
@@ -0,0 +1,97 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+
+def get_ner_BIOES(label_list):
+    """Extract the entity chunks from one BIOES (or BMES) label sequence.
+
+    Labels are upper-cased before parsing. A chunk is opened by a ``B-`` label
+    and closed by the next ``E-`` label; an ``S-`` label is a one-token chunk.
+    Labels with any other prefix (``I-``, ``M-``, ``O`` ...) neither open nor
+    close a chunk. The type of a multi-token chunk is taken from its ``B-`` label.
+
+    :param label_list: list of label strings such as ``'B-PER'``, ``'E-PER'``, ``'O'``
+    :return: list of chunk strings in order of appearance: ``'[start,end]TYPE'``
+        for a closed chunk, ``'[start]TYPE'`` for an ``S-`` chunk or for a chunk
+        that is still open at the end of the sequence
+    """
+    begin_label = 'B-'
+    end_label = 'E-'
+    single_label = 'S-'
+    chunk_type = ''  # type of the chunk currently open, '' when none is open
+    chunk_start = 0
+    chunks = []
+    for i, label in enumerate(label_list):
+        current_label = label.upper()
+        # Match the scheme marker as a prefix only: a substring test misreads
+        # types that contain a marker, e.g. the 'S-' inside 'E-ADDRESS-LINE'.
+        if current_label.startswith(begin_label):
+            if chunk_type != '':
+                # a new chunk starts before the open one was closed
+                chunks.append('[{},{}]{}'.format(chunk_start, i - 1, chunk_type))
+            chunk_type = current_label[len(begin_label):]
+            chunk_start = i
+        elif current_label.startswith(single_label):
+            if chunk_type != '':
+                chunks.append('[{},{}]{}'.format(chunk_start, i - 1, chunk_type))
+            chunks.append('[{}]{}'.format(i, current_label[len(single_label):]))
+            chunk_type = ''
+        elif current_label.startswith(end_label):
+            if chunk_type != '':
+                chunks.append('[{},{}]{}'.format(chunk_start, i, chunk_type))
+            chunk_type = ''
+    if chunk_type != '':
+        # chunk still open at the end of the sequence: no end index is recorded
+        chunks.append('[{}]{}'.format(chunk_start, chunk_type))
+    return chunks
+
+
+def get_ner_BIO(label_list):
+    """Extract the entity chunks from one BIO label sequence.
+
+    Labels are upper-cased before parsing. A chunk is opened by a ``B-`` label
+    and continued by ``I-`` labels of the same type; any other label (``O``,
+    a new ``B-``, an ``I-`` of another type) closes it at the previous position.
+
+    :param label_list: list of label strings such as ``'B-PER'``, ``'I-PER'``, ``'O'``
+    :return: list of chunk strings in order of appearance: ``'[start,end]TYPE'``
+        for a closed chunk, ``'[start]TYPE'`` for a chunk that is still open at
+        the end of the sequence
+    """
+    begin_label = 'B-'
+    inside_label = 'I-'
+    chunk_type = ''  # type of the chunk currently open, '' when none is open
+    chunk_start = 0
+    chunks = []
+    for i, label in enumerate(label_list):
+        current_label = label.upper()
+        # Match the scheme marker as a prefix only: a substring test misreads
+        # types that contain a marker, e.g. the 'B-' inside 'I-JOB-TITLE'.
+        if current_label.startswith(begin_label):
+            if chunk_type != '':
+                chunks.append('[{},{}]{}'.format(chunk_start, i - 1, chunk_type))
+            chunk_type = current_label[len(begin_label):]
+            chunk_start = i
+        elif current_label.startswith(inside_label) and current_label[len(inside_label):] == chunk_type:
+            # the open chunk simply continues
+            continue
+        else:
+            if chunk_type != '':
+                chunks.append('[{},{}]{}'.format(chunk_start, i - 1, chunk_type))
+            chunk_type = ''
+    if chunk_type != '':
+        # chunk still open at the end of the sequence: no end index is recorded
+        chunks.append('[{}]{}'.format(chunk_start, chunk_type))
+    return chunks
+
+
+def reverse_style(input_string):
+    """Move the text in front of the first '[' to the end of the string.
+
+    For example ``'PER[0,1]'`` becomes ``'[0,1]PER'``.
+
+    :param input_string: string containing at least one '['
+    :return: the part starting at the first '[' followed by the part before it
+    :raises ValueError: if input_string contains no '['
+    """
+    bracket_at = input_string.index('[')
+    prefix, bracketed = input_string[:bracket_at], input_string[bracket_at:]
+    return bracketed + prefix
\ No newline at end of file
diff --git a/model_zoo/advanced/conf.json b/model_zoo/advanced/conf.json
index bb372e2..0fdeb60 100644
--- a/model_zoo/advanced/conf.json
+++ b/model_zoo/advanced/conf.json
@@ -49,7 +49,8 @@
"training_params": {
"vocabulary": {
"min_word_frequency": 1,
- "max_vocabulary": 100000
+ "max_vocabulary": 100000,
+ "max_building_lines": 1000000
},
"optimizer": {
"name": "Adam",
@@ -57,6 +58,7 @@
"lr": 0.001
}
},
+ "chunk_size": 1000000,
"lr_decay": 0.95,
"minimum_lr": 0.0001,
"epoch_start_lr_decay": 1,
@@ -65,7 +67,7 @@
"batch_size": 30,
"batch_num_to_show_results": 10,
"max_epoch": 3,
- "valid_times_per_epoch": 1,
+ "steps_per_validation": 10,
"text_preprocessing": ["DBC2SBC"],
"max_lengths":{
"question": 30,
@@ -75,6 +77,7 @@
"architecture":[
{
"layer": "Embedding",
+ "weight_on_gpu": false,
"conf": {
"word": {
"cols": ["question_text", "answer_text"],
@@ -89,11 +92,11 @@
"cols": ["question_char", "answer_char"],
"type": "CNNCharEmbedding",
"dropout": 0.2,
- "dim": 30,
- "embedding_matrix_dim": 8,
- "stride":1,
- "window_size": 5,
- "activation": null
+ "dim": [30, 20, 100],
+ "embedding_matrix_dim": 50,
+ "stride":[1, 2, 3],
+ "window_size": [3,3,5],
+ "activation": "ReLU"
}
}
},
diff --git a/model_zoo/nlp_tasks/knowledge_distillation/query_binary_classifier_compression/conf_kdqbc_bilstmattn_cnn.json b/model_zoo/nlp_tasks/knowledge_distillation/query_binary_classifier_compression/conf_kdqbc_bilstmattn_cnn.json
index 976d30e..9220e45 100644
--- a/model_zoo/nlp_tasks/knowledge_distillation/query_binary_classifier_compression/conf_kdqbc_bilstmattn_cnn.json
+++ b/model_zoo/nlp_tasks/knowledge_distillation/query_binary_classifier_compression/conf_kdqbc_bilstmattn_cnn.json
@@ -53,7 +53,7 @@
"batch_size": 256,
"batch_num_to_show_results": 10,
"max_epoch": 30,
- "valid_times_per_epoch": 10,
+ "steps_per_validation": 10,
"fixed_lengths":{
"query": 30
}
diff --git a/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_arci.json b/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_arci.json
new file mode 100644
index 0000000..31854b9
--- /dev/null
+++ b/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_arci.json
@@ -0,0 +1,241 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for question answer matching task, and it achieved auc: 0.7508 in WikiQACorpus test set",
+ "language": "English",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "classification",
+ "data_paths": {
+ "train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
+ "valid_data_path": "./dataset/WikiQACorpus/WikiQA-dev.tsv",
+ "test_data_path": "./dataset/WikiQACorpus/WikiQA-test.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt"
+ },
+ "file_with_col_header": true,
+ "add_start_end_for_seq": true,
+ "file_header": {
+ "question_id": 0,
+ "question_text": 1,
+ "document_id": 2,
+ "document_title": 3,
+ "passage_id": 4,
+ "passage_text": 5,
+ "label": 6
+ },
+ "model_inputs": {
+ "question": ["question_text"],
+ "passage": ["passage_text"]
+ },
+ "target": ["label"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/wikiqa_arci/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.wikiqa_arci/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "optimizer": {
+ "name": "Adam",
+ "params": {
+ "lr": 0.001
+ }
+ },
+ "fixed_lengths": {
+ "question": 200,
+ "passage": 200
+ },
+ "lr_decay": 0.90,
+ "minimum_lr": 0.00005,
+ "epoch_start_lr_decay": 20,
+ "use_gpu": true,
+ "cpu_num_workers": 1,
+ "batch_size": 64,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 10,
+ "valid_times_per_epoch": 2
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "conf": {
+ "word": {
+ "cols": ["question_text", "passage_text"],
+ "dim": 300,
+ "fix_weight": false
+ }
+ }
+ },
+ {
+ "layer_id": "s1_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["question"]
+ },
+ {
+ "layer_id": "s2_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["passage"]
+ },
+ {
+ "layer_id": "s1_conv_1",
+ "layer": "Conv",
+ "conf": {
+ "window_size": 3,
+ "output_channel_num": 32,
+ "padding_type": "SAME",
+ "remind_lengths": false
+ },
+ "inputs": ["s1_dropout"]
+ },
+ {
+ "layer_id": "s1_pool_1",
+ "layer": "Pooling1D",
+ "conf": {
+ "stride": 1,
+ "window_size": 2
+ },
+ "inputs": ["s1_conv_1"]
+ },
+ {
+ "layer_id": "s1_conv_2",
+ "layer": "Conv",
+ "conf": {
+ "window_size": 3,
+ "output_channel_num": 32,
+ "padding_type": "SAME"
+ },
+ "inputs": ["s1_pool_1"]
+ },
+ {
+ "layer_id": "s1_pool_2",
+ "layer": "Pooling1D",
+ "conf": {
+ "stride": 1,
+ "window_size": 2
+ },
+ "inputs": ["s1_conv_2"]
+ },
+ {
+ "layer_id": "s1_flatten",
+ "layer": "Flatten",
+ "conf": {
+
+ },
+ "inputs": ["s1_pool_2"]
+ },
+ {
+ "layer_id": "s2_conv_1",
+ "layer": "Conv",
+ "conf": {
+ "window_size": 3,
+ "output_channel_num": 32,
+ "padding_type": "SAME",
+ "remind_lengths": false
+ },
+ "inputs": ["s2_dropout"]
+ },
+ {
+ "layer_id": "s2_pool_1",
+ "layer": "Pooling1D",
+ "conf": {
+ "stride": 1,
+ "window_size": 2
+ },
+ "inputs": ["s2_conv_1"]
+ },
+ {
+ "layer_id": "s2_conv_2",
+ "layer": "Conv",
+ "conf": {
+ "window_size": 3,
+ "output_channel_num": 32,
+ "padding_type": "SAME"
+ },
+ "inputs": ["s2_pool_1"]
+ },
+ {
+ "layer_id": "s2_pool_2",
+ "layer": "Pooling1D",
+ "conf": {
+ "stride": 1,
+ "window_size": 2
+ },
+ "inputs": ["s2_conv_2"]
+ },
+ {
+ "layer_id": "s2_flatten",
+ "layer": "Flatten",
+ "conf": {
+
+ },
+ "inputs": ["s2_pool_2"]
+ },
+ {
+ "layer_id": "comb",
+ "layer": "Combination",
+ "conf": {
+ "operations": ["origin"]
+ },
+ "inputs": ["s1_flatten", "s2_flatten"]
+ },
+ {
+ "layer_id": "comb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["comb"]
+ },
+ {
+ "layer_id": "mlp",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [64, 32],
+ "activation": "ReLU",
+ "batch_norm": true,
+ "last_hidden_activation": true
+ },
+ "inputs": ["comb_dropout"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [-1],
+ "activation": "ReLU",
+ "batch_norm": true,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": true
+ },
+ "inputs": ["mlp"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CrossEntropyLoss",
+ "conf": {
+ "weight": [0.1,0.9],
+ "size_average": true
+ },
+ "inputs": ["output","label"]
+ }
+ ]
+ },
+ "metrics": ["auc", "accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_arcii.json b/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_arcii.json
new file mode 100644
index 0000000..e9bf0d9
--- /dev/null
+++ b/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_arcii.json
@@ -0,0 +1,212 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for question answer matching task, and it achieved auc: 0.7612 in WikiQACorpus test set",
+ "language": "English",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "classification",
+ "data_paths": {
+ "train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
+ "valid_data_path": "./dataset/WikiQACorpus/WikiQA-dev.tsv",
+ "test_data_path": "./dataset/WikiQACorpus/WikiQA-test.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.840B.300d.txt"
+ },
+ "file_with_col_header": true,
+ "add_start_end_for_seq": true,
+ "file_header": {
+ "question_id": 0,
+ "question_text": 1,
+ "document_id": 2,
+ "document_title": 3,
+ "passage_id": 4,
+ "passage_text": 5,
+ "label": 6
+ },
+ "model_inputs": {
+ "question": ["question_text"],
+ "passage": ["passage_text"]
+ },
+ "target": ["label"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/wikiqa_arcii/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.wikiqa_arcii/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "optimizer": {
+ "name": "Adam",
+ "params": {
+ "lr": 0.001
+ }
+ },
+ "fixed_lengths": {
+ "question": 200,
+ "passage": 200
+ },
+ "lr_decay": 0.9,
+ "minimum_lr": 0.00005,
+ "epoch_start_lr_decay": 20,
+ "use_gpu": true,
+ "cpu_num_workers": 1,
+ "batch_size": 64,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 10,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "conf": {
+ "word": {
+ "cols": ["question_text", "passage_text"],
+ "dim": 300,
+ "fix_weight": true
+ }
+ }
+ },
+ {
+ "layer_id": "s1_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["question"]
+ },
+ {
+ "layer_id": "s2_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["passage"]
+ },
+ {
+ "layer_id": "s1_conv_1",
+ "layer": "Conv",
+ "conf": {
+ "window_size": 3,
+ "output_channel_num": 32,
+ "padding_type": "SAME",
+ "remind_lengths": false
+ },
+ "inputs": ["s1_dropout"]
+ },
+ {
+ "layer_id": "s2_conv_1",
+ "layer": "Conv",
+ "conf": {
+ "window_size": 3,
+ "output_channel_num": 32,
+ "padding_type": "SAME",
+ "remind_lengths": false
+ },
+ "inputs": ["s2_dropout"]
+ },
+ {
+ "layer_id": "match",
+ "layer": "Expand_plus",
+ "conf": {
+ },
+ "inputs": ["s1_conv_1", "s2_conv_1"]
+ },
+ {
+ "layer_id": "conv2D_1",
+ "layer": "Conv2D",
+ "conf": {
+ "window_size": [3,3],
+ "output_channel_num": 32,
+ "padding_type": "SAME"
+ },
+ "inputs": ["match"]
+ },
+ {
+ "layer_id": "pool2D_1",
+ "layer": "Pooling2D",
+ "conf": {
+ "window_size": [2,2]
+ },
+ "inputs": ["conv2D_1"]
+ },
+ {
+ "layer_id": "conv2D_2",
+ "layer": "Conv2D",
+ "conf": {
+ "window_size": [3,3],
+ "output_channel_num": 32,
+ "padding_type": "SAME"
+ },
+ "inputs": ["pool2D_1"]
+ },
+ {
+ "layer_id": "pool2D_2",
+ "layer": "Pooling2D",
+ "conf": {
+ "window_size": [2,2]
+ },
+ "inputs": ["conv2D_2"]
+ },
+ {
+ "layer_id": "flatten",
+ "layer": "Flatten",
+ "conf": {
+
+ },
+ "inputs": ["pool2D_2"]
+ },
+ {
+ "layer_id": "dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["flatten"]
+ },
+ {
+ "layer_id": "mlp",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [64, 32],
+ "activation": "ReLU",
+ "batch_norm": true,
+ "last_hidden_activation": true
+ },
+ "inputs": ["dropout"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [-1],
+ "activation": "ReLU",
+ "batch_norm": true,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": true
+ },
+ "inputs": ["mlp"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CrossEntropyLoss",
+ "conf": {
+ "weight": [0.1,0.9],
+ "size_average": true
+ },
+ "inputs": ["output","label"]
+ }
+ ]
+ },
+ "metrics": ["auc", "accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_pyramid.json b/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_pyramid.json
index 0cc4e9d..7301193 100644
--- a/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_pyramid.json
+++ b/model_zoo/nlp_tasks/question_answer_matching/conf_question_answer_matching_pyramid.json
@@ -57,7 +57,7 @@
"valid_times_per_epoch": 5,
"fixed_lengths":{
"question": 30,
- "passage": 120
+ "passage": 200
}
},
"architecture":[
@@ -92,7 +92,6 @@
"layer": "Interaction",
"conf": {
"dropout": 0.2,
- "hidden_dim": 300,
"matching_type": "general"
},
"inputs": ["question_dropout", "passage_dropout"]
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
new file mode 100644
index 0000000..6e00cda
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
@@ -0,0 +1,119 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for sequence tagging task. It achieved a f1-score of 88.50 on the dataset conll-2003",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "sequence": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["sequence"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.015,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 2,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["sequence"],
+ "dim": 100
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "sentence_BiLSTM",
+ "layer": "BiLSTM",
+ "conf": {
+ "hidden_dim": 100,
+ "num_layers": 1
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "lstm_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["sentence_BiLSTM"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [-1],
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["lstm_dropout"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CrossEntropyLoss",
+ "conf": {
+ "size_average": false,
+ "ignore_index": 0
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wcnn.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wcnn.json
new file mode 100644
index 0000000..e97f89a
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wcnn.json
@@ -0,0 +1,208 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for sequence tagging task. It achieved a f1-score of 90.36 on the dataset conll-2003",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word", "word_char"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_ccnn_wcnn/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_ccnn_wcnn/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.005,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ },
+ "char":{
+ "cols":["word_char"],
+ "type":"CNNCharEmbedding",
+ "dropout": 0.5,
+ "dim": 50,
+ "embedding_matrix_dim": 30,
+ "stride":1,
+ "window_size": 3,
+ "activation": null
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "Linear1",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [200],
+ "activation": "Tanh",
+ "batch_norm": true,
+ "last_hidden_activation": true,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "Conv1",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Linear1"]
+ },
+ {
+ "layer_id": "Dropout1",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["Conv1"]
+ },
+ {
+ "layer_id": "Conv2",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Dropout1"]
+ },
+ {
+ "layer_id": "Dropout2",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["Conv2"]
+ },
+ {
+ "layer_id": "Conv3",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Dropout2"]
+ },
+ {
+ "layer_id": "Dropout3",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["Conv3"]
+ },
+ {
+ "layer_id": "Conv4",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Dropout3"]
+ },
+ {
+ "layer_id": "Dropout4",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["Conv4"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [-1],
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["Dropout4"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CrossEntropyLoss",
+ "conf": {
+ "size_average": false,
+ "ignore_index": 0
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wcnn_crf.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wcnn_crf.json
new file mode 100644
index 0000000..ecd0954
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wcnn_crf.json
@@ -0,0 +1,185 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for sequence tagging task. It achieved a f1-score of 90.36 on the dataset conll-2003",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word", "word_char"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_wcnn_ccnn_crf/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_wcnn_ccnn_crf/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.005,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ },
+ "char":{
+ "cols":["word_char"],
+ "type":"CNNCharEmbedding",
+ "dropout": 0.5,
+ "dim": 50,
+ "embedding_matrix_dim": 30,
+ "stride":1,
+ "window_size": 3,
+ "activation": null
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "Linear1",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [200],
+ "activation": "Tanh",
+ "batch_norm": false,
+ "last_hidden_activation": true,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "Conv1",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Linear1"]
+ },
+
+ {
+ "layer_id": "Conv2",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Conv1"]
+ },
+
+ {
+ "layer_id": "Conv3",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Conv2"]
+ },
+
+ {
+ "layer_id": "Conv4",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Conv3"]
+ },
+
+ {
+ "layer_id": "rep2tag",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": "#target#",
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["Conv4"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "CRF",
+ "conf": {},
+ "inputs": ["rep2tag"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CRFLoss",
+ "conf": {
+
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm.json
new file mode 100644
index 0000000..7483316
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm.json
@@ -0,0 +1,130 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for the sequence tagging task. It achieved an F1-score of 90.63 on the CoNLL-2003 dataset.",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word", "word_char"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_ccnn_wlstm/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_ccnn_wlstm/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.015,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ },
+ "char":{
+ "cols":["word_char"],
+ "type":"CNNCharEmbedding",
+ "dropout": 0.5,
+ "dim": 50,
+ "embedding_matrix_dim": 30,
+ "stride":1,
+ "window_size": 3,
+ "activation": null
+ }
+ }
+ },
+
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "sentence_BiLSTM",
+ "layer": "BiLSTM",
+ "conf": {
+ "hidden_dim": 100,
+ "num_layers": 1
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "lstm_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["sentence_BiLSTM"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [-1],
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["lstm_dropout"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CrossEntropyLoss",
+ "conf": {
+ "size_average": false,
+ "ignore_index": 0
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
new file mode 100644
index 0000000..e3d5f65
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
@@ -0,0 +1,134 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for the sequence tagging task. It achieved an F1-score of 91.38 on the CoNLL-2003 dataset.",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word", "word_char"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_ccnn_wlstm_crf/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_ccnn_wlstm_crf/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.015,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ },
+ "char":{
+ "cols":["word_char"],
+ "type":"CNNCharEmbedding",
+ "dropout": 0.5,
+ "dim": 50,
+ "embedding_matrix_dim": 30,
+ "stride":1,
+ "window_size": 3,
+ "activation": null
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "sentence_BiLSTM",
+ "layer": "BiLSTM",
+ "conf": {
+ "hidden_dim": 100,
+ "num_layers": 1
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "lstm_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.5
+ },
+ "inputs": ["sentence_BiLSTM"]
+ },
+ {
+ "layer_id": "rep2tag",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": "#target#",
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["lstm_dropout"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "CRF",
+ "conf": {},
+ "inputs": ["rep2tag"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CRFLoss",
+ "conf": {
+
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wcnn_crf.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wcnn_crf.json
new file mode 100644
index 0000000..28faad8
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wcnn_crf.json
@@ -0,0 +1,191 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for the sequence tagging task. It achieved an F1-score of 90.27 on the CoNLL-2003 dataset.",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word", "word_char"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_clstm_wcnn_crf/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_clstm_wcnn_crf/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.005,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ },
+ "char":{
+ "cols":["word_char"],
+ "type":"LSTMCharEmbedding",
+ "dropout": 0.5,
+ "dim": 50,
+ "embedding_matrix_dim": 30,
+ "bidirect_flag": true
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "Linear1",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [200],
+ "activation": "Tanh",
+ "batch_norm": false,
+ "last_hidden_activation": true,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "Conv1",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Linear1"]
+ },
+
+ {
+ "layer_id": "Conv2",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Conv1"]
+ },
+
+ {
+ "layer_id": "Conv3",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Conv2"]
+ },
+
+ {
+ "layer_id": "Conv4",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Conv3"]
+ },
+
+ {
+ "layer_id": "rep2tag",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": "#target#",
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["Conv4"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "CRF",
+ "conf": {},
+ "inputs": ["rep2tag"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CRFLoss",
+ "conf": {
+
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wlstm_crf.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wlstm_crf.json
new file mode 100644
index 0000000..eacf304
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wlstm_crf.json
@@ -0,0 +1,132 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for the sequence tagging task. It achieved an F1-score of 90.83 on the CoNLL-2003 dataset.",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word", "word_char"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_clstm_wlstm_crf/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_clstm_wlstm_crf/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.015,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ },
+ "char":{
+ "cols":["word_char"],
+ "type":"LSTMCharEmbedding",
+ "dropout": 0.2,
+ "dim": 50,
+ "embedding_matrix_dim": 30,
+ "bidirect_flag": true
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "sentence_BiLSTM",
+ "layer": "BiLSTM",
+ "conf": {
+ "hidden_dim": 100,
+ "num_layers": 1
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "lstm_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["sentence_BiLSTM"]
+ },
+ {
+ "layer_id": "rep2tag",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": "#target#",
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["lstm_dropout"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "CRF",
+ "conf": {},
+ "inputs": ["rep2tag"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CRFLoss",
+ "conf": {
+
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_encoder_decoder.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_encoder_decoder.json
index 4437625..7e129f0 100644
--- a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_encoder_decoder.json
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_encoder_decoder.json
@@ -23,13 +23,13 @@
},
"outputs":{
"save_base_dir": "./models/slot_tagging_encoder_decoder/",
- "model_name": "model_debug.nb",
- "train_log_name": "train_debug.log",
- "test_log_name": "test_debug.log",
- "predict_log_name": "predict_debug.log",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
"predict_fields": ["prediction"],
- "predict_output_name": "predict_debug.tsv",
- "cache_dir": ".cache.atis/"
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_encoder_decoder/"
},
"training_params": {
"vocabulary": {
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wcnn.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wcnn.json
new file mode 100644
index 0000000..4e52d05
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wcnn.json
@@ -0,0 +1,198 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for the sequence tagging task. It achieved an F1-score of 88.51 on the CoNLL-2003 dataset.",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_wcnn/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_wcnn/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.005,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "Linear1",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [200],
+ "activation": "Tanh",
+ "batch_norm": true,
+ "last_hidden_activation": true,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "Conv1",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Linear1"]
+ },
+ {
+ "layer_id": "Dropout1",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["Conv1"]
+ },
+ {
+ "layer_id": "Conv2",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Dropout1"]
+ },
+ {
+ "layer_id": "Dropout2",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["Conv2"]
+ },
+ {
+ "layer_id": "Conv3",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Dropout2"]
+ },
+ {
+ "layer_id": "Dropout3",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["Conv3"]
+ },
+ {
+ "layer_id": "Conv4",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME"
+ },
+ "inputs": ["Dropout3"]
+ },
+ {
+ "layer_id": "Dropout4",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["Conv4"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [-1],
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["Dropout4"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CrossEntropyLoss",
+ "conf": {
+ "size_average": false,
+ "ignore_index": 0
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wcnn_crf.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wcnn_crf.json
new file mode 100644
index 0000000..28476f7
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wcnn_crf.json
@@ -0,0 +1,183 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for the sequence tagging task. It achieved an F1-score of 88.72 on the CoNLL-2003 dataset.",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_wcnn_crf/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_wcnn_crf/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.005,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.2
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "Linear1",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": [200],
+ "activation": "Tanh",
+ "batch_norm": false,
+ "last_hidden_activation": true,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "Conv1",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Linear1"]
+ },
+
+ {
+ "layer_id": "Conv2",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Conv1"]
+ },
+
+ {
+ "layer_id": "Conv3",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Conv2"]
+ },
+
+ {
+ "layer_id": "Conv4",
+ "layer": "Conv",
+ "conf": {
+ "stride": 1,
+ "window_size": 3,
+ "output_channel_num": 200,
+ "batch_norm": true,
+ "activation": "ReLU",
+ "padding_type": "SAME",
+ "dropout": 0.2,
+ "remind_lengths": true
+ },
+ "inputs": ["Conv3"]
+ },
+
+ {
+ "layer_id": "rep2tag",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": "#target#",
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["Conv4"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "CRF",
+ "conf": {},
+ "inputs": ["rep2tag"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CRFLoss",
+ "conf": {
+
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wlstm_crf.json b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wlstm_crf.json
new file mode 100644
index 0000000..73c048a
--- /dev/null
+++ b/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_wlstm_crf.json
@@ -0,0 +1,124 @@
+{
+ "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
+ "tool_version": "1.1.0",
+ "model_description": "This model is used for the sequence tagging task. It achieved an F1-score of 89.34 on the CoNLL-2003 dataset.",
+ "inputs": {
+ "use_cache": true,
+ "dataset_type": "sequence_tagging",
+ "tagging_scheme": "BIOES",
+ "data_paths": {
+ "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
+ "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
+ "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
+ "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
+ },
+ "add_start_end_for_seq": false,
+ "file_header": {
+ "word": 0,
+ "tag": 1
+ },
+ "model_inputs": {
+ "words": ["word"]
+ },
+ "target": ["tag"]
+ },
+ "outputs":{
+ "save_base_dir": "./models/slot_tagging_wlstm_crf/",
+ "model_name": "model.nb",
+ "train_log_name": "train.log",
+ "test_log_name": "test.log",
+ "predict_log_name": "predict.log",
+ "predict_fields": ["prediction"],
+ "predict_output_name": "predict.tsv",
+ "cache_dir": ".cache.slot_tagging_wlstm_crf/"
+ },
+ "training_params": {
+ "vocabulary": {
+ "min_word_frequency": 1
+ },
+ "cpu_num_workers": 4,
+ "optimizer": {
+ "name": "SGD",
+ "params": {
+ "lr": 0.015,
+ "weight_decay": 1e-8
+ }
+ },
+ "lr_decay": 0.95,
+ "minimum_lr": 0.00001,
+ "epoch_start_lr_decay": 1,
+ "use_gpu": true,
+ "batch_size": 10,
+ "batch_num_to_show_results": 500,
+ "max_epoch": 100,
+ "valid_times_per_epoch": 1
+ },
+ "architecture":[
+ {
+ "layer": "Embedding",
+ "weight_on_gpu": true,
+ "conf": {
+ "word": {
+ "cols": ["word"],
+ "dim": 100
+ }
+ }
+ },
+ {
+ "layer_id": "emb_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.4
+ },
+ "inputs": ["words"]
+ },
+ {
+ "layer_id": "sentence_BiLSTM",
+ "layer": "BiLSTM",
+ "conf": {
+ "hidden_dim": 100,
+ "num_layers": 1
+ },
+ "inputs": ["emb_dropout"]
+ },
+ {
+ "layer_id": "lstm_dropout",
+ "layer": "Dropout",
+ "conf": {
+ "dropout": 0.4
+ },
+ "inputs": ["sentence_BiLSTM"]
+ },
+ {
+ "layer_id": "rep2tag",
+ "layer": "Linear",
+ "conf": {
+ "hidden_dim": "#target#",
+ "activation": "PReLU",
+ "batch_norm": false,
+ "last_hidden_activation": false,
+ "last_hidden_softmax": false
+ },
+ "inputs": ["lstm_dropout"]
+ },
+ {
+ "output_layer_flag": true,
+ "layer_id": "output",
+ "layer": "CRF",
+ "conf": {},
+ "inputs": ["rep2tag"]
+ }
+ ],
+ "loss": {
+ "losses": [
+ {
+ "type": "CRFLoss",
+ "conf": {
+
+ },
+ "inputs": ["output","tag"]
+ }
+ ]
+ },
+ "metrics": ["seq_tag_f1","seq_tag_accuracy"]
+}
\ No newline at end of file
diff --git a/predict.py b/predict.py
index 3e96f11..9c1455d 100644
--- a/predict.py
+++ b/predict.py
@@ -34,13 +34,46 @@ def main(params):
lm = LearningMachine('predict', conf, problem, vocab_info=None, initialize=False, use_gpu=conf.use_gpu)
lm.load_model(conf.previous_model_path)
- logging.info('Predicting %s with the model saved at %s' % (conf.predict_data_path, conf.previous_model_path))
- lm.predict(conf.predict_data_path, conf.predict_output_path, conf.predict_file_columns, conf.predict_fields)
- logging.info("Predict done! The predict result: %s" % conf.predict_output_path)
+ if params.predict_mode == 'batch':
+ logging.info('Predicting %s with the model saved at %s' % (conf.predict_data_path, conf.previous_model_path))
+ if params.predict_mode == 'batch':
+ lm.predict(conf.predict_data_path, conf.predict_output_path, conf.predict_file_columns, conf.predict_fields)
+ logging.info("Predict done! The predict result: %s" % conf.predict_output_path)
+ elif params.predict_mode == 'interactive':
+ print('='*80)
+ task_type = str(ProblemTypes[problem.problem_type]).split('.')[1]
+ sample_format = list(conf.predict_file_columns.keys())
+ target_ = conf.conf['inputs'].get('target', None)
+ target_list = list(target_) if target_ else []
+ for single_element in sample_format[:]:
+ if single_element in target_list:
+ sample_format.remove(single_element)
+ predict_file_columns = {}
+ for index, single in enumerate(sample_format):
+ predict_file_columns[single] = index
+ print('Enabling Interactive Inference Mode for %s Task...' % (task_type.upper()))
+ print('%s Task Interactive. The sample format is <%s>' % (task_type.upper(), ', '.join(sample_format)))
+ case_cnt = 1
+ while True:
+ print('Case%d:' % case_cnt)
+ sample = []
+ for single in sample_format:
+ temp_ = input('\t%s: ' % single)
+ if temp_.lower() == 'exit':
+ exit(0)
+ sample.append(temp_)
+ sample = '\t'.join(sample)
+ result = lm.interactive([sample], predict_file_columns, conf.predict_fields, params.predict_mode)
+ print('\tInference result: %s' % result)
+ case_cnt += 1
+ else:
+ raise Exception('Predict mode support interactive|batch, get %s' % params.predict_mode)
+
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Prediction')
parser.add_argument("--conf_path", type=str, help="configuration path")
+ parser.add_argument("--predict_mode", type=str, default='batch', help='interactive|batch')
parser.add_argument("--predict_data_path", type=str, help='specify another predict data path, instead of the one defined in configuration file')
parser.add_argument("--previous_model_path", type=str, help='load model trained previously.')
parser.add_argument("--predict_output_path", type=str, help='specify another prediction output path, instead of conf[outputs][save_base_dir] + conf[outputs][predict_output_name] defined in configuration file')
diff --git a/problem.py b/problem.py
index 2610da8..cc150d5 100644
--- a/problem.py
+++ b/problem.py
@@ -12,9 +12,9 @@ nltk.download('stopwords', quiet=True)
from utils.BPEEncoder import BPEEncoder
import os
import pickle as pkl
-from utils.common_utils import load_from_pkl, dump_to_pkl
+from utils.common_utils import load_from_pkl, dump_to_pkl, load_from_json, dump_to_json, prepare_dir, md5
-from settings import ProblemTypes
+from settings import ProblemTypes, Setting as st
import math
from utils.ProcessorsScheduler import ProcessorsScheduler
@@ -65,6 +65,11 @@ class Problem():
target_with_start, target_with_end, target_with_unk, target_with_pad, same_length = (False, ) * 5
with_bos_eos = False
+ if ProblemTypes[problem_type] == ProblemTypes.sequence_tagging:
+ target_with_start = False
+ target_with_end = False
+ target_with_unk = False
+
self.lowercase = lowercase
self.problem_type = problem_type
self.tagging_scheme = tagging_scheme
@@ -107,24 +112,21 @@ class Problem():
else:
return None
- def get_data_generator_from_file(self, data_path_list, file_with_col_header, chunk_size=1000000):
- # NOTE: file_path is a list type
- for single_path in data_path_list:
- data_list = list()
- if single_path is not None:
- with open(single_path, "r", encoding='utf-8') as f:
- if file_with_col_header:
- f.readline()
- for index, line in enumerate(f):
- line = line.rstrip()
- if not line:
- break
- data_list.append(line)
- if (index + 1) % chunk_size == 0:
- yield data_list
- data_list = list()
- if len(data_list) > 0:
- yield data_list
+ def get_data_generator_from_file(self, data_path, file_with_col_header, chunk_size=1000000):
+ data_list = list()
+ with open(data_path, "r", encoding='utf-8') as f:
+ if file_with_col_header:
+ f.readline()
+ for index, line in enumerate(f):
+ line = line.rstrip()
+ if not line:
+ break
+ data_list.append(line)
+ if (index + 1) % chunk_size == 0:
+ yield data_list
+ data_list = list()
+ if len(data_list) > 0:
+ yield data_list
def build_training_data_list(self, training_data_list, file_columns, input_types, answer_column_name, bpe_encoder=None):
docs = dict() # docs of each type of input
@@ -162,7 +164,10 @@ class Problem():
line_split[i] = self.text_preprocessor.preprocess(line_split[i])
if col_index_types[i] == 'word':
- token_list = self.tokenizer.tokenize(line_split[i])
+ if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+ token_list = line_split[i].split(" ")
+ else:
+ token_list = self.tokenizer.tokenize(line_split[i])
docs[col_index_types[i]].append(token_list)
if 'char' in docs:
# add char
@@ -218,11 +223,11 @@ class Problem():
def build(self, data_path_list, file_columns, input_types, file_with_col_header, answer_column_name, word2vec_path=None, word_emb_dim=None,
format=None, file_type=None, involve_all_words=None, file_format="tsv", show_progress=True,
- cpu_num_workers=-1, max_vocabulary=800000, word_frequency=3):
+ cpu_num_workers=-1, max_vocabulary=800000, word_frequency=3, max_building_lines=1000*1000):
"""
Args:
- training_data_path:
+ data_path_list:
file_columns: {
"word1": 0,
"word2": 1,
@@ -260,39 +265,29 @@ class Problem():
"""
# parameter check
- if not word2vec_path:
- word_emb_dim, format, file_type, involve_all_words = None, None, None, None
-
- if 'bpe' in input_types:
- try:
- bpe_encoder = BPEEncoder(input_types['bpe']['bpe_path'])
- except KeyError:
- raise Exception('Please define a bpe path at the embedding layer.')
- else:
- bpe_encoder = None
-
+ bpe_encoder = self._check_bpe_encoder(input_types)
self.file_column_num = len(file_columns)
- progress = self.get_data_generator_from_file(data_path_list, file_with_col_header)
- preprocessed_data_generator = self.build_training_multi_processor(progress, cpu_num_workers, file_columns, input_types, answer_column_name, bpe_encoder=bpe_encoder)
-
- # update symbol universe
- total_cnt_legal, total_cnt_illegal = 0, 0
- for docs, target_docs, cnt_legal, cnt_illegal in tqdm(preprocessed_data_generator):
- total_cnt_legal += cnt_legal
- total_cnt_illegal += cnt_illegal
- # input_type
- for input_type in input_types:
- self.input_dicts[input_type].update(docs[input_type])
-
- # problem_type
- if ProblemTypes[self.problem_type] == ProblemTypes.classification or \
- ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
- self.output_dict.update(list(target_docs.values())[0])
- elif ProblemTypes[self.problem_type] == ProblemTypes.regression or \
- ProblemTypes[self.problem_type] == ProblemTypes.mrc:
- pass
- logging.info("Corpus imported: %d legal lines, %d illegal lines." % (total_cnt_legal, total_cnt_illegal))
+ for data_path in data_path_list:
+ if data_path:
+ progress = self.get_data_generator_from_file(data_path, file_with_col_header, chunk_size=max_building_lines)
+ preprocessed_data_generator= self.build_training_multi_processor(progress, cpu_num_workers, file_columns, input_types, answer_column_name, bpe_encoder=bpe_encoder)
+
+ # update symbol universe
+ docs, target_docs, cnt_legal, cnt_illegal = next(preprocessed_data_generator)
+
+ # input_type
+ for input_type in input_types:
+ self.input_dicts[input_type].update(docs[input_type])
+
+ # problem_type
+ if ProblemTypes[self.problem_type] == ProblemTypes.classification or \
+ ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+ self.output_dict.update(list(target_docs.values())[0])
+ elif ProblemTypes[self.problem_type] == ProblemTypes.regression or \
+ ProblemTypes[self.problem_type] == ProblemTypes.mrc:
+ pass
+ logging.info("[Building Dictionary] in %s at most %d lines imported: %d legal lines, %d illegal lines." % (data_path, max_building_lines, cnt_legal, cnt_illegal))
# build dictionary
for input_type in input_types:
@@ -300,6 +295,11 @@ class Problem():
logging.info("%d types in %s column" % (self.input_dicts[input_type].cell_num(), input_type))
if self.output_dict:
self.output_dict.build(threshold=0)
+ if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+ self.output_dict.cell_id_map[""] = len(self.output_dict.cell_id_map)
+ self.output_dict.id_cell_map[len(self.output_dict.id_cell_map)] = ""
+ self.output_dict.cell_id_map[""] = len(self.output_dict.cell_id_map)
+ self.output_dict.id_cell_map[len(self.output_dict.id_cell_map)] = ""
logging.info("%d types in target column" % (self.output_dict.cell_num()))
logging.debug("training data dict built")
@@ -313,7 +313,12 @@ class Problem():
self.input_dicts['word'].update([list(word_emb_dict.keys())])
self.input_dicts['word'].build(threshold=0, max_vocabulary_num=len(word_emb_dict))
else:
- word_emb_dict = load_embedding(word2vec_path, word_emb_dim, format, file_type, with_head=False, word_set=self.input_dicts['word'].cell_id_map.keys())
+ extend_vocabulary = set()
+ for single_word in self.input_dicts['word'].cell_id_map.keys():
+ extend_vocabulary.add(single_word)
+ if single_word.lower() != single_word:
+ extend_vocabulary.add(single_word.lower())
+ word_emb_dict = load_embedding(word2vec_path, word_emb_dim, format, file_type, with_head=False, word_set=extend_vocabulary)
for word in word_emb_dict:
loaded_emb_dim = len(word_emb_dict[word])
@@ -329,11 +334,15 @@ class Problem():
word_emb_matrix = []
unknown_word_count = 0
+ scale = np.sqrt(3.0 / word_emb_dim)
for i in range(self.input_dicts['word'].cell_num()):
- if self.input_dicts['word'].id_cell_map[i] in word_emb_dict:
- word_emb_matrix.append(word_emb_dict[self.input_dicts['word'].id_cell_map[i]])
+ single_word = self.input_dicts['word'].id_cell_map[i]
+ if single_word in word_emb_dict:
+ word_emb_matrix.append(word_emb_dict[single_word])
+ elif single_word.lower() in word_emb_dict:
+ word_emb_matrix.append(word_emb_dict[single_word.lower()])
else:
- word_emb_matrix.append(word_emb_dict[''])
+ word_emb_matrix.append(np.random.uniform(-scale, scale, word_emb_dim))
unknown_word_count += 1
word_emb_matrix = np.array(word_emb_matrix)
logging.info("word embedding matrix shape:(%d, %d); unknown word count: %d;" %
@@ -382,8 +391,6 @@ class Problem():
def encode_data_multi_processor(self, data_generator, cpu_num_workers, file_columns, input_types, object_inputs,
answer_column_name, min_sentence_len, extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", bpe_encoder=None):
-
-
for data in data_generator:
scheduler = ProcessorsScheduler(cpu_num_workers)
func_args = (data, file_columns, input_types, object_inputs,
@@ -403,7 +410,7 @@ class Problem():
yield output_data, lengths, target, cnt_legal, cnt_illegal
def encode_data_list(self, data_list, file_columns, input_types, object_inputs, answer_column_name, min_sentence_len,
- extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", bpe_encoder=None):
+ extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", bpe_encoder=None, predict_mode='batch'):
data = dict()
lengths = dict()
char_emb = True if 'char' in [single_input_type.lower() for single_input_type in input_types] else False
@@ -423,6 +430,9 @@ class Problem():
type_branches = dict() # branch of input type, e.g. type_branches['query_index'] = 'query'
+ # for char: don't split these words
+ word_no_split = ['', '', '', '']
+
for branch in object_inputs:
data[branch] = dict()
lengths[branch] = dict()
@@ -461,11 +471,14 @@ class Problem():
line_split = line.rstrip().split('\t')
cnt_all += 1
if len(line_split) != len(file_columns):
- # logging.warning("Current line is inconsistent with configuration/inputs/file_header. Ingore now. %s" % line)
- cnt_illegal += 1
- if cnt_illegal / cnt_all > 0.33:
- raise PreprocessError('The illegal data is too much. Please check the number of data columns or text token version.')
- continue
+ if predict_mode == 'batch':
+ cnt_illegal += 1
+ if cnt_illegal / cnt_all > 0.33:
+ raise PreprocessError('The illegal data is too much. Please check the number of data columns or text token version.')
+ continue
+ else:
+ print('\tThe case is illegal! Please check your case and input again!')
+ return [None]*5
# cnt_legal += 1
length_appended_set = set() # to store branches whose length have been appended to lengths[branch]
@@ -496,7 +509,7 @@ class Problem():
data[extra_info_type]['extra_passage_text'].append(line_split[i])
data[extra_info_type]['extra_passage_token_offsets'].append(passage_token_offsets)
else:
- if extra_feature == False:
+ if extra_feature == False and ProblemTypes[self.problem_type] != ProblemTypes.sequence_tagging:
tokens = self.tokenizer.tokenize(line_split[i])
else:
tokens = line_split[i].split(' ')
@@ -505,6 +518,28 @@ class Problem():
else:
tokens = line_split[i].split(' ')
+ # for sequence labeling tasks, the recorded length must be the true length of the sentence in the corpus
+ if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+ if not branch in length_appended_set:
+ lengths[branch]['sentence_length'].append(len(tokens))
+ length_appended_set.add(branch)
+ else:
+ if len(tokens) != lengths[branch]['sentence_length'][-1]:
+ # logging.warning(
+ # "The length of inputs are not consistent. Ingore now. %s" % line)
+ cnt_illegal += 1
+ if cnt_illegal / cnt_all > 0.33:
+ raise PreprocessError(
+ "The illegal data is too much. Please check the number of data columns or text token version.")
+ lengths[branch]['sentence_length'].pop()
+ true_len = len(lengths[branch]['sentence_length'])
+ # need delete the last example
+ check_list = ['data', 'lengths', 'target']
+ for single_check in check_list:
+ single_check = eval(single_check)
+ self.delete_example(single_check, true_len)
+ break
+
if fixed_lengths and type_branches[input_type[0]] in fixed_lengths:
if len(tokens) >= fixed_lengths[type_branches[input_type[0]]]:
tokens = tokens[:fixed_lengths[type_branches[input_type[0]]]]
@@ -520,32 +555,45 @@ class Problem():
if self.with_bos_eos is True:
tokens = [''] + tokens + [''] # so that source_with_start && source_with_end should be True
- if not branch in length_appended_set:
- lengths[branch]['sentence_length'].append(len(tokens))
- length_appended_set.add(branch)
- else:
- if len(tokens) != lengths[branch]['sentence_length'][-1]:
- # logging.warning(
- # "The length of inputs are not consistent. Ingore now. %s" % line)
- cnt_illegal += 1
- if cnt_illegal / cnt_all > 0.33:
- raise PreprocessError("The illegal data is too much. Please check the number of data columns or text token version.")
- lengths[branch]['sentence_length'].pop()
- true_len = len(lengths[branch]['sentence_length'])
- # need delete the last example
- check_list = ['data', 'lengths', 'target']
- for single_check in check_list:
- single_check = eval(single_check)
- self.delete_example(single_check, true_len)
- break
+ # for other tasks, the recorded length must match the data length after the fixed/max length operation
+ if not ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+ if not branch in length_appended_set:
+ lengths[branch]['sentence_length'].append(len(tokens))
+ length_appended_set.add(branch)
+ else:
+ if len(tokens) != lengths[branch]['sentence_length'][-1]:
+ # logging.warning(
+ # "The length of inputs are not consistent. Ingore now. %s" % line)
+ cnt_illegal += 1
+ if cnt_illegal / cnt_all > 0.33:
+ raise PreprocessError(
+ "The illegal data is too much. Please check the number of data columns or text token version.")
+ lengths[branch]['sentence_length'].pop()
+ true_len = len(lengths[branch]['sentence_length'])
+ # need delete the last example
+ check_list = ['data', 'lengths', 'target']
+ for single_check in check_list:
+ single_check = eval(single_check)
+ self.delete_example(single_check, true_len)
+ break
for single_input_type in input_type:
if 'char' in single_input_type:
temp_word_char = []
temp_word_length = []
for single_token in tokens:
- temp_word_char.append(self.input_dicts[type2cluster[single_input_type]].lookup(single_token))
- temp_word_length.append(len(single_token))
+ if single_token in word_no_split:
+ # temp_word_length.append(1)
+ temp_id = [self.input_dicts[type2cluster[single_input_type]].id(single_token)]
+ else:
+ temp_id = self.input_dicts[type2cluster[single_input_type]].lookup(single_token)
+ if fixed_lengths and 'word' in fixed_lengths:
+ if len(temp_id) >= fixed_lengths['word']:
+ temp_id = temp_id[:fixed_lengths['word']]
+ else:
+ temp_id = temp_id + [self.input_dicts[type2cluster[single_input_type]].id('')] * (fixed_lengths['word'] - len(temp_id))
+ temp_word_char.append(temp_id)
+ temp_word_length.append(len(temp_id))
data[branch][single_input_type].append(temp_word_char)
lengths[branch]['word_length'].append(temp_word_length)
else:
@@ -625,7 +673,7 @@ class Problem():
def encode(self, data_path, file_columns, input_types, file_with_col_header, object_inputs, answer_column_name,
min_sentence_len, extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", show_progress=True,
- cpu_num_workers = -1):
+ cpu_num_workers=-1, chunk_size=1000*1000):
"""
Args:
@@ -701,22 +749,16 @@ class Problem():
target: [...]
"""
- if 'bpe' in input_types:
- try:
- bpe_encoder = BPEEncoder(input_types['bpe']['bpe_path'])
- except KeyError:
- raise Exception('Please define a bpe path at the embedding layer.')
- else:
- bpe_encoder = None
+ bpe_encoder = self._check_bpe_encoder(input_types)
- progress = self.get_data_generator_from_file([data_path], file_with_col_header)
- encoder_generator = self.encode_data_multi_processor(progress, cpu_num_workers,
+ progress = self.get_data_generator_from_file(data_path, file_with_col_header, chunk_size=chunk_size)
+ encode_generator = self.encode_data_multi_processor(progress, cpu_num_workers,
file_columns, input_types, object_inputs, answer_column_name, min_sentence_len, extra_feature, max_lengths,
fixed_lengths, file_format, bpe_encoder=bpe_encoder)
data, lengths, target = dict(), dict(), dict()
cnt_legal, cnt_illegal = 0, 0
- for temp_data, temp_lengths, temp_target, temp_cnt_legal, temp_cnt_illegal in tqdm(encoder_generator):
+ for temp_data, temp_lengths, temp_target, temp_cnt_legal, temp_cnt_illegal in tqdm(encode_generator):
data = self._merge_encode_data(data, temp_data)
lengths = self._merge_encode_lengths(lengths, temp_lengths)
target = self._merge_target(target, temp_target)
@@ -726,6 +768,59 @@ class Problem():
logging.info("%s: %d legal samples, %d illegal samples" % (data_path, cnt_legal, cnt_illegal))
return data, lengths, target
+    def build_encode_cache(self, conf, file_format="tsv"):
+        """Build the encoding cache by draining the cache-building encode generator.
+
+        Args:
+            conf: configuration object passed through to ``get_encode_generator``;
+                its ``encoding_cache_dir`` is reported in the final log line.
+            file_format: forwarded unchanged to ``get_encode_generator``.
+        """
+        logging.info("[Cache] building encoding cache")
+        cache_writer = self.get_encode_generator(conf, build_cache=True, file_format=file_format)
+        # the generator is consumed only for its side effects; the yielded items are discarded
+        for _chunk in cache_writer:
+            pass
+        logging.info("[Cache] encoding is saved to %s" % conf.encoding_cache_dir)
+
+ def get_encode_generator(self, conf, build_cache=True, file_format="tsv"):
+ """Encode conf.train_data_path chunk by chunk, yielding (data, lengths, target) for each chunk.
+
+ When build_cache is true, each encoded chunk is also dumped with dump_to_pkl into
+ conf.encoding_cache_dir, and after the last chunk an index (file names paired with the
+ value of md5([file_path]), plus the legal/illegal totals) is dumped as JSON together
+ with the md5 of that index file.
+ This is a generator: nothing is read or written until it is iterated, and the index
+ files are written only once it has been exhausted.
+ """
+ # parameter check
+ if build_cache:
+ # the cache locations must already be set on conf before any file can be written
+ assert conf.encoding_cache_dir, 'There is no property encoding_cache_dir in object conf'
+ assert conf.encoding_cache_index_file_path, 'There is no property encoding_cache_index_file_path in object conf'
+ assert conf.encoding_cache_index_file_md5_path, 'There is no property encoding_cache_index_file_md5_path in object conf'
+
+ bpe_encoder = self._check_bpe_encoder(conf.input_types)
+ # raw lines are read in chunks of conf.chunk_size and encoded one chunk at a time
+ data_generator = self.get_data_generator_from_file(conf.train_data_path, conf.file_with_col_header, chunk_size=conf.chunk_size)
+ encode_generator = self.encode_data_multi_processor(data_generator, conf.cpu_num_workers,
+ conf.file_columns, conf.input_types, conf.object_inputs, conf.answer_column_name,
+ conf.min_sentence_len, conf.extra_feature, conf.max_lengths,
+ conf.fixed_lengths, file_format, bpe_encoder=bpe_encoder)
+
+ # one [file_name, md5] entry per cache file written
+ file_index = []
+ total_cnt_legal, total_cnt_illegal = 0, 0
+ for part_number, encode_data in enumerate(encode_generator):
+ data, lengths, target, cnt_legal, cnt_illegal = encode_data
+ if build_cache:
+ total_cnt_legal = total_cnt_legal + cnt_legal
+ total_cnt_illegal = total_cnt_illegal + cnt_illegal
+ # the chunk number is substituted into the cache file name pattern
+ file_name = st.cencoding_file_name_pattern % (part_number)
+ file_path = os.path.join(conf.encoding_cache_dir, file_name)
+ dump_to_pkl((data, lengths, target), file_path)
+ file_index.append([file_name, md5([file_path])])
+ logging.info("Up to now, in %s: %d legal samples, %d illegal samples" % (conf.train_data_path, total_cnt_legal, total_cnt_illegal))
+ yield data, lengths, target
+
+ if build_cache:
+ # runs only after the caller has consumed every chunk
+ cache_index = dict()
+ cache_index[st.cencoding_key_index] = file_index
+ cache_index[st.cencoding_key_legal_cnt] = total_cnt_legal
+ cache_index[st.cencoding_key_illegal_cnt] = total_cnt_illegal
+ dump_to_json(cache_index, conf.encoding_cache_index_file_path)
+ # the md5 of the index file is stored in a second JSON file
+ dump_to_json(md5([conf.encoding_cache_index_file_path]), conf.encoding_cache_index_file_md5_path)
+
+    @staticmethod
+    def _check_bpe_encoder(input_types):
+        """Create the BPE encoder configured in ``input_types``, if there is one.
+
+        Args:
+            input_types: mapping of input type names to their settings.
+
+        Returns:
+            None when ``input_types`` has no 'bpe' entry, otherwise a ``BPEEncoder``
+            built from ``input_types['bpe']['bpe_path']``.
+
+        Raises:
+            Exception: when building the encoder raises ``KeyError``, e.g. because
+                'bpe_path' is missing.
+        """
+        if 'bpe' not in input_types:
+            return None
+        try:
+            return BPEEncoder(input_types['bpe']['bpe_path'])
+        except KeyError:
+            raise Exception('Please define a bpe path at the embedding layer.')
+
def decode(self, model_output, lengths=None, batch_data=None):
""" decode the model output, either a batch of output or a single output
diff --git a/register_block.py b/register_block.py
new file mode 100644
index 0000000..c4956cb
--- /dev/null
+++ b/register_block.py
@@ -0,0 +1,63 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+import os
+import argparse
+
+
+def get_block_path(block_name, path='./block_zoo'):
+    """Recursively search ``path`` for the file ``<block_name>.py``.
+
+    Args:
+        block_name: the name that needs to be registered, e.g. BiLSTM or CRF.
+        path: directory to search; its sub-directories are searched recursively.
+
+    Returns:
+        The path of the first matching file, built by joining ``path`` with the
+        directories leading to it, or None when nothing matches or ``path`` is
+        not a directory.
+    """
+    if not os.path.isdir(path):
+        # a missing search root means "block not found", so the caller can
+        # report its own error instead of a raw FileNotFoundError
+        return None
+    target = block_name + '.py'
+    # sorted so the winner is deterministic when several files share the name
+    for entry in sorted(os.listdir(path)):
+        candidate = os.path.join(path, entry)
+        if os.path.isdir(candidate):
+            found = get_block_path(block_name, path=candidate)
+            if found:
+                return found
+        elif entry == target:
+            return candidate
+    return None
+
+
+def write_file(new_block_path, file_path):
+    """Append the import line for a block to ``file_path/__init__.py``.
+
+    The line has the form ``from .<first> import <Block>, <Block>Conf`` where
+    ``<first>`` is the first path component between ``file_path`` and the block
+    file (the module itself when the block lives directly in ``file_path``).
+
+    Args:
+        new_block_path: path of the block's ``.py`` file, located under ``file_path``.
+        file_path: package directory whose ``__init__.py`` receives the import.
+    """
+    init_path = os.path.join(file_path, '__init__.py')
+    # relpath tolerates trailing separators and platform-specific separators,
+    # unlike slicing the string by prefix length
+    relative = os.path.relpath(new_block_path, file_path).replace(os.sep, '/')
+    parts = relative.split('/')
+    module_name = os.path.splitext(parts[-1])[0]
+    source = parts[0] if len(parts) > 1 else module_name
+    line = 'from .' + source + ' import ' + module_name + ', ' + module_name + 'Conf'
+    # registering the same block twice must not duplicate the import
+    if os.path.isfile(init_path):
+        with open(init_path, 'r', encoding='utf-8') as existing:
+            if any(row.strip() == line for row in existing):
+                return
+    with open(init_path, 'a', encoding='utf-8') as init_file:
+        init_file.write('\n' + line + '\n')
+
+
+def register(block_name, new_block_path):
+    """Add the import for a block to every package ``__init__.py`` above it.
+
+    For example block_zoo/__init__.py and block_zoo/subdir/__init__.py.
+
+    Args:
+        block_name: name of the block, used only in messages.
+        new_block_path: '/'-separated path of the block file, or a falsy value
+            when the file was not found.
+
+    Raises:
+        Exception: when ``new_block_path`` is falsy.
+    """
+    if not new_block_path:
+        raise Exception('The %s.py file does not exist! Please check your program or file name.' % block_name)
+    components = new_block_path.split('/')
+    # start at the directory holding the block and move up; prefixes shorter
+    # than two components are never written to
+    depth = len(components) - 1
+    while depth > 1:
+        package_dir = os.path.join('/'.join(components[:depth]))
+        write_file(new_block_path, package_dir)
+        depth -= 1
+    print('The block %s is registered successfully.' % block_name)
+
+
+def main(params):
+    """Locate the file for ``params.block_name`` and register it."""
+    block_name = params.block_name
+    register(block_name, get_block_path(block_name))
+
+
+if __name__ == '__main__':
+    # command line entry point: python register_block.py --block_name NAME
+    parse = argparse.ArgumentParser(description='Register Block')
+    parse.add_argument("--block_name", type=str, help="block name want to be registered")
+    params, _ = parse.parse_known_args()
+    # report a usage error instead of asserting: asserts are stripped under
+    # `python -O` and would otherwise show a traceback to the user
+    if not params.block_name:
+        parse.error('Please specify a block_name via --block_name')
+    main(params)
diff --git a/settings.py b/settings.py
index df84c99..cf864b5 100644
--- a/settings.py
+++ b/settings.py
@@ -18,11 +18,11 @@ LanguageTypes = Enum('LanguageTypes', ('english', 'chinese'))
ProblemTypes = Enum('ProblemTypes', ('sequence_tagging', 'classification', 'regression', 'mrc'))
# Supported sequence tagging scheme
-TaggingSchemes = Enum('TaggingSchemes', ('BIO'))
+TaggingSchemes = Enum('TaggingSchemes', ('BIO', 'BIOES'))
# supported metrics
SupportedMetrics = {
- ProblemTypes.sequence_tagging: set(['seq_tag_f1', 'accuracy']),
+ ProblemTypes.sequence_tagging: set(['seq_tag_f1', 'seq_tag_accuracy']),
ProblemTypes.classification: set(['auc', 'accuracy', 'f1', 'macro_f1', 'macro_precision', 'macro_recall', 'micro_f1', 'micro_precision', 'micro_recall', 'weighted_f1', 'weighted_precision', 'weighted_recall']),
# In addition, for auc in multi-type classification,
# if there is a type named 1, auc@1 means use 1 as the positive label
@@ -53,3 +53,27 @@ DefaultPredictionFields = {
# nltk's models
nltk.data.path.append(os.path.join(os.getcwd(), 'dataset', 'nltk_data'))
+
+class Constant(type):
+    """Metaclass whose classes reject attribute assignment and deletion."""
+
+    def __setattr__(cls, name, value):
+        raise AttributeError("Class %s can not be modified"%(cls.__name__))
+
+    def __delattr__(cls, name):
+        # deleting a constant is a modification as well
+        raise AttributeError("Class %s can not be modified"%(cls.__name__))
+
+
+class ConstantStatic(metaclass=Constant):
+    """Base class for constant namespaces; instantiating it always raises."""
+
+    def __init__(self, *args,**kwargs):
+        raise Exception("Class %s can not be instantiated"%(self.__class__.__name__))
+
+
+class Setting(ConstantStatic):
+    """Read-only constants shared across the project."""
+    # cache
+
+    ## cencoding (cache_encoding)
+    cencoding_index_file_name = 'index.json'
+    # misspelled name kept as an alias so existing references keep working
+    cencodig_index_file_name = cencoding_index_file_name
+    cencoding_index_md5_file_name = 'index_md5.json'
+    # '%s' receives the part number of the cache file
+    cencoding_file_name_pattern = 'encoding_cache_%s.pkl'
+    cencoding_key_finish = 'finish'
+    cencoding_key_index = 'index'
+    cencoding_key_legal_cnt = 'legal_line_cnt'
+    cencoding_key_illegal_cnt = 'illegal_line_cnt'
+
+
+
diff --git a/test.py b/test.py
index a1ef8c8..c3df2b1 100644
--- a/test.py
+++ b/test.py
@@ -19,7 +19,7 @@ def main(params):
problem = Problem("test", conf.problem_type, conf.input_types, conf.answer_column_name,
with_bos_eos=conf.add_start_end_for_seq, tagging_scheme=conf.tagging_scheme, tokenizer=conf.tokenizer,
remove_stopwords=conf.remove_stopwords, DBC2SBC=conf.DBC2SBC, unicode_fix=conf.unicode_fix)
-
+
if os.path.isfile(conf.saved_problem_path):
problem.load_problem(conf.saved_problem_path)
logging.info("Problem loaded!")
diff --git a/tools/calculate_AUC.py b/tools/calculate_auc.py
similarity index 100%
rename from tools/calculate_AUC.py
rename to tools/calculate_auc.py
diff --git a/tools/tagging_schemes_converter.py b/tools/tagging_schemes_converter.py
new file mode 100644
index 0000000..f206191
--- /dev/null
+++ b/tools/tagging_schemes_converter.py
@@ -0,0 +1,112 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT license.
+
+import sys
+
+
+def BIO2BIOES(input_labels_list):
+    """Convert label sequences from the BIO scheme to BIOES.
+
+    A ``B-X`` that is not followed by an ``I-`` label becomes ``S-X``; an ``I-X``
+    that is not followed by an ``I-`` label becomes ``E-X``. Every other label,
+    including ``O``, is copied unchanged.
+
+    Args:
+        input_labels_list: list of sentences, each a list of label strings.
+
+    Returns:
+        A new list of label lists with the same shape as the input.
+    """
+    output_labels_list = []
+    for labels in input_labels_list:
+        new_labels = []
+        last_idx = len(labels) - 1
+        for idx, label in enumerate(labels):
+            # split on the first '-' only, so hyphenated types such as PER-NOM survive
+            prefix, sep, label_type = label.partition('-')
+            if not sep or prefix not in ('B', 'I'):
+                new_labels.append(label)
+                continue
+            # the entity ends here unless the next label continues it
+            ends_here = idx == last_idx or not labels[idx + 1].startswith('I-')
+            if prefix == 'B':
+                new_prefix = 'S' if ends_here else 'B'
+            else:
+                new_prefix = 'E' if ends_here else 'I'
+            new_labels.append(new_prefix + '-' + label_type)
+        output_labels_list.append(new_labels)
+    return output_labels_list
+
+
+def BIOES2BIO(input_labels_list):
+    """Convert label sequences from the BIOES scheme to BIO.
+
+    ``E-X`` becomes ``I-X`` and ``S-X`` becomes ``B-X``; every other label is
+    copied unchanged.
+
+    Args:
+        input_labels_list: list of sentences, each a list of label strings.
+
+    Returns:
+        A new list of label lists with the same shape as the input.
+    """
+    bio_prefix = {'E': 'I', 'S': 'B'}
+    output_labels_list = []
+    for labels in input_labels_list:
+        new_labels = []
+        for label in labels:
+            # look at the tag prefix only; the type may itself contain '-' or 'E-'/'S-'
+            prefix, sep, label_type = label.partition('-')
+            if sep and prefix in bio_prefix:
+                new_labels.append(bio_prefix[prefix] + '-' + label_type)
+            else:
+                new_labels.append(label)
+        output_labels_list.append(new_labels)
+    return output_labels_list
+
+
+def IOB2BIO(input_labels_list):
+    """Convert label sequences from the IOB scheme to BIO.
+
+    An ``I-X`` label that opens an entity (first label of the sentence, or the
+    previous label is untyped or has a different type) becomes ``B-X``. Every
+    other label is copied unchanged.
+
+    Args:
+        input_labels_list: list of sentences, each a list of label strings.
+
+    Returns:
+        A new list of label lists with the same shape as the input.
+    """
+    output_labels_list = []
+    for labels in input_labels_list:
+        new_labels = []
+        for idx, label in enumerate(labels):
+            if not label.startswith('I-'):
+                new_labels.append(label)
+                continue
+            # everything after the tag prefix is the type, hyphens included
+            label_type = label[2:]
+            starts_entity = True
+            if idx > 0:
+                _, prev_sep, prev_type = labels[idx - 1].partition('-')
+                starts_entity = not prev_sep or prev_type != label_type
+            new_labels.append('B-' + label_type if starts_entity else label)
+        output_labels_list.append(new_labels)
+    return output_labels_list
+
+
+if __name__ == '__main__':
+    '''Convert NER tagging schemes among IOB/BIO/BIOES.
+    For example: if you want to convert the IOB tagging scheme to BIO, then you run as following:
+    python tagging_schemes_converter.py IOB2BIO input_iob_file output_bio_file
+    Input data format is tsv format: each line holds space-separated words, a tab,
+    then space-separated labels.
+    '''
+    converters = {
+        "IOB2BIO": ("Convert IOB -> BIO...", IOB2BIO),
+        "BIO2BIOES": ("Convert BIO -> BIOES...", BIO2BIOES),
+        "BIOES2BIO": ("Convert BIOES -> BIO...", BIOES2BIO),
+        "IOB2BIOES": ("Convert IOB -> BIOES...", lambda labels: BIO2BIOES(IOB2BIO(labels))),
+    }
+    if len(sys.argv) < 4:
+        sys.exit("Usage: python tagging_schemes_converter.py IOB2BIO/BIO2BIOES/BIOES2BIO/IOB2BIOES input_file output_file")
+    mode = sys.argv[1].upper()
+    if mode not in converters:
+        # stop before any file is opened, so a bad mode can not truncate the output file
+        sys.exit("Argument error: sys.argv[1] should belongs to \"IOB2BIO/BIO2BIOES/BIOES2BIO/IOB2BIOES\"")
+
+    input_file_name, output_file_name = sys.argv[2], sys.argv[3]
+    words_list, labels_list = [], []
+    with open(input_file_name, 'r', encoding='utf-8') as input_file:
+        for line_number, line in enumerate(input_file, 1):
+            item = line.rstrip().split('\t')
+            if len(item) != 2:
+                # explicit error instead of assert, which is stripped under `python -O`
+                raise ValueError("Line %d should contain exactly two tab-separated columns: %s" % (line_number, line.rstrip()))
+            words, labels = item[0].split(' '), item[1].split(' ')
+            if len(words) != len(labels):
+                # a sentence whose words and labels do not line up is reported and skipped
+                print("Error line: " + line.rstrip())
+                continue
+            words_list.append(words)
+            labels_list.append(labels)
+
+    message, convert = converters[mode]
+    print(message)
+    new_labels_list = convert(labels_list)
+
+    with open(output_file_name, 'w', encoding='utf-8') as output_file:
+        for words, labels in zip(words_list, new_labels_list):
+            output_file.write(" ".join(words) + '\t' + " ".join(labels) + '\n')
+
diff --git a/train.py b/train.py
index 4151b75..f43e29f 100644
--- a/train.py
+++ b/train.py
@@ -1,7 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
-from settings import ProblemTypes, version
+from settings import ProblemTypes, version, Setting as st
import os
import argparse
@@ -15,7 +15,7 @@ import torch
import torch.nn as nn
from ModelConf import ModelConf
from problem import Problem
-from utils.common_utils import dump_to_pkl, load_from_pkl, prepare_dir
+from utils.common_utils import dump_to_pkl, load_from_pkl, load_from_json, dump_to_json, prepare_dir, md5
from utils.philly_utils import HDFSDirectTransferer
from losses import *
from optimizers import *
@@ -34,33 +34,76 @@ class Cache:
self.dictionary_invalid = True
self.embedding_invalid = True
- # cache_conf
- cache_conf = None
- cache_conf_path = os.path.join(conf.cache_dir, 'conf_cache.json')
- if os.path.isfile(cache_conf_path):
- params_cache = copy.deepcopy(params)
- try:
- cache_conf = ModelConf('cache', cache_conf_path, version, params_cache)
- except Exception as e:
- cache_conf = None
- if cache_conf is None or not self._verify_conf(cache_conf, conf):
- return False
-
- # problem
- if not os.path.isfile(conf.problem_path):
- return False
-
- # embedding
- if conf.emb_pkl_path:
- if not os.path.isfile(conf.emb_pkl_path):
+ if not conf.pretrained_model_path:
+ # cache_conf
+ cache_conf = None
+ cache_conf_path = os.path.join(conf.cache_dir, 'conf_cache.json')
+ if os.path.isfile(cache_conf_path):
+ params_cache = copy.deepcopy(params)
+ try:
+ cache_conf = ModelConf('cache', cache_conf_path, version, params_cache)
+ except Exception as e:
+ cache_conf = None
+ if cache_conf is None or not self._verify_conf(cache_conf, conf):
return False
- self.embedding_invalid = False
+
+ # problem
+ if not os.path.isfile(conf.problem_path):
+ return False
+
+ # embedding
+ if conf.emb_pkl_path:
+ if not os.path.isfile(conf.emb_pkl_path):
+ return False
+ self.embedding_invalid = False
- self.dictionary_invalid = False
+ self.dictionary_invalid = False
+ logging.info('[Cache] dictionary found')
return True
def _check_encoding(self, conf):
+ """Check whether the encoding cache on disk can be reused.
+
+ Sets self.encoding_invalid and, as the checks progress, conf.problem_md5 and the
+ conf.encoding_cache_* attributes. Returns False at the first failed check and
+ True once every check has passed.
+ """
+ self.encoding_invalid = True
+ # without a pretrained model, the encoding cache is only trusted when the dictionary cache is valid
+ if not conf.pretrained_model_path and self.dictionary_invalid:
+ return False
+
+ # Calculate the MD5 of problem
+ problem_path = conf.problem_path if not conf.pretrained_model_path else conf.saved_problem_path
+ try:
+ conf.problem_md5 = md5([problem_path])
+ except Exception as e:
+ conf.problem_md5 = None
+ logging.info('Can not calculate md5 of problem.pkl from %s'%(problem_path))
+ return False
+
+ # check the valid of encoding cache
+ ## encoding cache dir
+ # the directory name is the concatenation of the training data md5 and the problem md5
+ conf.encoding_cache_dir = os.path.join(conf.cache_dir, conf.train_data_md5 + conf.problem_md5)
+ logging.debug('[Cache] conf.encoding_cache_dir %s' % (conf.encoding_cache_dir))
+ if not os.path.exists(conf.encoding_cache_dir):
+ return False
+
+ ## encoding cache index
+ conf.encoding_cache_index_file_path = os.path.join(conf.encoding_cache_dir, st.cencodig_index_file_name)
+ conf.encoding_cache_index_file_md5_path = os.path.join(conf.encoding_cache_dir, st.cencoding_index_md5_file_name)
+ if not os.path.exists(conf.encoding_cache_index_file_path) or not os.path.exists(conf.encoding_cache_index_file_md5_path):
+ return False
+ # the index file must still match the md5 stored next to it
+ if md5([conf.encoding_cache_index_file_path]) != load_from_json(conf.encoding_cache_index_file_md5_path):
+ return False
+ cache_index = load_from_json(conf.encoding_cache_index_file_path)
+
+ ## encoding cache content
+ # every cache file listed in the index must match its recorded md5
+ for index in cache_index[st.cencoding_key_index]:
+ file_name, file_md5 = index[0], index[1]
+ if file_md5 != md5([os.path.join(conf.encoding_cache_dir, file_name)]):
+ return False
+
+ if (st.cencoding_key_legal_cnt in cache_index) and (st.cencoding_key_illegal_cnt in cache_index):
+ conf.encoding_cache_legal_line_cnt = cache_index[st.cencoding_key_legal_cnt]
+ conf.encoding_cache_illegal_line_cnt = cache_index[st.cencoding_key_illegal_cnt]
+
self.encoding_invalid = False
+ logging.info('[Cache] encoding found')
+ # NOTE(review): the two *_line_cnt attributes are assigned above only when both keys are in the
+ # index; confirm conf defines defaults for them, otherwise this log line can raise AttributeError
+ logging.info('%s: %d legal samples, %d illegal samples' % (conf.train_data_path, conf.encoding_cache_legal_line_cnt, conf.encoding_cache_illegal_line_cnt))
return True
def check(self, conf, params):
@@ -70,7 +113,7 @@ class Cache:
return
# encoding
if not self._check_encoding(conf):
- self._renew_cache(params, conf.cache_dir)
+ self._renew_cache(params, conf.encoding_cache_dir)
def load(self, conf, problem, emb_matrix):
# load dictionary when (not finetune) and (cache valid)
@@ -81,13 +124,17 @@ class Cache:
logging.info('[Cache] loading dictionary successfully')
if not self.encoding_invalid:
- pass
+ self._prepare_encoding_cache(conf, problem, build=False)
+ logging.info('[Cache] preparing encoding successfully')
return problem, emb_matrix
def save(self, conf, params, problem, emb_matrix):
+ # make cache dir
if not os.path.exists(conf.cache_dir):
os.makedirs(conf.cache_dir)
shutil.copy(params.conf_path, os.path.join(conf.cache_dir, 'conf_cache.json'))
+
+ # dictionary
if self.dictionary_invalid:
if conf.mode == 'philly' and conf.emb_pkl_path.startswith('/hdfs/'):
with HDFSDirectTransferer(conf.problem_path, with_hdfs_command=True) as transferer:
@@ -101,10 +148,11 @@ class Cache:
transferer.pkl_dump(emb_matrix)
else:
dump_to_pkl(emb_matrix, conf.emb_pkl_path)
- logging.info("Embedding matrix saved to %s" % conf.emb_pkl_path)
+ logging.info("[Cache] Embedding matrix saved to %s" % conf.emb_pkl_path)
+ # encoding
if self.encoding_invalid:
- pass
+ self._prepare_encoding_cache(conf, problem, build=params.make_cache_only)
def back_up(self, conf, problem):
cache_bakup_path = os.path.join(conf.save_base_dir, 'necessary_cache/')
@@ -150,6 +198,34 @@ class Cache:
flag = False
return flag
+ def _prepare_encoding_cache(self, conf, problem, build=False):
+ """Set the encoding cache locations on conf and optionally (re)build the cache.
+
+ Creates conf.encoding_cache_dir when it does not exist. When build is true the cache is
+ built through problem.build_encode_cache and marked valid. Whenever the cache is valid,
+ the file index stored in the cache index file is loaded into conf.encoding_file_index.
+ """
+ # encoding cache dir
+ problem_path = conf.problem_path if not conf.pretrained_model_path else conf.saved_problem_path
+ conf.problem_md5 = md5([problem_path])
+ # the directory name is the concatenation of the training data md5 and the problem md5
+ conf.encoding_cache_dir = os.path.join(conf.cache_dir, conf.train_data_md5 + conf.problem_md5)
+ if not os.path.exists(conf.encoding_cache_dir):
+ os.makedirs(conf.encoding_cache_dir)
+
+ # encoding cache files
+ conf.encoding_cache_index_file_path = os.path.join(conf.encoding_cache_dir, st.cencodig_index_file_name)
+ conf.encoding_cache_index_file_md5_path = os.path.join(conf.encoding_cache_dir, st.cencoding_index_md5_file_name)
+ # expose the loader on conf so code holding only conf can read the cache files
+ conf.load_encoding_cache_generator = self._load_encoding_cache_generator
+
+ if build:
+ # NOTE(review): the arguments suggest prepare_dir clears an existing directory before the rebuild; confirm in utils.common_utils
+ prepare_dir(conf.encoding_cache_dir, True, allow_overwrite=True, clear_dir_if_exist=True)
+ problem.build_encode_cache(conf)
+ self.encoding_invalid = False
+
+ if not self.encoding_invalid:
+ cache_index = load_from_json(conf.encoding_cache_index_file_path)
+ conf.encoding_file_index = cache_index[st.cencoding_key_index]
+
+    @staticmethod
+    def _load_encoding_cache_generator(cache_dir, file_index):
+        """Yield the content of each cache file listed in ``file_index``, in order.
+
+        Args:
+            cache_dir: directory that holds the cache files.
+            file_index: sequence of entries whose first element is a file name.
+        """
+        for entry in file_index:
+            file_name = entry[0]
+            yield load_from_pkl(os.path.join(cache_dir, file_name))
+
def main(params):
# init
conf = ModelConf("train", params.conf_path, version, params, mode=params.mode)
@@ -172,6 +248,7 @@ def main(params):
# data preprocessing
## build dictionary when (not in finetune model) and (not use cache or cache invalid)
if (not conf.pretrained_model_path) and ((conf.use_cache == False) or cache.dictionary_invalid):
+ logging.info("="*100)
logging.info("Preprocessing... Depending on your corpus size, this step may take a while.")
# modify train_data_path to [train_data_path, valid_data_path, test_data_path]
# remember the test_data may be None
@@ -181,11 +258,7 @@ def main(params):
word_emb_dim=conf.pretrained_emb_dim, format=conf.pretrained_emb_type,
file_type=conf.pretrained_emb_binary_or_text, involve_all_words=conf.involve_all_words_in_pretrained_emb,
show_progress=True if params.mode == 'normal' else False, cpu_num_workers = conf.cpu_num_workers,
- max_vocabulary=conf.max_vocabulary, word_frequency=conf.min_word_frequency)
-
- ## encode rawdata when do not use cache
- if conf.use_cache == False:
- pass
+ max_vocabulary=conf.max_vocabulary, word_frequency=conf.min_word_frequency, max_building_lines=conf.max_building_lines)
# environment preparing
## cache save
@@ -234,9 +307,20 @@ def main(params):
### optimizer
if isinstance(lm.model, nn.DataParallel):
- optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()) + list(lm.model.module.layers['embedding'].get_parameters()), **conf.optimizer_params)
+ if isinstance(lm.model.module.layers['embedding'].embeddings, nn.ModuleDict):
+ optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()), **conf.optimizer_params)
+ else:
+ optimizer = eval(conf.optimizer_name)(
+ list(lm.model.parameters()) + list(lm.model.module.layers['embedding'].get_parameters()),
+ **conf.optimizer_params)
else:
- optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()) + list(lm.model.layers['embedding'].get_parameters()), **conf.optimizer_params)
+ if isinstance(lm.model.layers['embedding'].embeddings, nn.ModuleDict):
+ optimizer = eval(conf.optimizer_name)(
+ list(lm.model.parameters()), **conf.optimizer_params)
+ else:
+ optimizer = eval(conf.optimizer_name)(
+ list(lm.model.parameters()) + list(lm.model.layers['embedding'].get_parameters()),
+ **conf.optimizer_params)
## train
lm.train(optimizer, loss_fn)
diff --git a/utils/common_utils.py b/utils/common_utils.py
index a7c413b..3e790ba 100644
--- a/utils/common_utils.py
+++ b/utils/common_utils.py
@@ -3,6 +3,7 @@
import logging
import pickle as pkl
+import json
import torch
import torch.nn as nn
import os
@@ -49,6 +50,17 @@ def dump_to_pkl(obj, pkl_path):
pkl.dump(obj, fout, protocol=pkl.HIGHEST_PROTOCOL)
logging.debug("Obj dumped to %s!" % pkl_path)
+def load_from_json(json_path):
+    """Parse the UTF-8 encoded JSON file at json_path and return the decoded object."""
+    with open(json_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    logging.debug("%s loaded!" % json_path)
+    return data
+
+def dump_to_json(obj, json_path):  # write obj to json_path as UTF-8 JSON text
+ with open(json_path, 'w', encoding='utf-8') as f:  # mode 'w' truncates an existing file
+ f.write(json.dumps(obj))  # obj is serialized fully in memory before anything is written
+ logging.debug("Obj dumped to %s!" % json_path)
def get_trainable_param_num(model):
""" get the number of trainable parameters
@@ -60,9 +72,15 @@ def get_trainable_param_num(model):
"""
if isinstance(model, nn.DataParallel):
- model_param = list(model.parameters()) + list(model.module.layers['embedding'].get_parameters())
+ if isinstance(model.module.layers['embedding'].embeddings, dict):
+ model_param = list(model.parameters()) + list(model.module.layers['embedding'].get_parameters())
+ else:
+ model_param = list(model.parameters())
else:
- model_param = list(model.parameters()) + list(model.layers['embedding'].get_parameters())
+ if isinstance(model.layers['embedding'].embeddings, dict):
+ model_param = list(model.parameters()) + list(model.layers['embedding'].get_parameters())
+ else:
+ model_param = list(model.parameters())
return sum(p.numel() for p in model_param if p.requires_grad)
@@ -228,7 +246,7 @@ def md5(file_paths, chunk_size=1024*1024*1024):
""" Calculate a md5 of lists of files.
Args:
- file_paths: an iterable object contains files. Files will be concatenated orderly if there are more than one file
+ file_paths: an iterable of file paths. Files are concatenated in order if there is more than one file
chunk_size: unit is byte, default value is 1GB
Returns:
md5
@@ -242,4 +260,17 @@ def md5(file_paths, chunk_size=1024*1024*1024):
if not data:
break
md5.update(data)
- return md5.hexdigest()
\ No newline at end of file
+ return md5.hexdigest()
+
+
+def get_layer_class(model, layer_id):
+ """Return the entry stored under layer_id in the model's `layers` container.
+
+ Args:
+ model: the model, either wrapped in nn.DataParallel (unwrapped via .module) or a bare model
+ layer_id: layer id from configuration
+ """
+ if isinstance(model, nn.DataParallel):
+ return model.module.layers[layer_id]
+ else:
+ return model.layers[layer_id]
\ No newline at end of file
diff --git a/utils/corpus_utils.py b/utils/corpus_utils.py
index d680349..773019b 100644
--- a/utils/corpus_utils.py
+++ b/utils/corpus_utils.py
@@ -16,6 +16,7 @@ import codecs
import copy
from settings import ProblemTypes
import torch
+import time
if sys.version_info < (3,):
@@ -173,7 +174,7 @@ def corpus_permutation(*corpora):
return corpora_perm
-def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=None, permutate=False, transform_tensor=True):
+def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=None, permutate=False, transform_tensor=True, predict_mode='batch'):
"""
Args:
@@ -232,13 +233,14 @@ def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=
target_batches: ndarray/Variable shape: [number of batches, batch_size, targets]
"""
- logging.info("Start making batches")
+ if predict_mode == 'batch':
+ logging.info("Start making batches")
if permutate is True:
#CAUTION! data and length would be revised
- data = copy.deepcopy(data)
- length = copy.deepcopy(length)
- if target is not None:
- target = copy.deepcopy(target)
+ # data = copy.deepcopy(data)
+ # length = copy.deepcopy(length)
+ # if target is not None:
+ # target = copy.deepcopy(target)
# shuffle the data
permutation = np.random.permutation(len(list(target.values())[0]))
@@ -392,7 +394,8 @@ def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=
target_batches.append(target_batch)
- logging.info("Batches got!")
+ if predict_mode == 'batch':
+ logging.info("Batches got!")
return data_batches, length_batches, target_batches