Merge branch 'master' into dev/wutlin
Commit 1b4995c639
.gitignore

@@ -5,4 +5,5 @@
*.vs*
dataset/GloVe/
dataset/20_newsgroups/
dataset/SST-2/
models/
CONTRIBUTING.md

@@ -18,6 +18,64 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio

- Contribute a Model to **Model Zoo**

We encourage everyone to contribute their NLP models (namely JSON configuration files). Please follow the structure in model_zoo to create a pull request.

- Contribute a Block to **Block Zoo**

We encourage everyone to improve this toolkit by contributing code, such as customized Blocks, so that other users can further benefit from these new Blocks.

Adding a new block to NeuronBlocks takes *three steps* (take the [BiLSTM block](https://github.com/microsoft/NeuronBlocks/blob/master/block_zoo/BiLSTM.py) as an example):

- Define the new block's configuration class (`BiLSTMConf` in the BiLSTM block). The configuration class should inherit from the [Base Configuration Class](https://github.com/microsoft/NeuronBlocks/blob/master/block_zoo/BaseLayer.py), which declares several necessary functions, and override those functions. Their details are given below, followed by a sketch of a minimal configuration class:

```python
def default(self):
    '''
    Define the block's default hyper parameters. The corresponding hyper
    parameters in the JSON configuration file are read first; these defaults
    apply when they are absent.
    '''

def declare(self):
    '''
    Define things like "input_ranks" and "num_of_inputs", which are fixed
    for the block.

    num_of_inputs == N (N > 0) means the layer accepts exactly N inputs;
    num_of_inputs == -1 means the layer accepts any number of inputs.

    The rank here is not the same as matrix rank:
        For a scalar, its rank is 0;
        For a vector, its rank is 1;
        For a matrix, its rank is 2;
        For a cube of numbers, its rank is 3.

    If num_of_inputs > 0, len(input_ranks) should be equal to num_of_inputs;
    if num_of_inputs == -1, input_ranks should be a list with only one
    element, and the rank of every input should be equal to that element.
    '''

def inference(self):
    '''
    Infer attributes such as output_dim, which may rely on the defined hyper
    parameters or on the block's specific operation.
    '''

def verify(self):
    '''
    Define the necessary verification for your layer, run when the model is
    defined.
    '''
```
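To make these functions concrete, here is a minimal sketch of a configuration class for a hypothetical `ElementWiseSum` block. This example is not taken from the repository: the `ElementWiseSum` name is invented, and the import paths, the `@DocInherit` decorator, and attributes such as `num_of_inputs`, `input_ranks`, `input_dims`, `output_dim`, and `output_rank` are assumed to follow the conventions visible in `block_zoo/BiLSTM.py`; check them against `block_zoo/BaseLayer.py` before relying on them.

```python
import copy

from block_zoo.BaseLayer import BaseConf
from utils.DocInherit import DocInherit  # assumption: blocks decorate their overrides this way

class ElementWiseSumConf(BaseConf):
    """Configuration of a hypothetical block that sums its inputs element-wise."""

    def __init__(self, **kwargs):
        super(ElementWiseSumConf, self).__init__(**kwargs)

    @DocInherit
    def default(self):
        pass  # a pure element-wise sum has no tunable hyper parameters

    @DocInherit
    def declare(self):
        self.num_of_inputs = -1   # accept any number of inputs ...
        self.input_ranks = [3]    # ... as long as each has rank 3: [batch_size, seq_len, dim]

    @DocInherit
    def inference(self):
        # summing element-wise preserves the shape of the first input
        self.output_dim = copy.deepcopy(self.input_dims[0])
        self.output_rank = 3
        super(ElementWiseSumConf, self).inference()  # let the base class infer the rest

    @DocInherit
    def verify(self):
        super(ElementWiseSumConf, self).verify()
        # all inputs must share the same shape, otherwise they cannot be summed
        assert all(dims == self.input_dims[0] for dims in self.input_dims), \
            "ElementWiseSum requires all inputs to have the same dimensions!"
```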
- Implement the new block's class (`BiLSTM` in the BiLSTM block). The block class should inherit from the [Base Block Class](https://github.com/microsoft/NeuronBlocks/blob/master/block_zoo/BaseLayer.py) and override the `__init__` and `forward` functions (a matching sketch follows the code block below).

```python
def __init__(self, layer_conf):
    '''
    Define the necessary attributes used in the block's operation logic.
    '''

def forward(self, *args):
    '''
    Tensor operation logic.
    '''
```
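Continuing the hypothetical example above, a matching sketch of the block class itself. The convention that a multi-input block's `forward` receives alternating (tensor, length) arguments is an assumption inferred from the multi-input blocks in block_zoo and should be verified there.

```python
from block_zoo.BaseLayer import BaseLayer

class ElementWiseSum(BaseLayer):
    """Hypothetical block that sums an arbitrary number of rank-3 inputs element-wise."""

    def __init__(self, layer_conf):
        super(ElementWiseSum, self).__init__(layer_conf)
        # an element-wise sum has no parameters to register

    def forward(self, *args):
        # assumption: for multi-input blocks, args alternate between tensors and
        # their lengths: (tensor_1, length_1, tensor_2, length_2, ...)
        tensors, lengths = args[0::2], args[1::2]
        output = tensors[0]
        for tensor in tensors[1:]:
            output = output + tensor  # shapes already checked in ElementWiseSumConf.verify()
        return output, lengths[0]
```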
- Register the new block in block_zoo.

NeuronBlocks provides a script that registers new blocks automatically, so block contributors can focus purely on the block logic.

*Tip: PROJECT_ROOT denotes the root directory of this project.*

```bash
cd PROJECT_ROOT
python register_block.py --block_name=new_block_name
```

*Tip: Before you contribute your code, we strongly suggest verifying that your improvements are valid with **[AUTOTEST](./autotest)**. We also encourage everyone to improve this autotest tool by contributing code, such as adding test tasks.*
LearningMachine.py

@@ -13,7 +13,7 @@ import codecs
import pickle as pkl

from utils.common_utils import dump_to_pkl, load_from_pkl, get_param_num, get_trainable_param_num, \
    transfer_to_gpu, transform_params2tensors
    transfer_to_gpu, transform_params2tensors, get_layer_class, load_from_json, dump_to_json
from utils.philly_utils import HDFSDirectTransferer, open_and_move, convert_to_tmppath, \
    convert_to_hdfspath, move_from_local_to_hdfs
from Model import Model

@@ -22,8 +22,10 @@ from metrics.Evaluator import Evaluator
from utils.corpus_utils import get_batches
from core.StreamingRecorder import StreamingRecorder
from core.LRScheduler import LRScheduler
from settings import ProblemTypes
from settings import ProblemTypes, Setting as st
from block_zoo import Linear
from block_zoo import CRF
from losses.CRFLoss import CRFLoss


class LearningMachine(object):

@@ -40,6 +42,7 @@ class LearningMachine(object):
        device = 'GPU' if 'cuda' in emb_weight_device else 'CPU'
        logging.info(
            "The embedding matrix is on %s now, you can modify the weight_on_gpu parameter to change embeddings weight device." % device)
        logging.info("="*100 + '\n' + "*"*15 + "Model Achitecture" + "*"*15)
        logging.info(self.model)
        #logging.info("Total parameters: %d; trainable parameters: %d" % (get_param_num(self.model), get_trainable_param_num(self.model)))
        logging.info("Total trainable parameters: %d" % (get_trainable_param_num(self.model)))

@@ -89,33 +92,18 @@ class LearningMachine(object):

    def train(self, optimizer, loss_fn):
        self.model.train()
        if not self.conf.train_data_path.endswith('.pkl'):
            train_data, train_length, train_target = self.problem.encode(self.conf.train_data_path, self.conf.file_columns,
                self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                min_sentence_len = self.conf.min_sentence_len, extra_feature=self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
                show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
        else:
            train_pkl_data = load_from_pkl(self.conf.train_data_path)
            train_data, train_length, train_target = train_pkl_data['data'], train_pkl_data['length'], train_pkl_data['target']
        logging.info("="*100 + '\n' + "*"*15 + 'Prepare data for training' + "*"*15)

        if not self.conf.valid_data_path.endswith('.pkl'):
            valid_data, valid_length, valid_target = self.problem.encode(self.conf.valid_data_path, self.conf.file_columns,
                self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
                show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
        else:
            valid_pkl_data = load_from_pkl(self.conf.valid_data_path)
            valid_data, valid_length, valid_target = valid_pkl_data['data'], valid_pkl_data['length'], valid_pkl_data['target']
        valid_data, valid_length, valid_target = self.problem.encode(self.conf.valid_data_path, self.conf.file_columns,
            self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
            min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
            show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)

        if self.conf.test_data_path is not None:
            if not self.conf.test_data_path.endswith('.pkl'):
                test_data, test_length, test_target = self.problem.encode(self.conf.test_data_path, self.conf.file_columns, self.conf.input_types,
                    self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                    min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths,
                    file_format='tsv', show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
            else:
                test_pkl_data = load_from_pkl(self.conf.test_data_path)
                test_data, test_length, test_target = test_pkl_data['data'], test_pkl_data['length'], test_pkl_data['target']
            test_data, test_length, test_target = self.problem.encode(self.conf.test_data_path, self.conf.file_columns,
                self.conf.input_types, self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
                show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)

        stop_training = False
        epoch = 1

@@ -132,196 +120,219 @@ class LearningMachine(object):
        elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
            streaming_recoder = StreamingRecorder(['prediction', 'answer_text'])

        logging.info("=" * 100 + '\n' + "*" * 15 + 'Start training' + "*" * 15)
        while not stop_training and epoch <= self.conf.max_epoch:
            logging.info('Training: Epoch ' + str(epoch))
            train_data_generator = self._get_training_data_generator()
            part_index = 1
            for train_data, train_length, train_target in train_data_generator:
                logging.debug('Training: Epoch %s Part %s'%(epoch, part_index))
                part_index += 1
                data_batches, length_batches, target_batches = \
                    get_batches(self.problem, train_data, train_length, train_target, self.conf.batch_size_total,
                        self.conf.input_types, None, permutate=True, transform_tensor=True)

            data_batches, length_batches, target_batches = \
                get_batches(self.problem, train_data, train_length, train_target, self.conf.batch_size_total,
                    self.conf.input_types, None, permutate=True, transform_tensor=True)

            whole_batch_num = len(target_batches)
            valid_batch_num = max(len(target_batches) // self.conf.valid_times_per_epoch, 1)
            if torch.cuda.device_count() > 1:
                small_batch_num = whole_batch_num * torch.cuda.device_count()    # total batch num over all the gpus
                valid_batch_num_show = valid_batch_num * torch.cuda.device_count()    # total batch num over all the gpus to do validation
            else:
                whole_batch_num = len(target_batches)
                valid_batch_num = min(self.conf.steps_per_validation, whole_batch_num)
                small_batch_num = whole_batch_num
                valid_batch_num_show = valid_batch_num
                batch_num_to_show_results = self.conf.batch_num_to_show_results
                if torch.cuda.device_count() > 1:
                    batch_num_to_show_results *= torch.cuda.device_count()    # total batch num over all the gpus to log
                    small_batch_num *= torch.cuda.device_count()    # total batch num over all the gpus
                    valid_batch_num_show *= torch.cuda.device_count()    # total batch num over all the gpus to do validation

            streaming_recoder.clear_records()
            all_costs = []

                streaming_recoder.clear_records()
                all_costs = []
                logging.info('There are %d batches during current period; validation are conducted every %d batch' % (small_batch_num, valid_batch_num_show))

            logging.info('There are %d batches during an epoch; validation are conducted every %d batch' % (small_batch_num, valid_batch_num_show))
            if self.conf.mode == 'normal':
                progress = tqdm(range(len(target_batches)))
            elif self.conf.mode == 'philly':
                progress = range(len(target_batches))
            for i in progress:
                # the result shape: for classification: [batch_size, # of classes]; for sequence tagging: [batch_size, seq_len, # of tags]
                param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[i], length_batches[i])
                logits = self.model(inputs_desc, length_desc, *param_list)

                if self.conf.mode == 'normal':
                    progress = tqdm(range(len(target_batches)))
                elif self.conf.mode == 'philly':
                    progress = range(len(target_batches))
                for i in progress:
                    # the result shape: for classification: [batch_size, # of classes]; for sequence tagging: [batch_size, seq_len, # of tags]
                    param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[i], length_batches[i])
                    logits = self.model(inputs_desc, length_desc, *param_list)

                logits_softmax = {}
                if isinstance(self.model, nn.DataParallel):
                    for tmp_output_layer_id in self.model.module.output_layer_id:
                        if isinstance(self.model.module.layers[tmp_output_layer_id], Linear) and \
                                (not self.model.module.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
                            logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
                                logits[tmp_output_layer_id], dim=-1)
                        else:
                            logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
                else:
                    for tmp_output_layer_id in self.model.output_layer_id:
                        if isinstance(self.model.layers[tmp_output_layer_id], Linear) and \
                                (not self.model.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
                            logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
                                logits[tmp_output_layer_id], dim=-1)
                        else:
                            logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]

                # check the output
                if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                    logits = list(logits.values())[0]
                    logits_softmax = list(logits_softmax.values())[0]
                    assert len(logits_softmax.shape) == 2, 'The dimension of your output is %s, but we need [batch_size*GPUs, class num]' % (str(list(logits_softmax.shape)))
                    assert logits_softmax.shape[1] == self.problem.output_target_num(), 'The dimension of your output layer %d is inconsistent with your type number %d!' % (logits_softmax.shape[1], self.problem.output_target_num())
                    # for auc metric
                    prediction_scores = logits_softmax[:, self.conf.pos_label].cpu().data.numpy()
                    if self.evaluator.has_auc_type_specific:
                        prediction_scores_all = logits_softmax.cpu().data.numpy()
                    else:
                        prediction_scores_all = None
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                    logits = list(logits.values())[0]
                    logits_softmax = list(logits_softmax.values())[0]
                    assert len(logits_softmax.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence length, representation dim]' % (str(list(logits_softmax.shape)), )
                    prediction_scores = None
                    prediction_scores_all = None
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                    logits = list(logits.values())[0]
                    logits_softmax = list(logits_softmax.values())[0]
                    assert len(logits_softmax.shape) == 2 and logits_softmax.shape[1] == 1, 'The dimension of your output is %s, but we need [batch_size*GPUs, 1]' % (str(list(logits_softmax.shape)))
                    prediction_scores = None
                    prediction_scores_all = None
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                    for single_value in logits_softmax.values():
                        assert len(single_value.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence_len, 1]' % (str(list(single_value.shape)))
                    prediction_scores = None
                    prediction_scores_all = None

                logits_flat = dict()
                if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                    # Transform output shapes for metric evaluation
                    # for seq_tag_f1 metric
                    prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                    streaming_recoder.record_one_row([self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
                        prediction_scores, self.problem.decode(target_batches[i][self.conf.answer_column_name[0]],
                        length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)

                    # pytorch's CrossEntropyLoss only support this
                    logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2))    # [batch_size * seq_len, # of tags]
                    #target_batches[i] = target_batches[i].view(-1)    # [batch_size * seq_len]
                    # [batch_size * seq_len]
                    target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][self.conf.answer_column_name[0]].reshape(-1)

                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                    prediction_indices = logits_softmax.detach().max(1)[1].cpu().numpy()
                    # Should not decode!
                    streaming_recoder.record_one_row([prediction_indices, prediction_scores, prediction_scores_all, target_batches[i][self.conf.answer_column_name[0]].numpy()])
                    logits_flat[self.conf.output_layer_id[0]] = logits
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                    temp_logits_flat = logits.squeeze(1)
                    prediction_scores = temp_logits_flat.detach().cpu().numpy()
                    streaming_recoder.record_one_row([prediction_scores, target_batches[i][self.conf.answer_column_name[0]].numpy()])
                    logits_flat[self.conf.output_layer_id[0]] = temp_logits_flat
                elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                    for key, value in logits.items():
                        logits[key] = value.squeeze()
                    for key, value in logits_softmax.items():
                        logits_softmax[key] = value.squeeze()
                    passage_identify = None
                    for type_key in data_batches[i].keys():
                        if 'p' in type_key.lower():
                            passage_identify = type_key
                            break
                    if not passage_identify:
                        raise Exception('MRC task need passage information.')
                    prediction = self.problem.decode(logits_softmax, lengths=length_batches[i][passage_identify],
                        batch_data=data_batches[i][passage_identify])
                    logits_flat = logits
                    mrc_answer_target = None
                    for single_target in target_batches[i]:
                        if isinstance(target_batches[i][single_target][0], str):
                            mrc_answer_target = target_batches[i][single_target]
                    streaming_recoder.record_one_row([prediction, mrc_answer_target])

                if self.use_gpu:
                    for single_target in self.conf.answer_column_name:
                        if isinstance(target_batches[i][single_target], torch.Tensor):
                            target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
                loss = loss_fn(logits_flat, target_batches[i])

                all_costs.append(loss.item())
                optimizer.zero_grad()
                loss.backward()
                if self.conf.clip_grad_norm_max_norm != -1:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.conf.clip_grad_norm_max_norm)
                    logits_softmax = {}
                    if isinstance(self.model, nn.DataParallel):
                        torch.nn.utils.clip_grad_norm_(self.model.module.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
                        for tmp_output_layer_id in self.model.module.output_layer_id:
                            if isinstance(self.model.module.layers[tmp_output_layer_id], Linear) and \
                                    (not self.model.module.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
                                logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
                                    logits[tmp_output_layer_id], dim=-1)
                            elif isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                                pass
                            else:
                                logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
                    else:
                        torch.nn.utils.clip_grad_norm_(self.model.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
                optimizer.step()
                        for tmp_output_layer_id in self.model.output_layer_id:
                            if isinstance(self.model.layers[tmp_output_layer_id], Linear) and \
                                    (not self.model.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
                                logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
                                    logits[tmp_output_layer_id], dim=-1)
                            elif isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                                pass
                            else:
                                logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]

                del loss, logits, logits_softmax, logits_flat
                del prediction_scores
                if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging \
                        or ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                    del prediction_indices

                if show_result_cnt == self.conf.batch_num_to_show_results:
                    # check the output
                    if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
                            y_pred_scores_all=streaming_recoder.get('pred_scores_all'), formatting=True)
                        logits = list(logits.values())[0]
                        logits_softmax = list(logits_softmax.values())[0]
                        assert len(logits_softmax.shape) == 2, 'The dimension of your output is %s, but we need [batch_size*GPUs, class num]' % (str(list(logits_softmax.shape)))
                        assert logits_softmax.shape[1] == self.problem.output_target_num(), 'The dimension of your output layer %d is inconsistent with your type number %d!' % (logits_softmax.shape[1], self.problem.output_target_num())
                        # for auc metric
                        prediction_scores = logits_softmax[:, self.conf.pos_label].cpu().data.numpy()
                        if self.evaluator.has_auc_type_specific:
                            prediction_scores_all = logits_softmax.cpu().data.numpy()
                        else:
                            prediction_scores_all = None
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
                            formatting=True)
                        logits = list(logits.values())[0]
                        if not isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                            logits_softmax = list(logits_softmax.values())[0]
                            assert len(logits_softmax.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence length, representation dim]' % (str(list(logits_softmax.shape)), )
                        prediction_scores = None
                        prediction_scores_all = None
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
                        logits = list(logits.values())[0]
                        logits_softmax = list(logits_softmax.values())[0]
                        assert len(logits_softmax.shape) == 2 and logits_softmax.shape[1] == 1, 'The dimension of your output is %s, but we need [batch_size*GPUs, 1]' % (str(list(logits_softmax.shape)))
                        prediction_scores = None
                        prediction_scores_all = None
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                        result = self.evaluator.evaluate(streaming_recoder.get('answer_text'), streaming_recoder.get('prediction'),
                            y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
                        for single_value in logits_softmax.values():
                            assert len(single_value.shape) == 3, 'The dimension of your output is %s, but we need [batch_size*GPUs, sequence_len, 1]' % (str(list(single_value.shape)))
                        prediction_scores = None
                        prediction_scores_all = None

                    if torch.cuda.device_count() > 1:
                        logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
                            (epoch, i * torch.cuda.device_count(), lr_scheduler.get_lr(), np.mean(all_costs), result))
                    logits_flat = dict()
                    if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                        # Transform output shapes for metric evaluation
                        # for seq_tag_f1 metric
                        if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                            forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
                            prediction_indices = tag_seq.cpu().numpy()
                            streaming_recoder.record_one_row([self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
                                prediction_scores, self.problem.decode(
                                    target_batches[i][self.conf.answer_column_name[0]],
                                    length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)

                        else:
                            prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                            # pytorch's CrossEntropyLoss only support this
                            logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2))    # [batch_size * seq_len, # of tags]
                            streaming_recoder.record_one_row([self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
                                prediction_scores, self.problem.decode(
                                    target_batches[i][self.conf.answer_column_name[0]],
                                    length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)

                        target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][
                            self.conf.answer_column_name[0]].reshape(-1)

                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                        prediction_indices = logits_softmax.detach().max(1)[1].cpu().numpy()
                        # Should not decode!
                        streaming_recoder.record_one_row([prediction_indices, prediction_scores, prediction_scores_all, target_batches[i][self.conf.answer_column_name[0]].numpy()])
                        logits_flat[self.conf.output_layer_id[0]] = logits
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                        temp_logits_flat = logits.squeeze(1)
                        prediction_scores = temp_logits_flat.detach().cpu().numpy()
                        streaming_recoder.record_one_row([prediction_scores, target_batches[i][self.conf.answer_column_name[0]].numpy()])
                        logits_flat[self.conf.output_layer_id[0]] = temp_logits_flat
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                        for key, value in logits.items():
                            logits[key] = value.squeeze()
                        for key, value in logits_softmax.items():
                            logits_softmax[key] = value.squeeze()
                        passage_identify = None
                        for type_key in data_batches[i].keys():
                            if 'p' in type_key.lower():
                                passage_identify = type_key
                                break
                        if not passage_identify:
                            raise Exception('MRC task need passage information.')
                        prediction = self.problem.decode(logits_softmax, lengths=length_batches[i][passage_identify],
                            batch_data=data_batches[i][passage_identify])
                        logits_flat = logits
                        mrc_answer_target = None
                        for single_target in target_batches[i]:
                            if isinstance(target_batches[i][single_target][0], str):
                                mrc_answer_target = target_batches[i][single_target]
                        streaming_recoder.record_one_row([prediction, mrc_answer_target])

                    if self.use_gpu:
                        for single_target in self.conf.answer_column_name:
                            if isinstance(target_batches[i][single_target], torch.Tensor):
                                target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
                    if isinstance(loss_fn.loss_fn[0], CRFLoss):
                        loss = loss_fn.loss_fn[0](forward_score, scores, masks, list(target_batches[i].values())[0], transitions, layer_conf)
                    else:
                        logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
                            (epoch, i, lr_scheduler.get_lr(), np.mean(all_costs), result))
                        show_result_cnt = 0
                        # The loss and other metrics printed during a training epoch are just the result of part of the training data.
                        all_costs = []
                        streaming_recoder.clear_records()
                        loss = loss_fn(logits_flat, target_batches[i])

                    if (i != 0 and i % valid_batch_num == 0) or i == len(target_batches) - 1:
                        torch.cuda.empty_cache()    # actually useless
                        logging.info('Valid & Test : Epoch ' + str(epoch))
                        new_result = self.evaluate(valid_data, valid_length, valid_target,
                            self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, cur_best_result=best_result,
                            model_save_path=self.conf.model_save_path, phase="valid", epoch=epoch)
                        renew_flag = best_result != new_result
                        best_result = new_result
                    all_costs.append(loss.item())
                    optimizer.zero_grad()
                    loss.backward()
                    if self.conf.clip_grad_norm_max_norm != -1:
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.conf.clip_grad_norm_max_norm)
                        if isinstance(self.model, nn.DataParallel):
                            torch.nn.utils.clip_grad_norm_(self.model.module.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
                        else:
                            torch.nn.utils.clip_grad_norm_(self.model.layers['embedding'].get_parameters(), self.conf.clip_grad_norm_max_norm)
                    optimizer.step()

                    if renew_flag and self.conf.test_data_path is not None:
                        self.evaluate(test_data, test_length, test_target,
                            self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, phase="test", epoch=epoch)
                        self.model.train()
                    show_result_cnt += 1
                    del loss, logits, logits_softmax, logits_flat
                    del prediction_scores
                    if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging \
                            or ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                        del prediction_indices

                del data_batches, length_batches, target_batches
                if show_result_cnt == batch_num_to_show_results:
                    if ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
                            y_pred_scores_all=streaming_recoder.get('pred_scores_all'), formatting=True)
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=streaming_recoder.get('pred_scores'),
                            formatting=True)
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                        result = self.evaluator.evaluate(streaming_recoder.get('target'),
                            streaming_recoder.get('prediction'), y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                        result = self.evaluator.evaluate(streaming_recoder.get('answer_text'), streaming_recoder.get('prediction'),
                            y_pred_pos_score=None, y_pred_scores_all=None, formatting=True)

                    if torch.cuda.device_count() > 1:
                        logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
                            (epoch, i * torch.cuda.device_count(), lr_scheduler.get_lr(), np.mean(all_costs), result))
                    else:
                        logging.info("Epoch %d batch idx: %d; lr: %f; since last log, loss=%f; %s" % \
                            (epoch, i, lr_scheduler.get_lr(), np.mean(all_costs), result))
                    show_result_cnt = 0
                    # The loss and other metrics printed during a training epoch are just the result of part of the training data.
                    all_costs = []
                    streaming_recoder.clear_records()

                if (i != 0 and i % valid_batch_num == 0) or i == len(target_batches) - 1:
                    torch.cuda.empty_cache()    # actually useless
                    logging.info('Valid & Test : Epoch ' + str(epoch))
                    new_result = self.evaluate(valid_data, valid_length, valid_target,
                        self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, cur_best_result=best_result,
                        model_save_path=self.conf.model_save_path, phase="valid", epoch=epoch)
                    renew_flag = best_result != new_result
                    best_result = new_result

                    if renew_flag and self.conf.test_data_path is not None:
                        self.evaluate(test_data, test_length, test_target,
                            self.conf.input_types, self.evaluator, loss_fn, pad_ids=None, phase="test", epoch=epoch)
                    self.model.train()
                show_result_cnt += 1

            del data_batches, length_batches, target_batches
            lr_scheduler.step()
            epoch += 1

@@ -334,7 +345,7 @@ class LearningMachine(object):
            test_data, test_length, test_target = self.problem.encode(test_data_path, self.conf.file_columns, self.conf.input_types,
                self.conf.file_with_col_header, self.conf.object_inputs, self.conf.answer_column_name, max_lengths=self.conf.max_lengths,
                min_sentence_len = self.conf.min_sentence_len, extra_feature = self.conf.extra_feature,fixed_lengths=self.conf.fixed_lengths, file_format='tsv',
                show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers)
                show_progress=True if self.conf.mode == 'normal' else False, cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)
        else:
            test_pkl_data = load_from_pkl(test_data_path)
            test_data, test_length, test_target = test_pkl_data['data'], test_pkl_data['length'], test_pkl_data['target']

@@ -472,18 +483,29 @@ class LearningMachine(object):
                logits_flat = {}
                if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                    logits = list(logits.values())[0]
                    logits_softmax = list(logits_softmax.values())[0]
                    # Transform output shapes for metric evaluation
                    # for seq_tag_f1 metric
                    prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                    streaming_recoder.record_one_row(
                        [self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()), prediction_pos_scores,
                        self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())], keep_dim=False)
                    if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                        forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
                        prediction_indices = tag_seq.cpu().numpy()
                        streaming_recoder.record_one_row(
                            [self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
                            prediction_pos_scores,
                            self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())],
                            keep_dim=False)
                    else:
                        logits_softmax = list(logits_softmax.values())[0]
                        # Transform output shapes for metric evaluation
                        # for seq_tag_f1 metric
                        prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                        # pytorch's CrossEntropyLoss only support this
                        logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2))    # [batch_size * seq_len, # of tags]
                        streaming_recoder.record_one_row(
                            [self.problem.decode(prediction_indices, length_batches[i]['target'][self.conf.answer_column_name[0]].numpy()),
                            prediction_pos_scores,
                            self.problem.decode(target_batches[i], length_batches[i]['target'][self.conf.answer_column_name[0]].numpy())],
                            keep_dim=False)

                    # pytorch's CrossEntropyLoss only support this
                    logits_flat[self.conf.output_layer_id[0]] = logits.view(-1, logits.size(2))    # [batch_size * seq_len, # of tags]
                    #target_batches[i] = target_batches[i].view(-1)    # [batch_size * seq_len]
                    target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][self.conf.answer_column_name[0]].reshape(-1)    # [batch_size * seq_len]
                    target_batches[i][self.conf.answer_column_name[0]] = target_batches[i][
                        self.conf.answer_column_name[0]].reshape(-1)    # [batch_size * seq_len]

                if to_predict:
                    prediction_batch = self.problem.decode(prediction_indices, length_batches[i][key_random].numpy())

@@ -546,8 +568,13 @@ class LearningMachine(object):
                        predict_stream_recoder.record_one_row([prediction])

                if to_predict:
                    logits_len = len(list(logits.values())[0]) \
                        if ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc else len(logits)
                    if ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                        logits_len = len(list(logits.values())[0])
                    elif ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging and isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                        # for sequence_tagging task, logits is tuple type which index 3 is tag_seq [batch_size*seq_len]
                        logits_len = logits[3].size(0)
                    else:
                        logits_len = len(logits)
                    for sample_idx in range(logits_len):
                        while True:
                            sample = fin.readline().rstrip()

@@ -563,7 +590,10 @@ class LearningMachine(object):
                    for single_target in self.conf.answer_column_name:
                        if isinstance(target_batches[i][single_target], torch.Tensor):
                            target_batches[i][single_target] = transfer_to_gpu(target_batches[i][single_target])
                loss = loss_fn(logits_flat, target_batches[i])
                if isinstance(loss_fn.loss_fn[0], CRFLoss):
                    loss = loss_fn.loss_fn[0](forward_score, scores, masks, list(target_batches[i].values())[0], transitions, layer_conf)
                else:
                    loss = loss_fn(logits_flat, target_batches[i])
                loss_recoder.record('loss', loss.item())

                del loss, logits, logits_softmax, logits_flat

@@ -639,7 +669,7 @@ class LearningMachine(object):
            self.conf.file_with_col_header,self.conf.object_inputs, None, min_sentence_len=self.conf.min_sentence_len,
            extra_feature=self.conf.extra_feature,max_lengths=self.conf.max_lengths, fixed_lengths=self.conf.fixed_lengths,
            file_format='tsv', show_progress=True if self.conf.mode == 'normal' else False,
            cpu_num_workers=self.conf.cpu_num_workers)
            cpu_num_workers=self.conf.cpu_num_workers, chunk_size=self.conf.chunk_size)

        logging.info("Starting predict ...")
        self.model.eval()

@@ -685,9 +715,14 @@ class LearningMachine(object):

                if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                    logits = list(logits.values())[0]
                    logits_softmax = list(logits_softmax.values())[0]
                    # Transform output shapes for metric evaluation
                    prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                    if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                        forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
                        prediction_indices = tag_seq.cpu().numpy()
                    else:
                        logits_softmax = list(logits_softmax.values())[0]
                        # Transform output shapes for metric evaluation
                        # for seq_tag_f1 metric
                        prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                    prediction_batch = self.problem.decode(prediction_indices, length_batches[i][key_random].numpy())
                    for prediction_sample in prediction_batch:
                        streaming_recoder.record('prediction', " ".join(prediction_sample))

@@ -745,6 +780,107 @@ class LearningMachine(object):

        fin.close()

    def interactive(self, sample, file_columns, predict_fields=['prediction'], predict_mode='batch'):
        """ interactive prediction

        Args:
            file_columns: representation the columns of sample
            predict_mode: interactive|batch(need a predict file)
        """
        predict_data, predict_length, _, _, _ = \
            self.problem.encode_data_list(sample, file_columns, self.conf.input_types, self.conf.object_inputs, None,
                self.conf.min_sentence_len, self.conf.extra_feature, self.conf.max_lengths,
                self.conf.fixed_lengths, predict_mode=predict_mode)
        if predict_data is None:
            return 'Wrong Case!'
        self.model.eval()
        with torch.no_grad():
            data_batches, length_batches, _ = \
                get_batches(self.problem, predict_data, predict_length, None, 1,
                    self.conf.input_types, None, permutate=False, transform_tensor=True, predict_mode=predict_mode)
            streaming_recoder = StreamingRecorder(predict_fields)

            key_random = random.choice(
                list(length_batches[0].keys()).remove('target') if 'target' in list(length_batches[0].keys()) else
                list(length_batches[0].keys()))
            param_list, inputs_desc, length_desc = transform_params2tensors(data_batches[0], length_batches[0])
            logits = self.model(inputs_desc, length_desc, *param_list)

            logits_softmax = {}
            if isinstance(self.model, nn.DataParallel):
                for tmp_output_layer_id in self.model.module.output_layer_id:
                    if isinstance(self.model.module.layers[tmp_output_layer_id], Linear) and \
                            (not self.model.module.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
                        logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
                            logits[tmp_output_layer_id], dim=-1)
                    else:
                        logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]
            else:
                for tmp_output_layer_id in self.model.output_layer_id:
                    if isinstance(self.model.layers[tmp_output_layer_id], Linear) and \
                            (not self.model.layers[tmp_output_layer_id].layer_conf.last_hidden_softmax):
                        logits_softmax[tmp_output_layer_id] = nn.functional.softmax(
                            logits[tmp_output_layer_id], dim=-1)
                    else:
                        logits_softmax[tmp_output_layer_id] = logits[tmp_output_layer_id]

            if ProblemTypes[self.problem.problem_type] == ProblemTypes.sequence_tagging:
                logits = list(logits.values())[0]
                if isinstance(get_layer_class(self.model, tmp_output_layer_id), CRF):
                    forward_score, scores, masks, tag_seq, transitions, layer_conf = logits
                    prediction_indices = tag_seq.cpu().numpy()
                else:
                    logits_softmax = list(logits_softmax.values())[0]
                    # Transform output shapes for metric evaluation
                    # for seq_tag_f1 metric
                    prediction_indices = logits_softmax.data.max(2)[1].cpu().numpy()    # [batch_size, seq_len]
                prediction_batch = self.problem.decode(prediction_indices, length_batches[0][key_random].numpy())
                for prediction_sample in prediction_batch:
                    streaming_recoder.record('prediction', " ".join(prediction_sample))
            elif ProblemTypes[self.problem.problem_type] == ProblemTypes.classification:
                logits = list(logits.values())[0]
                logits_softmax = list(logits_softmax.values())[0]
                prediction_indices = logits_softmax.data.max(1)[1].cpu().numpy()

                for field in predict_fields:
                    if field == 'prediction':
                        streaming_recoder.record(field,
                            self.problem.decode(prediction_indices,
                                length_batches[0][key_random].numpy()))
                    elif field == 'confidence':
                        prediction_scores = logits_softmax.cpu().data.numpy()
                        for prediction_score, prediction_idx in zip(prediction_scores, prediction_indices):
                            streaming_recoder.record(field, prediction_score[prediction_idx])
                    elif field.startswith('confidence') and field.find('@') != -1:
                        label_specified = field.split('@')[1]
                        label_specified_idx = self.problem.output_dict.id(label_specified)
                        confidence_specified = torch.index_select(logits_softmax.cpu(), 1, torch.tensor([label_specified_idx], dtype=torch.long)).squeeze(1)
                        streaming_recoder.record(field, confidence_specified.data.numpy())
            elif ProblemTypes[self.problem.problem_type] == ProblemTypes.regression:
                logits = list(logits.values())[0]
                # logits_softmax is unuseful for regression task!
                logits_softmax = list(logits_softmax.values())[0]
                logits_flat = logits.squeeze(1)
                prediction_scores = logits_flat.detach().cpu().numpy()
                streaming_recoder.record_one_row([prediction_scores])
            elif ProblemTypes[self.problem.problem_type] == ProblemTypes.mrc:
                for key, value in logits.items():
                    logits[key] = value.squeeze()
                for key, value in logits_softmax.items():
                    logits_softmax[key] = value.squeeze()
                passage_identify = None
                for type_key in data_batches[0].keys():
                    if 'p' in type_key.lower():
                        passage_identify = type_key
                        break
                if not passage_identify:
                    raise Exception('MRC task need passage information.')
                prediction = self.problem.decode(logits_softmax, lengths=length_batches[0][passage_identify],
                    batch_data=data_batches[0][passage_identify])
                streaming_recoder.record_one_row([prediction])

        return "\t".join([str(streaming_recoder.get(field)[0]) for field in predict_fields])

    def load_model(self, model_path):
        if self.use_gpu is True:
            self.model = torch.load(model_path)

@@ -762,5 +898,19 @@ class LearningMachine(object):
        logging.info("Model %s loaded!" % model_path)
        logging.info("Total trainable parameters: %d" % (get_trainable_param_num(self.model)))

    def _get_training_data_generator(self):
        if not self.conf.use_cache:
            return self.problem.get_encode_generator(self.conf, build_cache=False)
        if not self.conf.encoding_file_index:
            return self._get_save_encode_generator()
        assert self.conf.load_encoding_cache_generator, 'function conf.load_encoding_cache_generator is not defined'
        return self.conf.load_encoding_cache_generator(self.conf.encoding_cache_dir, self.conf.encoding_file_index)

    def _get_save_encode_generator(self):
        load_save_encode_generator = self.problem.get_encode_generator(self.conf, build_cache=True)
        for data, lengths, target in load_save_encode_generator:
            yield data, lengths, target
        cache_index = load_from_json(self.conf.encoding_cache_index_file_path)
        self.conf.encoding_file_index = cache_index[st.cencoding_key_index]
Model.py (28 lines changed)

@@ -18,7 +18,7 @@ EMBED_LAYER_ID = 'embedding'

def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_input_ids, use_gpu,
        conf_dict=None, shared_conf=None, succeed_embedding_flag=False, output_layer_flag=False,
        target_num=None, fixed_lengths=None):
        target_num=None, fixed_lengths=None, target_dict=None):
    """ get layer configuration

    Args

@@ -51,14 +51,24 @@ def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_inp

        # for classification tasks, we usually add a Linear layer to project the output to dimension of number of classes. If we don't know the #classes, we can use '-1' instead and we would calculate the number of classes from the corpus.
        if layer_name == 'Linear':
            if isinstance(conf_dict['hidden_dim'], list) and conf_dict['hidden_dim'][-1] == -1:
                assert output_layer_flag is True, "Only in the last layer, hidden_dim == -1 is allowed!"
                assert target_num is not None, "Number of targets should be given!"
                conf_dict['hidden_dim'][-1] = target_num
            if isinstance(conf_dict['hidden_dim'], list):
                if conf_dict['hidden_dim'][-1] == -1:
                    assert output_layer_flag is True, "Only in the last layer, hidden_dim == -1 is allowed!"
                    assert target_num is not None, "Number of targets should be given!"
                    conf_dict['hidden_dim'][-1] = target_num
                elif conf_dict['hidden_dim'][-1] == '#target#':
                    logging.info('#target# position will be replace by target num: %d' % target_num)
                    conf_dict['hidden_dim'][-1] = target_num
            elif isinstance(conf_dict['hidden_dim'], int) and conf_dict['hidden_dim'] == -1:
                assert output_layer_flag is True, "Only in the last layer, hidden_dim == -1 is allowed!"
                assert target_num is not None, "Number of targets should be given!"
                conf_dict['hidden_dim'] = target_num
            elif isinstance(conf_dict['hidden_dim'], str) and conf_dict['hidden_dim'] == '#target#':
                logging.info('#target# position will be replace by target num: %d' % target_num)
                conf_dict['hidden_dim'] = target_num
        # add some necessary attribute for CRF layer
        if layer_name == 'CRF':
            conf_dict['target_dict'] = target_dict

        conf = eval(layer_name + "Conf")(**conf_dict)
    except NameError as e:

@@ -104,6 +114,8 @@ def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_inp
    # inference and varification inside the layer
    conf.inference()    # update some attributes which relies on input dimension or something else
    conf.verify()    # verify if the configuration is legal
    former_conf = None if len(all_layer_configs) == 0 else list(all_layer_configs.values())[-1]
    conf.verify_former_block(former_conf)    # check if has special attribute rely on former layer

    logging.debug('Layer id: %s; name: %s; input_dims: %s; input_ranks: %s; output_dim: %s; output_rank: %s' % (layer_id, layer_name, conf.input_dims if layer_id != 'embedding' else 'None', conf.input_ranks, conf.output_dim, conf.output_rank))

@@ -211,7 +223,7 @@ class Model(nn.Module):
                all_layer_configs[EMBED_LAYER_ID] = get_conf(EMBED_LAYER_ID, layer_arch['layer'],
                    None, all_layer_configs, inputs, self.use_gpu, conf_dict={'conf': emb_conf},
                    shared_conf=None, succeed_embedding_flag=False, output_layer_flag=output_layer_flag,
                    target_num=target_num, fixed_lengths=fixed_lengths_corrected)
                    target_num=target_num, fixed_lengths=fixed_lengths_corrected, target_dict=problem.output_dict)
                self.add_layer(EMBED_LAYER_ID, get_layer(layer_arch['layer'], all_layer_configs[EMBED_LAYER_ID]))
            else:
                if layer_arch['layer'] in self.layers and not 'conf' in layer_arch:

@@ -230,7 +242,7 @@ class Model(nn.Module):
                    layer_arch['inputs'], all_layer_configs, inputs, self.use_gpu, conf_dict=conf_dict,
                    shared_conf=shared_conf, succeed_embedding_flag=succeed_embedding_flag,
                    output_layer_flag=output_layer_flag, target_num=target_num,
                    fixed_lengths=fixed_lengths_corrected)
                    fixed_lengths=fixed_lengths_corrected, target_dict=problem.output_dict)

                if layer_arch['layer'] in self.layers and not 'conf' in layer_arch:
                    self.add_layer(layer_arch['layer_id'], self.layers[layer_arch['layer']])

@@ -391,7 +403,7 @@ class Model(nn.Module):
        return representation_output

    def is_cuda(self):
        return next(self.parameters()).data.is_cuda
        return list(self.parameters())[-1].data.is_cuda

    def update_use_gpu(self, new_use_gpu):
        self.use_gpu = new_use_gpu
ModelConf.py (41 lines changed)

@@ -14,8 +14,8 @@ import shutil

from losses.BaseLossConf import BaseLossConf
#import traceback
from settings import LanguageTypes, ProblemTypes, TaggingSchemes, SupportedMetrics, PredictionTypes, DefaultPredictionFields
from utils.common_utils import log_set, prepare_dir
from settings import LanguageTypes, ProblemTypes, TaggingSchemes, SupportedMetrics, PredictionTypes, DefaultPredictionFields, ConstantStatic
from utils.common_utils import log_set, prepare_dir, md5
from utils.exceptions import ConfigurationError
import numpy as np

@@ -219,6 +219,10 @@ class ModelConf(object):
        # vocabulary setting
        self.max_vocabulary = self.get_item(['training_params', 'vocabulary', 'max_vocabulary'], default=800000, use_default=True)
        self.min_word_frequency = self.get_item(['training_params', 'vocabulary', 'min_word_frequency'], default=3, use_default=True)
        self.max_building_lines = self.get_item(['training_params', 'vocabulary', 'max_building_lines'], default=1000 * 1000, use_default=True)

        # chunk_size
        self.chunk_size = self.get_item(['training_params', 'chunk_size'], default=1000 * 1000, use_default=True)

        # file column header setting
        self.file_with_col_header = self.get_item(['inputs', 'file_with_col_header'], default=False, use_default=True)

@@ -280,6 +284,9 @@ class ModelConf(object):
            tmp_problem_path = os.path.join(self.save_base_dir, '.necessary_cache', 'problem.pkl')
            self.problem_path = tmp_problem_path if os.path.isfile(tmp_problem_path) else os.path.join(self.save_base_dir, 'necessary_cache', 'problem.pkl')

        # cache configuration
        self._load_cache_config_from_conf()

        # training params
        self.training_params = self.get_item(['training_params'])

@@ -303,12 +310,17 @@ class ModelConf(object):
            self.max_epoch = self.params.max_epoch
        else:
            self.max_epoch = self.get_item(['training_params', 'max_epoch'], default=float('inf'))
        self.valid_times_per_epoch = self.get_item(['training_params', 'valid_times_per_epoch'], default=1)
        if 'valid_times_per_epoch' in self.conf['training_params']:
            logging.info("configuration[training_params][valid_times_per_epoch] is deprecated, please use configuration[training_params][steps_per_validation] instead")
        self.steps_per_validation = self.get_item(['training_params', 'steps_per_validation'], default=10)
        self.batch_num_to_show_results = self.get_item(['training_params', 'batch_num_to_show_results'], default=10)
        self.max_lengths = self.get_item(['training_params', 'max_lengths'], default=None, use_default=True)
        self.fixed_lengths = self.get_item(['training_params', 'fixed_lengths'], default=None, use_default=True)
        if self.fixed_lengths:
            self.max_lengths = None
        if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
            self.fixed_lengths = None
            self.max_lengths = None

        if torch.cuda.device_count() > 1:
            self.batch_size_total = torch.cuda.device_count() * self.training_params['batch_size']

@@ -403,7 +415,8 @@ class ModelConf(object):
                        "The configuration file %s is illegal. There should be an item configuration[%s], "
                        "but the item %s is not found." % (self.conf_path, "][".join(error_keys), key))
                else:
                    print("configuration[%s] is not found in %s, use default value %s" % ("][".join(error_keys), self.conf_path, repr(default)))
                    # print("configuration[%s] is not found in %s, use default value %s" %
                    #       ("][".join(error_keys), self.conf_path, repr(default)))
                    item = default

        return item

@@ -525,3 +538,23 @@ class ModelConf(object):
            shutil.copy(params.conf_path, self.save_base_dir)
            logging.info('Configuration file is backed up to %s' % (self.save_base_dir))

    def _load_cache_config_from_conf(self):
        # training data
        self.train_data_md5 = None
        if self.phase == 'train' and self.train_data_path:
            logging.info("Calculating the md5 of traing data ...")
            self.train_data_md5 = md5([self.train_data_path])
            logging.info("the md5 of traing data is %s"%(self.train_data_md5))

        # problem
        self.problem_md5 = None

        # encoding
        self.encoding_cache_dir = None
        self.encoding_cache_index_file_path = None
        self.encoding_cache_index_file_md5_path = None
        self.encoding_file_index = None
        self.encoding_cache_legal_line_cnt = 0
        self.encoding_cache_illegal_line_cnt = 0
        self.load_encoding_cache_generator = None
30
README.md
30
README.md
|
@@ -1,4 +1,6 @@
<img src="https://i.imgur.com/IanH6xI.png" width="450">

## Building Your NLP DNN Models Like Playing Lego

[![language](https://img.shields.io/badge/language-en%20%7C%20中文-brightgreen.svg)](#language-supported)
[![python](https://img.shields.io/badge/python-3.6%20%7C%203.7-blue.svg)](https://www.python.org)

@@ -7,7 +9,7 @@

[简体中文](README_zh_CN.md)

[Tutorial](Tutorial.md) [中文教程](Tutorial_zh_CN.md) [Demo Video](https://youtu.be/x6cOpVSZcdo)


# Table of Contents

@@ -29,7 +31,7 @@ NeuronBlocks consists of two major components: ***Block Zoo*** and ***Model Zoo***
- In ***Block Zoo***, we provide commonly used neural network components as building blocks for model architecture design.
- In ***Model Zoo***, we provide a suite of NLP models for common NLP tasks, in the form of **JSON configuration** files.

<img src="https://i.imgur.com/LMD0PFQ.png" width="250">

### <span id="language-supported">Language Supported</span>
- English

@@ -50,6 +52,9 @@ Users can either pick existing models (config files) in *Model Zoo* to start mod

<img src="https://i.imgur.com/lFaBtnh.png" width="700">


# Get Started in 60 Seconds
## <span id="installation">Installation</span>

@@ -93,6 +98,21 @@ python test.py --conf_path=model_zoo/demo/conf.json
# predict
python predict.py --conf_path=model_zoo/demo/conf.json
```

For prediction, NeuronBlocks has two modes: **Interactive** and **Batch**.
- *Interactive Prediction Mode:* the interactive mode provides an interactive interface; users can input one case at a time following the prompt message, get a real-time prediction from the trained model, and input "exit" to leave the interface.
```bash
# use the above example
# interactive prediction
python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='interactive'
```
- *Batch Prediction Mode:* for predicting batches of cases, NeuronBlocks provides a batch prediction mode, which receives a file of cases as input and writes the prediction results into the prediction file.
```bash
# use the above example
# batch prediction
python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='batch' --predict_data_path=dataset/demo/predict.tsv
```

For more details, please refer to [Tutorial.md](Tutorial.md) and [Code documentation](https://microsoft.github.io/NeuronBlocks/).

# Who should consider using NeuronBlocks

@@ -134,7 +154,7 @@ Anyone who are familiar with are highly encouraged to contribute code.
```
@article{gong2019neuronblocks,
  title={NeuronBlocks--Building Your NLP DNN Models Like Playing Lego},
  author={Gong, Ming and Shou, Linjun and Lin, Wutao and Sang, Zhijie and Yan, Quanjia and Yang, Ze and Cheng, Feixiang and Jiang, Daxin},
  journal={arXiv preprint arXiv:1904.09535},
  year={2019}
}

@@ -155,5 +175,5 @@ If you have any questions, please contact NeuronBlocks@microsoft.com

If you have WeChat, you can also add the following account:

<img src="https://i.imgur.com/UfOYvt1.jpg" width="200">

README_zh_CN.md

@@ -1,4 +1,6 @@
<img src="https://i.imgur.com/IanH6xI.png" width="450">

## 像搭积木一样构建自然语言理解深度学习模型

[![language](https://img.shields.io/badge/language-en%20%7C%20中文-brightgreen.svg)](#language-supported)
[![python](https://img.shields.io/badge/python-3.6%20%7C%203.7-blue.svg)](https://www.python.org)

@@ -7,7 +9,7 @@

[English version](README.md)

[中文教程](Tutorial_zh_CN.md) [Tutorial](Tutorial.md) [Demo Video](https://youtu.be/x6cOpVSZcdo)

# 目录

@@ -47,11 +49,12 @@ NeuronBlocks包括 ***Block Zoo*** 和 ***Model Zoo*** 两个重要组件,其
- 更多……

### 使用方法

用户可以选择 *Model Zoo* 中的示例模型(JSON配置文件)开启模型训练,或者利用 *Block Zoo* 中的神经网络模块构建新的模型,就像玩乐高积木一样。

<img src="https://i.imgur.com/lFaBtnh.png" width="700">

# 快速入门
## 安装

@@ -95,6 +98,19 @@ python test.py --conf_path=model_zoo/demo/conf.json
python predict.py --conf_path=model_zoo/demo/conf.json
```

对于预测,NeuronBlocks 提供了两种预测的形式:**交互式**和**批量式**。
- *交互式:* 交互式模式提供了交互界面,用户可以根据输入提示信息每次输入一个样本并实时得到模型前向计算的结果,输入 "exit" 即可退出交互预测模式。
```bash
# use the above example
# interactive prediction
python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='interactive'
```
- *批量式:* 对于批量样本预测的需求,NeuronBlocks 提供批量预测模式,其接受一个包含批量样本的文件作为输入,并将模型前向计算的结果写入预测结果文件。
```bash
# use the above example
# batch prediction
python predict.py --conf_path=model_zoo/demo/conf.json --predict_mode='batch' --predict_data_path=dataset/demo/predict.tsv
```
更多细节,请查看 [Tutorial_zh_CN.md](Tutorial_zh_CN.md) 和 [Code documentation](https://microsoft.github.io/NeuronBlocks/)。

# 适用人群

@@ -135,7 +151,7 @@ NeuronBlocks以开放的模式运行。它由 **微软 STCA NLP Group** 设计
```
@article{gong2019neuronblocks,
  title={NeuronBlocks--Building Your NLP DNN Models Like Playing Lego},
  author={Gong, Ming and Shou, Linjun and Lin, Wutao and Sang, Zhijie and Yan, Quanjia and Yang, Ze and Cheng, Feixiang and Jiang, Daxin},
  journal={arXiv preprint arXiv:1904.09535},
  year={2019}
}

@@ -156,4 +172,5 @@ Licensed under the [MIT](LICENSE) License.

如果您有微信,也可以添加工具包的官方账号:

<img src="https://i.imgur.com/UfOYvt1.jpg" width="200">

Tutorial.md (70 changed lines)
@@ -1,4 +1,4 @@
# <img src="https://i.imgur.com/YLrkvW3.png" width="80"> ***NeuronBlocks*** Tutorial

[简体中文](Tutorial_zh_CN.md)

@@ -21,6 +21,7 @@
    4. [Compression for MRC Model](#task-6.4)
* [Task 7: Chinese Sentiment Analysis](#task-7)
* [Task 8: Chinese Text Matching](#task-8)
* [Task 9: Sequence Labeling](#task-9)
* [Advanced Usage](#advanced-usage)
    * [Extra Feature Support](#extra-feature)
    * [Learning Rate Decay](#lr-decay)

@@ -146,10 +147,12 @@ The architecture of the configuration file is:
    CUDA_VISIBLE_DEVICES= python train.py
    ```
- ***cpu_num_workers***. [default: -1] Defines the number of processes used to preprocess the dataset. If the value is negative or 0, the number of processes equals the number of logical cores the CPU supports; otherwise it equals *cpu_num_workers*.
- ***chunk_size***. [default: 1000000] Defines the chunk size of the files NB reads at a time, which avoids running out of memory and enables lazy loading.
- ***batch_size***. Defines the batch size. If there are multiple GPUs, *batch_size* is the batch size of each single GPU.
- ***batch_num_to_show_results***. [necessary for training] During training, show the results every *batch_num_to_show_results* batches.
- ***max_epoch***. [necessary for training] The maximum number of epochs to train.
- ~~***valid_times_per_epoch***~~. [**deprecated**] Please use *steps_per_validation* instead.
- ***steps_per_validation***. [default: 10] Defines how many training steps pass between two validations.
- ***tokenizer***. [optional] Defines the tokenizer. Currently, we support 'nltk' and 'jieba'. By default, 'nltk' is used for English and 'jieba' for Chinese. A minimal JSON sketch of these settings follows.
- **architecture**. Defines the model architecture. The node is a list of layers (blocks) in block_zoo that represents a model. The supported layers of this toolkit are listed in the [block_zoo overview](https://microsoft.github.io/NeuronBlocks).
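To make the settings above concrete, here is a minimal sketch of a `training_params` block as it might appear in a model's JSON configuration. The field names come from the list above; the values (e.g. `batch_size`, `max_epoch`, `steps_per_validation`) are illustrative, not taken from any shipped config file.
```json
{
  "training_params": {
    "cpu_num_workers": -1,
    "chunk_size": 1000000,
    "batch_size": 32,
    "batch_num_to_show_results": 10,
    "max_epoch": 5,
    "steps_per_validation": 100,
    "tokenizer": "nltk"
  }
}
```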
@@ -294,11 +297,13 @@ Question answer matching is a crucial subtask of the question answering problem,

Model | AUC
-------- | --------
CNN (WikiQA paper) | 0.735
CNN-Cnt (WikiQA paper) | 0.753
CNN (NeuronBlocks) | 0.747
BiLSTM (NeuronBlocks) | 0.767
BiLSTM+Attn (NeuronBlocks) | 0.754
[ARC-I](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7508
[ARC-II](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7612
[MatchPyramid](https://arxiv.org/abs/1602.06359) (NeuronBlocks) | 0.763
BiLSTM+Match Attention (NeuronBlocks) | 0.786

@@ -457,7 +462,7 @@ This task is to train a query regression model to learn from a heavy teacher mod
3. Calculate AUC metric
    ```bash
    cd PROJECT_ROOT
    python tools/calculate_auc.py --input_file models/kdqbc_bilstmattn_cnn/train/predict.tsv --predict_index 2 --label_index 1
    ```

*Tips: you can try different models by running different JSON config files.*
@@ -501,7 +506,7 @@ This task is to train a query-passage regression model to learn from a heavy tea
3. Calculate AUC metric
    ```bash
    cd PROJECT_ROOT
    python tools/calculate_auc.py --input_file=models/kdtm_match_linearAttn/predict.tsv --predict_index=3 --label_index=2
    ```

*Tips: you can try different models by running different JSON config files.*
@@ -562,7 +567,58 @@ Here is an example using Chinese data, for text matching task.
    ```
*Tips: you can try different models by running different JSON config files. After training finishes, the model file and the train log file can be found in the outputs/save_base_dir specified in the JSON config.*

### <span id="task-9">Task 9: Sequence Labeling</span>
Sequence labeling is an important NLP task, which includes NER, slot tagging, POS tagging, etc.

- ***Dataset***

    [CoNLL 2003](https://www.clips.uantwerpen.be/conll2003/) is a popular dataset for sequence labeling. We use the CoNLL 2003 English NER data for our experiment; you can refer to the data format in the [sample data](https://github.com/microsoft/NeuronBlocks/tree/master/dataset/slot_tagging/conll_2003).

- ***Tagging Scheme***

    - NeuronBlocks supports both the BIO and BIOES tag schemes.
    - The IOB scheme is not supported, because of its worse performance in most [experiments](https://arxiv.org/pdf/1707.06799.pdf).
    - NeuronBlocks provides a [script](tools/tagging_schemes_converter.py) that converts between the IOB/BIO/BIOES tag schemes (NOTE: the script only supports tsv files with data and label in two columns). A short illustration of the two supported schemes follows.
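    For example, the entity mention *New York City* is tagged `B-LOC I-LOC I-LOC` under BIO and `B-LOC I-LOC E-LOC` under BIOES; BIOES additionally uses `E-` for the last token of a multi-token entity and `S-` for a single-token entity.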
- ***Usages***

    1. Softmax output.
    ```bash
    # train model
    cd PROJECT_ROOT
    python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json

    # test model
    cd PROJECT_ROOT
    python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
    ```
    2. CRF output.
    ```bash
    # train model
    cd PROJECT_ROOT
    python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json

    # test model
    cd PROJECT_ROOT
    python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
    ```
    *Tips: you can try more models [here](https://github.com/microsoft/NeuronBlocks/tree/master/model_zoo/nlp_tasks/slot_tagging).*

- ***Result***

    The results on the CoNLL 2003 English NER dataset.

    Model | F1-score
    -------- | --------
    [Ma and Hovy (2016)](https://arxiv.org/pdf/1603.01354.pdf) | 87.00
    [BiLSTM+Softmax](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json) (NeuronBlocks) | 88.50
    [Lample et al. (2016)](https://arxiv.org/pdf/1603.01360.pdf) | 89.15
    [CLSTM+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wlstm_crf.json) (NeuronBlocks) | 90.83
    [Chiu and Nichols (2016)](https://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00104) | 90.91
    [CCNN+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json) (NeuronBlocks) | 91.38

    *Tips: C means Char and W means Word. CCNN means char-level representation with a CNN model, and CLSTM means char-level representation with an LSTM model.*

## <span id="advanced-usage">Advanced Usage</span>

After building a model, the next goal is to train it well. This depends on a highly expressive model and on tricks of model training. NeuronBlocks provides some tricks for model training.
@@ -675,5 +731,7 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em

***training_params/vocabulary/max_vocabulary***. [int, optional for training, default: 800,000] The maximum size of the corpus vocabulary. If the corpus vocabulary size is larger than *max_vocabulary*, it will be cut according to word frequency.

***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines NB will read from each file to build the vocabulary.
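As a quick reference, these two vocabulary settings nest under `training_params` as shown in this minimal sketch; the values are just the documented defaults, and this is not a complete configuration file.
```json
{
  "training_params": {
    "vocabulary": {
      "max_vocabulary": 800000,
      "max_building_lines": 1000000
    }
  }
}
```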
## <span id="faq">Frequently Asked Questions</span>

Tutorial_zh_CN.md
@@ -1,4 +1,4 @@
# <img src="https://i.imgur.com/YLrkvW3.png" width="80"> ***NeuronBlocks*** 教程

[English Version](Tutorial.md)

@@ -21,6 +21,7 @@
    4. [机器阅读理解模型的模型压缩](#task-6.4)
* [任务 7: 中文情感分析](#task-7)
* [任务 8:中文文本匹配](#task-8)
* [任务 9:序列标注](#task-9)
* [高阶用法](#advanced-usage)
    * [额外的feature](#extra-feature)
    * [学习率衰减](#lr-decay)

@@ -136,10 +137,12 @@ python predict.py --conf_path=model_zoo/demo/conf.json
    CUDA_VISIBLE_DEVICES= python train.py
    ```
- ***cpu_num_workers***. [default: -1] Defines the number of processes used to preprocess the dataset. If the value is negative or 0, the number of processes equals the number of logical cores the CPU supports; otherwise it equals *cpu_num_workers*.
- ***chunk_size***. [default: 1000000] Defines the chunk size of the files NB reads at a time, which avoids running out of memory and enables lazy loading.
- ***batch_size***. Defines the batch size. If there are multiple GPUs, *batch_size* is the batch size of each single GPU.
- ***batch_num_to_show_results***. [necessary for training] During training, show the results every *batch_num_to_show_results* batches.
- ***max_epoch***. [necessary for training] The maximum number of epochs to train.
- ~~***valid_times_per_epoch***~~. [**deprecated**] Please use *steps_per_validation* instead.
- ***steps_per_validation***. [default: 10] Defines how many training steps pass between two validations.
- ***tokenizer***. [optional] Defines the tokenizer. Currently, we support 'nltk' and 'jieba'. By default, 'nltk' is used for English and 'jieba' for Chinese. A minimal JSON sketch of these settings follows.
- **architecture**. Defines the model architecture. The node is a list of layers (blocks) in block_zoo that represents a model. The supported layers of this toolkit are listed in the [block_zoo overview](https://microsoft.github.io/NeuronBlocks).
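A minimal sketch of these `training_params` fields in a JSON configuration; the values are illustrative, not taken from any shipped config file.
```json
{
  "training_params": {
    "cpu_num_workers": -1,
    "chunk_size": 1000000,
    "batch_size": 32,
    "batch_num_to_show_results": 10,
    "max_epoch": 5,
    "steps_per_validation": 100,
    "tokenizer": "jieba"
  }
}
```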

@@ -288,6 +291,8 @@ Question answer matching is a crucial subtask of the question answering problem.
CNN (NeuronBlocks) | 0.747
BiLSTM (NeuronBlocks) | 0.767
BiLSTM+Attn (NeuronBlocks) | 0.754
[ARC-I](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7508
[ARC-II](https://arxiv.org/abs/1503.03244) (NeuronBlocks) | 0.7612
[MatchPyramid](https://arxiv.org/abs/1602.06359) (NeuronBlocks) | 0.763
BiLSTM+Match Attention (NeuronBlocks) | 0.786

@@ -446,7 +451,7 @@ This task is to train a query regression model to learn from a heavy teacher mod
3. Calculate AUC metric
    ```bash
    cd PROJECT_ROOT
    python tools/calculate_auc.py --input_file models/kdqbc_bilstmattn_cnn/train/predict.tsv --predict_index 2 --label_index 1
    ```

*Tips: you can try different models by running different JSON config files.*
@@ -490,7 +495,7 @@ This task is to train a query-passage regression model to learn from a heavy tea
3. Calculate AUC metric
    ```bash
    cd PROJECT_ROOT
    python tools/calculate_auc.py --input_file=models/kdtm_match_linearAttn/predict.tsv --predict_index=3 --label_index=2
    ```

*Tips: you can try different models by running different JSON config files.*
@@ -552,6 +557,58 @@ This task is to train a query-passage regression model to learn from a heavy tea
    ```
*提示:您可以通过运行不同的JSON配置文件来尝试不同的模型。训练完成后,模型文件和训练日志文件可以在JSON配置的outputs/save_base_dir目录中找到。*

### <span id="task-9">任务 9: 序列标注</span>
序列标注是一项重要的NLP任务,包括 NER、Slot Tagging、POS Tagging 等任务。

- ***数据集***

    在序列标注任务中,[CoNLL 2003](https://www.clips.uantwerpen.be/conll2003/) 是一个很常用的数据集。我们的序列标注实验使用 CoNLL 2003 英文 NER 数据,其数据格式可以参考我们给出的[抽样数据](https://github.com/microsoft/NeuronBlocks/tree/master/dataset/slot_tagging/conll_2003)。

- ***标注策略***

    - NeuronBlocks 支持 BIO 和 BIOES 标注策略。
    - IOB 标注策略不被支持,因为在大多数[实验](https://arxiv.org/pdf/1707.06799.pdf)中它的表现较差。
    - NeuronBlocks 提供一个在不同标注策略(IOB/BIO/BIOES)之间转换的[转化脚本](tools/tagging_schemes_converter.py)(脚本仅支持具有数据和标签两列的 tsv 文件输入)。
- ***用法***

    1. Softmax 输出。
    ```bash
    # train model
    cd PROJECT_ROOT
    python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json

    # test model
    cd PROJECT_ROOT
    python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json
    ```
    2. CRF 输出。
    ```bash
    # train model
    cd PROJECT_ROOT
    python train.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json

    # test model
    cd PROJECT_ROOT
    python test.py --conf_path=model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json
    ```
    *提示:尝试更多模型可[点击这里](https://github.com/microsoft/NeuronBlocks/tree/master/model_zoo/nlp_tasks/slot_tagging)。*

- ***结果***

    实验采用 CoNLL 2003 英文 NER 数据集。

    Model | F1-score
    -------- | --------
    [Ma and Hovy (2016)](https://arxiv.org/pdf/1603.01354.pdf) | 87.00
    [BiLSTM+Softmax](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging.json) (NeuronBlocks) | 88.50
    [Lample et al. (2016)](https://arxiv.org/pdf/1603.01360.pdf) | 89.15
    [CLSTM+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_clstm_wlstm_crf.json) (NeuronBlocks) | 90.83
    [Chiu and Nichols (2016)](https://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00104) | 90.91
    [CCNN+WLSTM+CRF](https://github.com/microsoft/NeuronBlocks/blob/master/model_zoo/nlp_tasks/slot_tagging/conf_slot_tagging_ccnn_wlstm_crf.json) (NeuronBlocks) | 91.38

    *提示:C 代表字符(Char),W 代表单词(Word)。CCNN 代表使用 CNN 模型的字符级别表示,CLSTM 代表使用 LSTM 模型的字符级别表示。*

## <span id="advanced-usage">高阶用法</span>

After building a model, the next goal is to train it well. This depends on a highly expressive model and on tricks of model training. NeuronBlocks provides some tricks for model training.
@@ -664,4 +721,6 @@ To solve the above problems, NeuronBlocks supports *fixing embedding weight* (em

***training_params/vocabulary/max_vocabulary***. [int, optional for training, default: 800,000] The maximum size of the corpus vocabulary. If the corpus vocabulary size is larger than *max_vocabulary*, it will be cut according to word frequency.

***training_params/vocabulary/max_building_lines***. [int, optional for training, default: 1,000,000] The maximum number of lines NB will read from each file to build the vocabulary.
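As above, a minimal sketch of how these two vocabulary settings nest under `training_params` (values are the documented defaults, not a complete config):
```json
{
  "training_params": {
    "vocabulary": {
      "max_vocabulary": 800000,
      "max_building_lines": 1000000
    }
  }
}
```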
## <span id="faq">常见问题与答案</span>

@@ -1,6 +1,3 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import re
from calculate_AUC import main


@@ -10,24 +7,29 @@ task_dir = ['/20_newsgroup_bilstm_attn', '/chinese_text_matching', '/question_pa
results = {'english_text_matching': [0.96655], 'chinese_text_matching': [0.70001], 'quora_question_pairs': [0.72596], 'knowledge_distillation': [0.66329]}
for each_dir, key in zip(task_dir, results.keys()):
    target_dir = base_dir + each_dir
    try:
        with open(target_dir + '/train_autotest.log', 'r') as f_r:
            last_line = f_r.readlines()[-1].strip()
            score = ''.join(re.findall(r'(?<=accuracy:).*?(?=loss|;)', last_line))
            try:
                results[key].append(float(score))
            except:
                results[key].append('wrong number in train log')
                print('GPU test. Wrong number in %s/train_autotest.log' % target_dir)
    except:
        results[key].append('no train log')

    try:
        with open(target_dir + '/test_autotest.log', 'r') as f_r:
            last_line = f_r.readlines()[-1].strip()
            score = ''.join(re.findall(r'(?<=accuracy:).*?(?=loss|;)', last_line))
            try:
                results[key].append(float(score))
            except:
                results[key].append('wrong number in test log')
                print('CPU test. Wrong number in %s/test_autotest.log' % target_dir)
    except:
        results[key].append('no test log')

# for the kdtm_match_linearAttn task, we use calculate_AUC.main()
params = {'input_file': './autotest/models/kdtm_match_linearAttn/predict.tsv', 'predict_index': '3', 'label_index': '2', 'header': False}

block_zoo/BaseLayer.py

@@ -154,6 +154,11 @@ class BaseConf(ABC):
        # To check if deepcopy is applied
        assert id(self.output_dim) != id(self.input_dims[0]), 'Please use copy.deepcopy to copy the input_dim to output_dim'

    def verify_former_block(self, former_conf):
        """Check whether this block has special attributes that rely on the former layer."""
        return True

    def add_attr_type_assertion(self, attr, specified_type):
        """ check if the types of attributes are legal

block_zoo/CRF.py (new file)

@@ -0,0 +1,244 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

from block_zoo.BaseLayer import BaseLayer, BaseConf
from utils.DocInherit import DocInherit
import torch
import torch.nn as nn
from copy import deepcopy
import torch.autograd as autograd


def argmax(vec):
    # return the argmax as a python int
    _, idx = torch.max(vec, 1)
    return idx.item()


def log_sum_exp(vec, m_size):
    """
    Calculate the log of the exp sum in a numerically stable way.
    Args:
        vec (batch_size, vanishing_dim, hidden_dim): input tensor
        m_size: hidden_dim
    Returns:
        Tensor of shape (batch_size, hidden_dim)
    """
    _, idx = torch.max(vec, 1)  # B * 1 * M
    max_score = torch.gather(vec, 1, idx.view(-1, 1, m_size)).view(-1, 1, m_size)  # B * M
    return max_score.view(-1, m_size) + torch.log(torch.sum(torch.exp(vec - max_score.expand_as(vec)), 1)).view(-1, m_size)  # B * M


class CRFConf(BaseConf):
    """ Configuration of the CRF layer

    Args:

    """
    def __init__(self, **kwargs):
        super(CRFConf, self).__init__(**kwargs)

    @DocInherit
    def default(self):
        self.START_TAG = "<start>"
        self.STOP_TAG = "<eos>"

    @DocInherit
    def declare(self):
        self.num_of_inputs = 1
        self.input_ranks = [3]

    @DocInherit
    def inference(self):
        self.output_dim = [1]
        # add START_TAG/STOP_TAG to the target dict if they are missing
        self.target_dict = deepcopy(self.target_dict.cell_id_map)
        if not self.target_dict.get(self.START_TAG):
            self.target_dict[self.START_TAG] = len(self.target_dict)
        if not self.target_dict.get(self.STOP_TAG):
            self.target_dict[self.STOP_TAG] = len(self.target_dict)

        super(CRFConf, self).inference()

    @DocInherit
    def verify(self):
        super(CRFConf, self).verify()


class CRF(BaseLayer):
    """ Conditional Random Field layer

    Args:
        layer_conf (CRFConf): configuration of the CRF layer
    """
    def __init__(self, layer_conf):
        super(CRF, self).__init__(layer_conf)
        self.target_size = len(self.layer_conf.target_dict)

        init_transitions = torch.zeros(self.target_size, self.target_size)
        init_transitions[:, self.layer_conf.target_dict[self.layer_conf.START_TAG]] = -10000.0
        init_transitions[self.layer_conf.target_dict[self.layer_conf.STOP_TAG], :] = -10000.0
        init_transitions[:, 0] = -10000.0
        init_transitions[0, :] = -10000.0

        if self.layer_conf.use_gpu:
            init_transitions = init_transitions.cuda()
        self.transitions = nn.Parameter(init_transitions)

    def _calculate_forward(self, feats, mask):
        """
        Args:
            feats: (batch, seq_len, self.tag_size)
            mask: (batch, seq_len)
        """
        batch_size = feats.size(0)
        seq_len = feats.size(1)
        tag_size = feats.size(2)

        mask = mask.transpose(1, 0).contiguous()
        ins_num = seq_len * batch_size
        # be careful with the view shape: it is .view(ins_num, 1, tag_size), not .view(ins_num, tag_size, 1)
        feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
        # need to consider the start tag
        scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
        scores = scores.view(seq_len, batch_size, tag_size, tag_size)
        # build the iterator
        seq_iter = enumerate(scores)
        _, inivalues = next(seq_iter)  # batch_size * from_target_size * to_target_size
        # only need to start from START_TAG
        partition = inivalues[:, self.layer_conf.target_dict[self.layer_conf.START_TAG], :].clone().view(batch_size, tag_size, 1)  # batch_size * to_target_size

        for idx, cur_values in seq_iter:
            # the previous to_target is the current from_target
            # partition: previous results log(exp(from_target)), (batch_size * from_target)
            # cur_values: batch_size * from_target * to_target
            cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
            cur_partition = log_sum_exp(cur_values, tag_size)

            # (batch_size * from_target * to_target) -> (batch_size * to_target)
            mask_idx = mask[idx, :].view(batch_size, 1).expand(batch_size, tag_size)

            # effective updated partition part: only keep the partition values where mask == 1
            masked_cur_partition = cur_partition.masked_select(mask_idx)
            # make mask_idx broadcastable, to disable the warning
            mask_idx = mask_idx.contiguous().view(batch_size, tag_size, 1)

            # replace the partition where mask == 1; the other partition values keep the same
            partition.masked_scatter_(mask_idx, masked_cur_partition)
        # at the last state, add the transition scores for all partitions (and do log_sum_exp), then select the value at STOP_TAG
        cur_values = self.transitions.view(1, tag_size, tag_size).expand(batch_size, tag_size, tag_size) + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
        cur_partition = log_sum_exp(cur_values, tag_size)
        final_partition = cur_partition[:, self.layer_conf.target_dict[self.layer_conf.STOP_TAG]]
        return final_partition.sum(), scores

    def _viterbi_decode(self, feats, mask):
        """
        Args:
            feats: (batch, seq_len, self.tag_size)
            mask: (batch, seq_len)
        Returns:
            path_score: (batch, 1), the corresponding score for each sequence
            decode_idx: (batch, seq_len), decoded sequences
        """
        batch_size = feats.size(0)
        seq_len = feats.size(1)
        tag_size = feats.size(2)

        # calculate the sentence length for each sentence
        length_mask = torch.sum(mask.long(), dim=1).view(batch_size, 1).long()
        # mask to (seq_len, batch_size)
        mask = mask.transpose(1, 0).contiguous()
        ins_num = seq_len * batch_size
        # be careful with the view shape: it is .view(ins_num, 1, tag_size), not .view(ins_num, tag_size, 1)
        feats = feats.transpose(1, 0).contiguous().view(ins_num, 1, tag_size).expand(ins_num, tag_size, tag_size)
        # need to consider the start tag
        scores = feats + self.transitions.view(1, tag_size, tag_size).expand(ins_num, tag_size, tag_size)
        scores = scores.view(seq_len, batch_size, tag_size, tag_size)

        # build the iterator
        seq_iter = enumerate(scores)
        # record the positions of the best scores
        back_points = list()
        partition_history = list()
        # reverse the mask (there is a bug with mask = 1 - mask; use this as an alternative)
        mask = (1 - mask.long()).byte()
        _, inivalues = next(seq_iter)  # batch_size * from_target_size * to_target_size
        # only need to start from START_TAG
        partition = inivalues[:, self.layer_conf.target_dict[self.layer_conf.START_TAG], :].clone().view(batch_size, tag_size)  # batch_size * to_target_size
        partition_history.append(partition)
        # iterate over the remaining scores
        for idx, cur_values in seq_iter:
            # the previous to_target is the current from_target
            # partition: previous results log(exp(from_target)), (batch_size * from_target)
            # cur_values: batch_size * from_target * to_target
            cur_values = cur_values + partition.contiguous().view(batch_size, tag_size, 1).expand(batch_size, tag_size, tag_size)
            partition, cur_bp = torch.max(cur_values, 1)
            partition_history.append(partition)
            # cur_bp: (batch_size, tag_size), the max source score position for the current tag
            # set padded labels to 0, which will be filtered out in post-processing
            cur_bp.masked_fill_(mask[idx].view(batch_size, 1).expand(batch_size, tag_size), 0)
            back_points.append(cur_bp)
        # add the scores to the final STOP_TAG
        partition_history = torch.cat(partition_history, 0).view(seq_len, batch_size, -1).transpose(1, 0).contiguous()  # (batch_size, seq_len, tag_size)
        # get the last position of each sentence, and select the last partitions using gather()
        last_position = length_mask.view(batch_size, 1, 1).expand(batch_size, 1, tag_size) - 1
        last_partition = torch.gather(partition_history, 1, last_position).view(batch_size, tag_size, 1)
        # calculate the scores from the last partition to the end state (and then select the STOP_TAG from it)
        last_values = last_partition.expand(batch_size, tag_size, tag_size) + self.transitions.view(1, tag_size, tag_size).expand(batch_size, tag_size, tag_size)
        _, last_bp = torch.max(last_values, 1)
        pad_zero = autograd.Variable(torch.zeros(batch_size, tag_size)).long()
        if self.layer_conf.use_gpu:
            pad_zero = pad_zero.cuda()
        back_points.append(pad_zero)
        back_points = torch.cat(back_points).view(seq_len, batch_size, tag_size)

        # select the end ids at STOP_TAG
        pointer = last_bp[:, self.layer_conf.target_dict[self.layer_conf.STOP_TAG]]
        insert_last = pointer.contiguous().view(batch_size, 1, 1).expand(batch_size, 1, tag_size)
        back_points = back_points.transpose(1, 0).contiguous()
        # move the end ids (expanded to tag_size) to the corresponding positions of back_points to replace the 0 values
        back_points.scatter_(1, last_position, insert_last)
        back_points = back_points.transpose(1, 0).contiguous()
        # decode from the end; padded position ids are 0, which will be filtered in the following evaluation
        decode_idx = autograd.Variable(torch.LongTensor(seq_len, batch_size))
        if self.layer_conf.use_gpu:
            decode_idx = decode_idx.cuda()
        decode_idx[-1] = pointer.detach()
        for idx in range(len(back_points) - 2, -1, -1):
            pointer = torch.gather(back_points[idx], 1, pointer.contiguous().view(batch_size, 1))
            decode_idx[idx] = pointer.detach().view(batch_size)
        path_score = None
        decode_idx = decode_idx.transpose(1, 0)
        return path_score, decode_idx

    def forward(self, string, string_len):
        """
        CRF layer process: compute scores with the transition matrix and run Viterbi decoding.

        Args:
            string (Tensor): [batch_size, seq_len, target_num]
            string_len (Tensor): [batch_size]

        Returns:
            score: the score from CRF inference
            best_path: the best path from Viterbi decoding
        """
        assert string_len is not None, "The CRF layer needs string length for masking."
        masks = []
        string_len_val = string_len.cpu().data.numpy()
        for i in range(len(string_len)):
            masks.append(torch.cat([torch.ones(string_len_val[i]), torch.zeros(string.shape[1] - string_len_val[i])]))
        masks = torch.stack(masks).view(string.shape[0], string.shape[1]).byte()
        if self.layer_conf.use_gpu:
            masks = masks.cuda()

        forward_score, scores = self._calculate_forward(string, masks)
        _, tag_seq = self._viterbi_decode(string, masks)

        return (forward_score, scores, masks, tag_seq, self.transitions, self.layer_conf), string_len

block_zoo/Conv.py

@@ -35,6 +35,9 @@ class ConvConf(BaseConf):
        self.output_channel_num = 16
        self.batch_norm = True
        self.activation = 'ReLU'
        self.padding_type = 'VALID'
        self.dropout = 0
        self.remind_lengths = True

    @DocInherit
    def declare(self):

@@ -43,9 +46,16 @@ class ConvConf(BaseConf):

    @DocInherit
    def inference(self):

        if self.padding_type == 'SAME':
            self.padding = int((self.window_size - 1) / 2)

        self.output_dim = [-1]
        if self.input_dims[0][1] != -1:
            if self.padding_type == 'SAME':
                self.output_dim.append(self.input_dims[0][1])
            else:
                self.output_dim.append((self.input_dims[0][1] - self.window_size) // self.stride + 1)
        else:
            self.output_dim.append(-1)
        self.output_dim.append(self.output_channel_num)

@@ -67,6 +77,13 @@ class ConvConf(BaseConf):
        for attr in necessary_attrs_for_user:
            self.add_attr_exist_assertion_for_user(attr)

    @DocInherit
    def verify_former_block(self, former_conf):
        if 'conv' in str(type(former_conf)).lower():
            self.mask = False
        else:
            self.mask = True


class Conv(BaseLayer):
    """ Convolution along just 1 direction

@@ -82,16 +99,29 @@ class Conv(BaseLayer):
        else:
            self.activation = None

        self.conv = nn.Conv1d(layer_conf.input_dims[0][-1], layer_conf.output_channel_num, kernel_size=layer_conf.window_size, padding=layer_conf.padding)

        if layer_conf.batch_norm:
            self.batch_norm = nn.BatchNorm1d(layer_conf.output_channel_num)  # the output channel of Conv is the input channel of BN
        else:
            self.batch_norm = None

        if layer_conf.dropout > 0:
            self.cov_dropout = nn.Dropout(layer_conf.dropout)
        else:
            self.cov_dropout = None

        if layer_conf.use_gpu:
            self.conv = self.conv.cuda()
            if self.batch_norm:
                self.batch_norm = self.batch_norm.cuda()
            if self.cov_dropout:
                self.cov_dropout = self.cov_dropout.cuda()
            if self.activation:
                self.activation = self.activation.cuda()

    def forward(self, string, string_len):
        """ process inputs

        Args:

@@ -102,7 +132,7 @@ class Conv(BaseLayer):
            Tensor: shape: [batch_size, (seq_len - conv_window_size) // stride + 1, output_channel_num]

        """
        if string_len is not None and self.layer_conf.mask:
            string_len_val = string_len.cpu().data.numpy()
            masks = []
            for i in range(len(string_len)):

@@ -113,17 +143,21 @@ class Conv(BaseLayer):
            masks = masks.to(device)
            string = string * masks

        string_ = string.transpose(2, 1).contiguous()
        string_out = self.conv(string_)

        if self.activation:
            string_out = self.activation(string_out)

        if self.cov_dropout:
            string_out = self.cov_dropout(string_out)

        if self.batch_norm:
            string_out = self.batch_norm(string_out)

        string_out = string_out.transpose(2, 1).contiguous()

        string_len_out = None
        if string_len is not None and self.layer_conf.remind_lengths:
            string_len_out = string_len
        return string_out, string_len_out

block_zoo/Embedding.py

@@ -67,7 +67,10 @@ class EmbeddingConf(BaseConf):
        for emb_type in self.conf:
            if emb_type == 'position':
                continue
            if isinstance(self.conf[emb_type]['dim'], list):
                self.output_dim[2] += sum(self.conf[emb_type]['dim'])
            else:
                self.output_dim[2] += self.conf[emb_type]['dim']

        super(EmbeddingConf, self).inference()

@@ -114,6 +117,7 @@ class Embedding(BaseLayer):
        self.layer_conf = layer_conf

        self.embeddings = nn.ModuleDict() if layer_conf.weight_on_gpu else dict()
        self.char_embeddings = nn.ModuleDict()
        for input_cluster in layer_conf.conf:
            if 'type' in layer_conf.conf[input_cluster]:
                # char embedding

@@ -123,7 +127,7 @@ class Embedding(BaseLayer):
                char_emb_conf = eval(layer_conf.conf[input_cluster]['type'] + "Conf")(**char_emb_conf_dict)
                char_emb_conf.inference()
                char_emb_conf.verify()
                self.char_embeddings[input_cluster] = eval(layer_conf.conf[input_cluster]['type'])(char_emb_conf)
            else:
                # word embedding, postag embedding, and so on
                self.embeddings[input_cluster] = nn.Embedding(layer_conf.conf[input_cluster]['vocab_size'], layer_conf.conf[input_cluster]['dim'], padding_idx=0)

@@ -135,7 +139,6 @@ class Embedding(BaseLayer):
                self.embeddings[input_cluster].weight.requires_grad = False
                logging.info("The Embedding[%s][fix_weight] is true, fix the embeddings[%s]'s weight" % (input_cluster, input_cluster))

    def forward(self, inputs, use_gpu=False):
        """ process inputs

@@ -157,14 +160,13 @@ class Embedding(BaseLayer):
            if 'extra' in input_cluster:
                continue
            input = inputs[input_cluster]
            if input_cluster == 'char':
                emb = self.char_embeddings[input_cluster](input).float()
            else:
                if list(self.embeddings[input_cluster].parameters())[0].device.type == 'cpu':
                    emb = self.embeddings[input_cluster](input.cpu()).float()
                else:
                    emb = self.embeddings[input_cluster](input).float()
            if use_gpu is True:
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                emb = emb.to(device)

block_zoo/Linear.py

@@ -32,6 +32,7 @@ class LinearConf(BaseConf):
        self.activation = 'PReLU'
        self.last_hidden_activation = True
        self.last_hidden_softmax = False
        self.keep_dim = True  # for example, if the output shape is [?, len, 1] and you want to squeeze it, set keep_dim=False; then the output shape is [?, len]

    @DocInherit
    def declare(self):

@@ -42,10 +43,16 @@ class LinearConf(BaseConf):
    def inference(self):
        if isinstance(self.hidden_dim, int):
            self.output_dim = copy.deepcopy(self.input_dims[0])
            if not self.keep_dim and self.hidden_dim == 1:
                self.output_dim.pop()
            else:
                self.output_dim[-1] = self.hidden_dim
        elif isinstance(self.hidden_dim, list):
            self.output_dim = copy.deepcopy(self.input_dims[0])
            if not self.keep_dim and self.hidden_dim[-1] == 1:
                self.output_dim.pop()
            else:
                self.output_dim[-1] = self.hidden_dim[-1]

        super(LinearConf, self).inference()  # PUT THIS LINE AT THE END OF inference()

@@ -87,6 +94,7 @@ class Linear(BaseLayer):
    def __init__(self, layer_conf):

        super(Linear, self).__init__(layer_conf)
        self.layer_conf = layer_conf

        if layer_conf.input_ranks[0] == 3 and layer_conf.batch_norm is True:
            layer_conf.batch_norm = False

@@ -139,6 +147,8 @@ class Linear(BaseLayer):
            masks = masks.to(device)
            string = string * masks
        string_out = self.linear(string.float())
        if not self.layer_conf.keep_dim:
            string_out = torch.squeeze(string_out, -1)
        return string_out, string_len

block_zoo/Pooling1D.py (new file)

@@ -0,0 +1,104 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

from block_zoo.BaseLayer import BaseLayer, BaseConf
from utils.DocInherit import DocInherit


class Pooling1DConf(BaseConf):
    """

    Args:
        pool_type (str): 'max' or 'mean', default is 'max'.
        stride (int): the stride of the pooling window, default is 1.
        padding (int): implicit zero paddings on both sides of the input. Default: 0
        window_size (int): the size of the pooling window

    """

    def __init__(self, **kwargs):
        super(Pooling1DConf, self).__init__(**kwargs)

    @DocInherit
    def default(self):
        self.pool_type = 'max'  # Supported: ['max', 'mean']
        self.stride = 1
        self.padding = 0
        self.window_size = 3

    @DocInherit
    def declare(self):
        self.num_of_inputs = 1
        self.input_ranks = [3]

    @DocInherit
    def inference(self):
        self.output_dim = [self.input_dims[0][0]]
        if self.input_dims[0][1] != -1:
            self.output_dim.append((self.input_dims[0][1] + 2 * self.padding - self.window_size) // self.stride + 1)
        else:
            self.output_dim.append(-1)

        self.output_dim.append(self.input_dims[0][-1])
        # DON'T MODIFY THIS
        self.output_rank = len(self.output_dim)

    @DocInherit
    def verify(self):
        super(Pooling1DConf, self).verify()

        necessary_attrs_for_user = ['pool_type']
        for attr in necessary_attrs_for_user:
            self.add_attr_exist_assertion_for_user(attr)

        self.add_attr_value_assertion('pool_type', ['max', 'mean'])

        assert self.output_dim[-1] != -1, "The shape of the input is %s, and the input channel number of pooling should not be -1." % (str(self.input_dims[0]))


class Pooling1D(BaseLayer):
    """ Pooling layer

    Args:
        layer_conf (Pooling1DConf): configuration of a layer
    """

    def __init__(self, layer_conf):
        super(Pooling1D, self).__init__(layer_conf)
        self.pool = None
        if layer_conf.pool_type == "max":
            self.pool = nn.MaxPool1d(kernel_size=layer_conf.window_size, stride=layer_conf.stride, padding=layer_conf.padding)
        elif layer_conf.pool_type == "mean":
            self.pool = nn.AvgPool1d(kernel_size=layer_conf.window_size, stride=layer_conf.stride, padding=layer_conf.padding)

    def forward(self, string, string_len=None):
        """ process inputs

        Args:
            string (Tensor): tensor with shape [batch_size, length, feature_dim]
            string_len (Tensor): [batch_size], default is None.

        Returns:
            Tensor: the pooling result of string

        """
        string = string.permute([0, 2, 1]).contiguous()
        string = self.pool(string)
        string = string.permute([0, 2, 1]).contiguous()
        return string, string_len
block_zoo/Pooling2D.py

@@ -19,7 +19,6 @@ class Pooling2DConf(BaseConf):
        stride (int): the stride of the pooling window, default is 1.
        padding (int): implicit zero paddings on both sides of the input. Can be a single number or a tuple (padH, padW). Default: 0
        window_size (int): the size of the pooling window

    """
    def __init__(self, **kwargs):

@@ -30,7 +29,7 @@ class Pooling2DConf(BaseConf):
        self.pool_type = 'max'  # Supported: ['max', 'mean']
        self.stride = 1
        self.padding = 0
        # self.window_size = [self.input_dims[0][1], self.input_dims[0][2]]

    @DocInherit
    def declare(self):

@@ -39,7 +38,7 @@ class Pooling2DConf(BaseConf):

    def check_size(self, value, attr):
        res = value
        if isinstance(value, int):
            res = [value, value]
        elif (isinstance(self.window_size, tuple) or isinstance(self.window_size, list)) and len(value) == 2:
            res = list(value)

@@ -49,6 +48,9 @@ class Pooling2DConf(BaseConf):

    @DocInherit
    def inference(self):

        if not hasattr(self, "window_size"):
            self.window_size = [self.input_dims[0][1], self.input_dims[0][2]]

        self.window_size = self.check_size(self.window_size, "window_size")
        self.stride = self.check_size(self.stride, "stride")

block_zoo/__init__.py

@@ -16,9 +16,13 @@ from .ConvPooling import ConvPooling, ConvPoolingConf
from .Dropout import Dropout, DropoutConf

from .Conv2D import Conv2D, Conv2DConf
from .Pooling1D import Pooling1D, Pooling1DConf
from .Pooling2D import Pooling2D, Pooling2DConf

from .embedding import CNNCharEmbedding, CNNCharEmbeddingConf
from .embedding import LSTMCharEmbedding, LSTMCharEmbeddingConf

from .CRF import CRFConf, CRF

from .attentions import FullAttention, FullAttentionConf
from .attentions import Seq2SeqAttention, Seq2SeqAttentionConf

@@ -49,4 +53,5 @@ from .EncoderDecoder import EncoderDecoder, EncoderDecoderConf

from .normalizations import LayerNorm, LayerNormConf

from .HighwayLinear import HighwayLinear, HighwayLinearConf

block_zoo/Interaction.py

@@ -58,7 +58,7 @@ class InteractionConf(BaseConf):
    def verify(self):
        super(InteractionConf, self).verify()
        assert hasattr(self, 'matching_type'), "Please define the matching_type attribute of InteractionConf in default() or in the configuration file"
        assert self.matching_type in ['general', 'dot', 'mul', 'plus', 'minus', 'add', 'concat'], f"Invalid matching_type {self.matching_type} received. Must be one of 'general', 'dot', 'mul', 'plus', 'minus', 'add' and 'concat'."


class Interaction(BaseLayer):

@@ -120,7 +120,7 @@ class Interaction(BaseLayer):
            return x - y
        elif self.matching_type == 'concat':
            def func(x, y):
                return torch.cat([x, y], dim=-1)
        else:
            raise ValueError(f"Invalid matching type."
                             f"{self.matching_type} received."

block_zoo/embedding/CNNCharEmbedding.py

@@ -28,11 +28,11 @@ class CNNCharEmbeddingConf(BaseConf):

    @DocInherit
    def default(self):
        self.dim = [30]  # the CNN's output channel dim
        self.embedding_matrix_dim = 30
        self.stride = [1]
        self.padding = 0
        self.window_size = [3]
        self.activation = 'ReLU'

    @DocInherit

@@ -41,8 +41,14 @@ class CNNCharEmbeddingConf(BaseConf):
        self.num_of_inputs = 1
        self.input_ranks = [3]

    def change_to_list(self, attribute):
        for single in attribute:
            if not isinstance(getattr(self, single), list):
                setattr(self, single, [getattr(self, single)])

    @DocInherit
    def inference(self):
        self.change_to_list(['dim', 'stride', 'window_size'])
        self.output_channel_num = self.dim
        self.output_rank = 3

@@ -65,20 +71,24 @@ class CNNCharEmbedding(BaseLayer):
        super(CNNCharEmbedding, self).__init__(layer_conf)
        self.layer_conf = layer_conf

        assert len(layer_conf.dim) == len(layer_conf.window_size) == len(layer_conf.stride), "The attributes dim/window_size/stride must have the same length."

        self.char_embeddings = nn.Embedding(layer_conf.vocab_size, layer_conf.embedding_matrix_dim, padding_idx=self.layer_conf.padding)
        nn.init.uniform_(self.char_embeddings.weight, -0.001, 0.001)

        self.char_cnn = nn.ModuleList()
        for i in range(len(layer_conf.output_channel_num)):
            self.char_cnn.append(nn.Conv2d(1, layer_conf.output_channel_num[i], (layer_conf.window_size[i], layer_conf.embedding_matrix_dim), stride=self.layer_conf.stride[i], padding=self.layer_conf.padding))
        if layer_conf.activation:
            self.activation = eval("nn." + self.layer_conf.activation)()
        else:
            self.activation = None
        # if self.is_cuda():
        #     self.char_embeddings = self.char_embeddings.cuda()
        #     self.char_cnn = self.char_cnn.cuda()
        #     if self.activation and hasattr(self.activation, 'weight'):
        #         self.activation.weight = torch.nn.Parameter(self.activation.weight.cuda())

    def forward(self, string):
        """

@@ -97,24 +107,29 @@ class CNNCharEmbedding(BaseLayer):

        """
        string_reshaped = string.view(string.size()[0], -1)  # [batch_size, seq_len * char num in words]

        char_embs_lookup = self.char_embeddings(string_reshaped).float()  # [batch_size, seq_len * char num in words, embedding_dim]
        char_embs_lookup = char_embs_lookup.view(-1, string.size()[2], self.layer_conf.embedding_matrix_dim)  # [batch_size * seq_len, char num in words, embedding_dim]

        string_input = torch.unsqueeze(char_embs_lookup, 1)  # [batch_size * seq_len, input_channel_num=1, char num in words, embedding_dim]

        outputs = []
        for index, single_cnn in enumerate(self.char_cnn):
            string_conv = single_cnn(string_input).squeeze(3)
            if self.activation:
                string_conv = self.activation(string_conv)

            string_maxpooling = F.max_pool1d(string_conv, string_conv.size(2)).squeeze()
            string_out = string_maxpooling.view(string.size()[0], -1, self.layer_conf.output_channel_num[index])

            outputs.append(string_out)

        if len(outputs) > 1:
            string_output = torch.cat(outputs, 2)
        else:
            string_output = outputs[0]

        return string_output


if __name__ == '__main__':
|
@ -0,0 +1,137 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np
from block_zoo.BaseLayer import BaseLayer, BaseConf
from utils.DocInherit import DocInherit


class LSTMCharEmbeddingConf(BaseConf):
    """ Configuration of LSTMCharEmbedding

    Args:
        dim (int, optional): the dimension of character embedding after lstm. Default: 50
        embedding_matrix_dim(int, optional): the dimension of character initialized embedding. Default: 30
        padding(int, optional): index of the padding character in the vocabulary, used as the embedding's padding_idx. Default: 0
        dropout(float, optional): dropout rate. Default: 0.2
        bidirect_flag(Bool, optional): Using BiLSTM or not. Default: True
    """
    def __init__(self, **kwargs):
        super(LSTMCharEmbeddingConf, self).__init__(**kwargs)

    @DocInherit
    def default(self):
        self.dim = 50  # lstm's output channel dim
        self.embedding_matrix_dim = 30
        self.padding = 0
        self.dropout = 0.2
        self.bidirect_flag = True

    @DocInherit
    def declare(self):
        #self.input_channel_num = 1
        self.num_of_inputs = 1
        self.input_ranks = [3]

    @DocInherit
    def inference(self):
        #self.output_channel_num = self.hidden_dim
        self.output_rank = 3

    @DocInherit
    def verify(self):
        # super(LSTMCharEmbeddingConf, self).verify()

        necessary_attrs_for_user = ['embedding_matrix_dim', 'dim', 'dropout', 'bidirect_flag', 'vocab_size']
        for attr in necessary_attrs_for_user:
            self.add_attr_exist_assertion_for_user(attr)


class LSTMCharEmbedding(BaseLayer):
    """
    This layer implements character embedding using an LSTM
    Args:
        layer_conf (LSTMCharEmbeddingConf): configuration of LSTMCharEmbedding
    """
    def __init__(self, layer_conf):
        super(LSTMCharEmbedding, self).__init__(layer_conf)
        self.layer_conf = layer_conf

        self.char_embeddings = nn.Embedding(layer_conf.vocab_size, layer_conf.embedding_matrix_dim, padding_idx=self.layer_conf.padding)
        nn.init.uniform_(self.char_embeddings.weight, -0.001, 0.001)

        if layer_conf.bidirect_flag:
            self.dim = layer_conf.dim // 2
        else:
            self.dim = layer_conf.dim    # otherwise self.dim would be undefined below
        self.dropout = nn.Dropout(layer_conf.dropout)
        self.char_lstm = nn.LSTM(layer_conf.embedding_matrix_dim, self.dim, num_layers=1, batch_first=True, bidirectional=layer_conf.bidirect_flag)

        if self.is_cuda():
            self.char_embeddings = self.char_embeddings.cuda()
            self.dropout = self.dropout.cuda()
            self.char_lstm = self.char_lstm.cuda()

    def forward(self, string):
        """
        Step1: [batch_size, seq_len, char num in words] -> [batch_size*seq_len, char num in words]
        Step2: lookup embedding matrix -> [batch_size*seq_len, char num in words, embedding_dim]
        Step3: after the lstm operation, get [num_layer*num_directions, batch_size*seq_len, dim]
        Step4: reshape -> [batch_size, seq_len, dim]

        Args:
            string (Variable): [[char ids of word1], [char ids of word2], [...], ...], shape: [batch_size, seq_len, char num in words]

        Returns:
            Variable: [batch_size, seq_len, output_dim]

        """
        #print ('string shape: ', string.size())
        string_reshaped = string.view(string.size()[0]*string.size()[1], -1)    # [batch_size * seq_len, char num in words]

        char_embs_lookup = self.char_embeddings(string_reshaped).float()    # [batch_size * seq_len, char num in words, embedding_dim]
        char_embs_drop = self.dropout(char_embs_lookup)
        char_hidden = None
        char_rnn_out, char_hidden = self.char_lstm(char_embs_drop, char_hidden)
        #print('char_hidden shape: ', char_hidden[0].size())
        string_out = char_hidden[0].transpose(1, 0).contiguous().view(string.size()[0], string.size()[1], -1)
        #print('string_out shape: ', string_out.size())
        return string_out


if __name__ == '__main__':
    conf = {
        'embedding_matrix_dim': 30,
        'dim': 30,  # lstm's output channel dim
        'padding': 0,
        'dropout': 0.2,
        'bidirect_flag': True,

        # should be inferred from the corpus
        'vocab_size': 10,
        'input_dims': [5],
        'input_ranks': [3],
        'use_gpu': True
    }
    layer_conf = LSTMCharEmbeddingConf(**conf)

    # make a fake input: [bs, seq_len, char num in words]
    # assume in this batch, the padded sentence length is 3 and each word has 5 chars, including padding 0.
    input_chars = np.array([
        [[3, 1, 2, 5, 4], [1, 2, 3, 4, 0], [0, 0, 0, 0, 0]],
        [[1, 1, 0, 0, 0], [2, 3, 1, 0, 0], [1, 2, 3, 4, 5]]
    ])

    char_emb_layer = LSTMCharEmbedding(layer_conf)

    input_chars = torch.LongTensor(input_chars)
    output = char_emb_layer(input_chars)

    print(output)
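The least obvious line in the file above is the final-hidden-state reshape in forward. The following standalone sketch shows the same shape flow outside the class; it is an illustration only, and all sizes (batch 2, seq_len 3, dim 50, so each direction gets 25 units) are made up:

```python
# Hypothetical sketch of the BiLSTM hidden-state reshape (sizes are made up).
import torch
import torch.nn as nn

batch, seq_len, emb_dim, dim = 2, 3, 30, 50
lstm = nn.LSTM(emb_dim, dim // 2, num_layers=1, batch_first=True, bidirectional=True)

chars = torch.randn(batch * seq_len, 5, emb_dim)  # one row per word, 5 chars each
_, (h_n, _) = lstm(chars)                         # h_n: [2 directions, batch*seq, dim//2]
word_vecs = h_n.transpose(1, 0).contiguous().view(batch, seq_len, -1)
print(word_vecs.shape)                            # torch.Size([2, 3, 50])
```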
@ -1,3 +1,4 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
from .CNNCharEmbedding import CNNCharEmbeddingConf, CNNCharEmbedding
from .LSTMCharEmbedding import LSTMCharEmbeddingConf, LSTMCharEmbedding
@ -0,0 +1,97 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import torch
import torch.nn as nn
import torch.nn.functional as F
import logging

from ..BaseLayer import BaseConf, BaseLayer
from utils.DocInherit import DocInherit
from utils.exceptions import ConfigurationError
import copy


class CalculateDistanceConf(BaseConf):
    """ Configuration of CalculateDistance Layer

    Args:
        operations (list): a subset of ["cos", "euclidean", "manhattan", "chebyshev"]; only the first operation found in the list is applied.
    """

    # init the args
    def __init__(self, **kwargs):
        super(CalculateDistanceConf, self).__init__(**kwargs)

    # set default params
    @DocInherit
    def default(self):
        self.operations = ["cos", "euclidean", "manhattan", "chebyshev"]

    @DocInherit
    def declare(self):
        self.num_of_inputs = 2
        self.input_ranks = [2]

    @DocInherit
    def inference(self):
        self.output_dim = copy.deepcopy(self.input_dims[0])
        self.output_dim[-1] = 1

        super(CalculateDistanceConf, self).inference()

    @DocInherit
    def verify(self):
        super(CalculateDistanceConf, self).verify()

        assert len(self.input_dims) == 2, "Operation requires two inputs"

        # check that the ranks of all the inputs are equal
        rank_equal_flag = True
        for i in range(len(self.input_ranks)):
            if self.input_ranks[i] != self.input_ranks[0] or self.input_ranks[i] != 2:
                rank_equal_flag = False
                break
        if not rank_equal_flag:
            raise ConfigurationError("For layer CalculateDistance, the rank of each input should be equal, and equal to 2!")


class CalculateDistance(BaseLayer):
    """ CalculateDistance layer to calculate the distance of sequences (2D representation)

    Args:
        layer_conf (CalculateDistanceConf): configuration of a layer
    """

    def __init__(self, layer_conf):
        super(CalculateDistance, self).__init__(layer_conf)
        self.layer_conf = layer_conf


    def forward(self, x, x_len, y, y_len):
        """

        Args:
            x: [batch_size, dim]
            x_len: [batch_size]
            y: [batch_size, dim]
            y_len: [batch_size]
        Returns:
            Tensor: [batch_size, 1], None

        """

        batch_size = x.size()[0]
        if "cos" in self.layer_conf.operations:
            result = F.cosine_similarity(x, y)
        elif "euclidean" in self.layer_conf.operations:
            result = torch.sqrt(torch.sum((x - y) ** 2, dim=1))
        elif "manhattan" in self.layer_conf.operations:
            result = torch.sum(torch.abs(x - y), dim=1)
        elif "chebyshev" in self.layer_conf.operations:
            result = torch.abs(x - y).max(dim=1)[0]    # max(dim=1) returns (values, indices); keep the values
        else:
            raise ConfigurationError("This operation is not supported!")

        result = result.view(batch_size, 1)
        return result, None
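Because the forward above walks an if/elif chain, only one distance is computed per layer instance. A small sketch of the four operations on fake inputs (sizes made up, plain torch only, not part of the commit):

```python
# Hypothetical sketch of the four distances on fake [batch_size, dim] inputs.
import torch
import torch.nn.functional as F

x, y = torch.randn(4, 16), torch.randn(4, 16)

cos       = F.cosine_similarity(x, y)                  # [batch_size]
euclidean = torch.sqrt(torch.sum((x - y) ** 2, dim=1))
manhattan = torch.sum(torch.abs(x - y), dim=1)
chebyshev = torch.abs(x - y).max(dim=1)[0]             # [0] keeps values, drops indices

print(cos.view(4, 1).shape)   # each result is reshaped to [batch_size, 1]
```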
@ -47,7 +47,6 @@ class CombinationConf(BaseConf):
            self.output_dim[-1] += int(np.mean([input_dim[-1] for input_dim in self.input_dims]))    # the difference operation requires the dimensions of all the inputs to be equal
        if "dot_multiply" in self.operations:
            self.output_dim[-1] += int(np.mean([input_dim[-1] for input_dim in self.input_dims]))    # the dot_multiply operation requires the dimensions of all the inputs to be equal

        super(CombinationConf, self).inference()

    @DocInherit
@ -0,0 +1,76 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

# From http://www.hangli-hl.com/uploads/3/1/6/8/3168008/hu-etal-nips2014.pdf [ARC-II]

import torch
import torch.nn as nn
import copy

from block_zoo.BaseLayer import BaseLayer, BaseConf
from utils.DocInherit import DocInherit
from utils.exceptions import ConfigurationError

class Expand_plusConf(BaseConf):
    """Configuration for Expand_plus layer

    """
    def __init__(self, **kwargs):
        super(Expand_plusConf, self).__init__(**kwargs)

    @DocInherit
    def default(self):
        self.operation = 'Plus'

    @DocInherit
    def declare(self):
        self.num_of_inputs = 2
        self.input_ranks = [3, 3]

    @DocInherit
    def inference(self):
        self.output_dim = copy.deepcopy(self.input_dims[0])
        if self.input_dims[0][1] == -1 or self.input_dims[1][1] == -1:
            raise ConfigurationError("For Expand_plus layer, the sequence length should be fixed")
        self.output_dim.insert(2, self.input_dims[1][1])    # y_len
        super(Expand_plusConf, self).inference()    # PUT THIS LINE AT THE END OF inference()

    @DocInherit
    def verify(self):
        super(Expand_plusConf, self).verify()


class Expand_plus(BaseLayer):
    """ Expand_plus layer
    Given sequences X and Y, expand X and Y to a common [x_max_len, y_max_len] grid, and then add them.

    Args:
        layer_conf (Expand_plusConf): configuration of a layer

    """
    def __init__(self, layer_conf):

        super(Expand_plus, self).__init__(layer_conf)
        assert layer_conf.input_dims[0][-1] == layer_conf.input_dims[1][-1]


    def forward(self, x, x_len, y, y_len):
        """

        Args:
            x: [batch_size, x_max_len, dim].
            x_len: [batch_size], default is None.
            y: [batch_size, y_max_len, dim].
            y_len: [batch_size], default is None.

        Returns:
            output: [batch_size, x_max_len, y_max_len, dim].

        """

        x_new = torch.stack([x] * y.size()[1], 2)    # [batch_size, x_max_len, y_max_len, dim]
        y_new = torch.stack([y] * x.size()[1], 1)    # [batch_size, x_max_len, y_max_len, dim]

        return x_new + y_new, None
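For intuition, the stack-and-add in forward builds every pairwise sum x_i + y_j. The sketch below (fake tensors, made-up sizes, not part of the commit) shows it is equivalent to unsqueeze-based broadcasting, which avoids materializing the repeated copies:

```python
# Hypothetical sketch: pairwise expand-and-add two sequences.
import torch

x = torch.randn(2, 4, 8)   # [batch, x_max_len, dim]
y = torch.randn(2, 5, 8)   # [batch, y_max_len, dim]

stacked   = torch.stack([x] * y.size(1), 2) + torch.stack([y] * x.size(1), 1)
broadcast = x.unsqueeze(2) + y.unsqueeze(1)   # same values, no explicit copies

print(stacked.shape)                    # torch.Size([2, 4, 5, 8])
print(torch.equal(stacked, broadcast))  # True
```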
@ -4,4 +4,6 @@ from .Concat2D import Concat2D, Concat2DConf
from .Concat3D import Concat3D, Concat3DConf
from .Combination import Combination, CombinationConf
from .Match import Match, MatchConf
from .Flatten import Flatten, FlattenConf
from .Expand_plus import Expand_plus, Expand_plusConf
from .CalculateDistance import CalculateDistance, CalculateDistanceConf
@ -1,13 +1,21 @@
preprocess_exec="sed -f tokenizer.sed"

glovepath='http://nlp.stanford.edu/data/glove.840B.300d.zip'
glovepath_6B='http://nlp.stanford.edu/data/glove.6B.zip'

ZIPTOOL="unzip"

# GloVe
echo $glovepath
mkdir GloVe
if [ ! -d "GloVe" ]; then
    mkdir GloVe
fi
curl -LO $glovepath
$ZIPTOOL glove.840B.300d.zip -d GloVe/
rm glove.840B.300d.zip

curl -LO $glovepath_6B
$ZIPTOOL glove.6B.zip -d GloVe/
rm glove.6B.zip
@ -0,0 +1,15 @@
preprocess_exec="sed -f tokenizer.sed"

glovepath='http://nlp.stanford.edu/data/glove.6B.zip'

ZIPTOOL="unzip"

# GloVe
echo $glovepath
if [ ! -d "GloVe" ]; then
    mkdir GloVe
fi
curl -LO $glovepath
$ZIPTOOL glove.6B.zip -d GloVe/
rm glove.6B.zip
@ -0,0 +1,500 @@
-DOCSTART- O
[500 lines of CoNLL-2003-style NER sample data: whitespace-tokenized news-wire sentences, each followed by its word-level BIOES tags (O, B-/I-/E-/S- over PER, LOC, ORG, MISC).]
|
||||
7. Pauline Konga ( Kenya ) 15:09.74 O B-PER E-PER O S-LOC O O
|
||||
8. Sally Barsosio ( Kenya ) 15:14.34 O B-PER E-PER O S-LOC O O
|
||||
Men 's 400 metres hurdles O O O O O
|
||||
1. Torrance Zellner ( U.S. ) 48.23 seconds O B-PER E-PER O S-LOC O O O
|
||||
2. Samuel Matete ( Zambia ) 48.34 O B-PER E-PER O S-LOC O O
|
||||
3. Derrick Adkins ( U.S. ) 48.62 O B-PER E-PER O S-LOC O O
|
||||
4. Fabrizio Mori ( Italy ) 49.21 O B-PER E-PER O S-LOC O O
|
||||
5. Sven Nylander ( Sweden ) 49.22 O B-PER E-PER O S-LOC O O
|
||||
6. Eric Thomas ( U.S. ) 49.35 O B-PER E-PER O S-LOC O O
|
This file cannot be displayed because it contains an unexpected character at line 87, column 81.

@@ -0,0 +1,500 @@
-DOCSTART- O
|
||||
SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT . O O S-LOC O O O O S-PER O O O O
|
||||
Nadim Ladki B-PER E-PER
|
||||
AL-AIN , United Arab Emirates 1996-12-06 S-LOC O B-LOC I-LOC E-LOC O
|
||||
Japan began the defence of their Asian Cup title with a lucky 2-1 win against Syria in a Group C championship match on Friday . S-LOC O O O O O B-MISC E-MISC O O O O O O O S-LOC O O O O O O O O O
|
||||
But China saw their luck desert them in the second match of the group , crashing to a surprise 2-0 defeat to newcomers Uzbekistan . O S-LOC O O O O O O O O O O O O O O O O O O O O O S-LOC O
|
||||
China controlled most of the match and saw several chances missed until the 78th minute when Uzbek striker Igor Shkvyrin took advantage of a misdirected defensive header to lob the ball over the advancing Chinese keeper and into an empty net . S-LOC O O O O O O O O O O O O O O O S-MISC O B-PER E-PER O O O O O O O O O O O O O O S-MISC O O O O O O O
|
||||
Oleg Shatskiku made sure of the win in injury time , hitting an unstoppable left foot shot from just outside the area . B-PER E-PER O O O O O O O O O O O O O O O O O O O O O
|
||||
The former Soviet republic was playing in an Asian Cup finals tie for the first time . O O S-MISC O O O O O B-MISC E-MISC O O O O O O O
|
||||
Despite winning the Asian Games title two years ago , Uzbekistan are in the finals as outsiders . O O O B-MISC E-MISC O O O O O S-LOC O O O O O O O
|
||||
Two goals from defensive errors in the last six minutes allowed Japan to come from behind and collect all three points from their opening meeting against Syria . O O O O O O O O O O O S-LOC O O O O O O O O O O O O O O S-LOC O
|
||||
Takuya Takagi scored the winner in the 88th minute , rising to head a Hiroshige Yanagimoto cross towards the Syrian goal which goalkeeper Salem Bitar appeared to have covered but then allowed to slip into the net . B-PER E-PER O O O O O O O O O O O O B-PER E-PER O O O S-MISC O O O B-PER E-PER O O O O O O O O O O O O O
|
||||
It was the second costly blunder by Syria in four minutes . O O O O O O O S-LOC O O O O
|
||||
Defender Hassan Abbas rose to intercept a long ball into the area in the 84th minute but only managed to divert it into the top corner of Bitar 's goal . O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O
|
||||
Nader Jokhadar had given Syria the lead with a well-struck header in the seventh minute . B-PER E-PER O O S-LOC O O O O O O O O O O O
|
||||
Japan then laid siege to the Syrian penalty area for most of the game but rarely breached the Syrian defence . S-LOC O O O O O S-MISC O O O O O O O O O O O S-MISC O O
|
||||
Bitar pulled off fine saves whenever they did . S-PER O O O O O O O O
|
||||
Japan coach Shu Kamo said : ' ' The Syrian own goal proved lucky for us . S-LOC O B-PER E-PER O O O O O S-MISC O O O O O O O
|
||||
The Syrians scored early and then played defensively and adopted long balls which made it hard for us . ' O S-MISC O O O O O O O O O O O O O O O O O O
|
||||
' O
|
||||
Japan , co-hosts of the World Cup in 2002 and ranked 20th in the world by FIFA , are favourites to regain their title here . S-LOC O O O O B-MISC E-MISC O O O O O O O O O S-ORG O O O O O O O O O
|
||||
Hosts UAE play Kuwait and South Korea take on Indonesia on Saturday in Group A matches . O S-LOC O S-LOC O B-LOC E-LOC O O S-LOC O O O O O O O
|
||||
All four teams are level with one point each from one game . O O O O O O O O O O O O O
|
||||
-DOCSTART- O
|
||||
RUGBY UNION - CUTTITTA BACK FOR ITALY AFTER A YEAR . B-ORG E-ORG O S-PER O O S-LOC O O O O
|
||||
ROME 1996-12-06 S-LOC O
|
||||
Italy recalled Marcello Cuttitta S-LOC O B-PER E-PER
|
||||
on Friday for their friendly against Scotland at Murrayfield more than a year after the 30-year-old wing announced he was retiring following differences over selection . O O O O O O S-LOC O S-LOC O O O O O O O O O O O O O O O O O
|
||||
Cuttitta , who trainer George Coste said was certain to play on Saturday week , was named in a 21-man squad lacking only two of the team beaten 54-21 by England at Twickenham last month . S-PER O O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O S-LOC O S-LOC O O O
|
||||
Stefano Bordon is out through illness and Coste said he had dropped back row Corrado Covi , who had been recalled for the England game after five years out of the national team . B-PER E-PER O O O O O S-PER O O O O O O B-PER E-PER O O O O O O O S-LOC O O O O O O O O O O
|
||||
Cuttitta announced his retirement after the 1995 World Cup , where he took issue with being dropped from the Italy side that faced England in the pool stages . S-PER O O O O O B-MISC I-MISC E-MISC O O O O O O O O O O S-LOC O O O S-LOC O O O O O
|
||||
Coste said he had approached the player two months ago about a comeback . S-PER O O O O O O O O O O O O O
|
||||
" He ended the World Cup on the wrong note , " Coste said . O O O O B-MISC E-MISC O O O O O O S-PER O O
|
||||
" I thought it would be useful to have him back and he said he would be available . O O O O O O O O O O O O O O O O O O O
|
||||
I think now is the right time for him to return . " O O O O O O O O O O O O O
|
||||
Squad : Javier Pertile , Paolo Vaccari , Marcello Cuttitta , Ivan Francescato , Leandro Manteri , Diego Dominguez , Francesco Mazzariol , Alessandro Troncon , Orazio Arancio , Andrea Sgorlon , Massimo Giovanelli , Carlo Checchinato , Walter Cristofoletto , Franco Properzi Curti , Carlo Orlandi , Massimo Cuttitta , Giambatista Croci , Gianluca Guidi , Nicola Mazzucato , Alessandro Moscardi , Andrea Castellani . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER I-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
|
||||
-DOCSTART- O
|
||||
SOCCER - LATE GOALS GIVE JAPAN WIN OVER SYRIA . O O O O O S-LOC O O S-LOC O
|
||||
AL-AIN , United Arab Emirates 1996-12-06 S-LOC O B-LOC I-LOC E-LOC O
|
||||
Two goals in the last six minutes gave holders Japan an uninspiring 2-1 Asian Cup victory over Syria on Friday . O O O O O O O O O S-LOC O O O B-MISC E-MISC O O S-LOC O O O
|
||||
Takuya Takagi headed the winner in the 88th minute of the group C game after goalkeeper Salem Bitar spoiled a mistake-free display by allowing the ball to slip under his body . B-PER E-PER O O O O O O O O O O O O O O B-PER E-PER O O O O O O O O O O O O O O
|
||||
It was the second Syrian defensive blunder in four minutes . O O O O S-MISC O O O O O O
|
||||
Defender Hassan Abbas rose to intercept a long ball into the area in the 84th minute but only managed to divert it into the top corner of Bitar 's goal . O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O
|
||||
Syria had taken the lead from their first serious attack in the seventh minute . S-LOC O O O O O O O O O O O O O O
|
||||
Nader Jokhadar headed a cross from the right by Ammar Awad into the top right corner of Kenichi Shimokawa 's goal . B-PER E-PER O O O O O O O B-PER E-PER O O O O O O B-PER E-PER O O O
|
||||
Japan then laid siege to the Syrian penalty area and had a goal disallowed for offside in the 16th minute . S-LOC O O O O O S-MISC O O O O O O O O O O O O O O
|
||||
A minute later , Bitar produced a good double save , first from Kazuyoshi Miura 's header and then blocked a Takagi follow-up shot . O O O O S-PER O O O O O O O O B-PER E-PER O O O O O O S-PER O O O
|
||||
Bitar saved well again from Miura in the 37th minute , parrying away his header from a corner . S-PER O O O O S-PER O O O O O O O O O O O O O
|
||||
Japan started the second half brightly but Bitar denied them an equaliser when he dived to his right to save Naoki Soma 's low drive in the 53rd minute . S-LOC O O O O O O S-PER O O O O O O O O O O O O B-PER E-PER O O O O O O O O
|
||||
Japan : 19 - Kenichi Shimokawa , 2 - Hiroshige Yanagimoto , 3 - Naoki Soma , 4 - Masami Ihara , 5 - Norio Omura , 6 - Motohiro Yamaguchi , 8 - Masakiyo Maezono ( 7 - Yasuto Honda 71 ) , 9 - Takuya Takagi , 10 - Hiroshi Nanami , 11 - Kazuyoshi Miura , 15 - Hiroaki Morishima ( 14 - Masayuki Okano 75 ) . S-LOC O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O
|
||||
Syria : 24 - Salem Bitar , 3 - Bachar Srour ; 4 - Hassan Abbas , 5 - Tarek Jabban , 6 - Ammar Awad ( 9 - Louay Taleb 69 ) , 8 - Nihad al-Boushi , 10 - Mohammed Afash , 12 - Ali Dib , 13 - Abdul Latif Helou ( 17 - Ammar Rihawiy 46 ) , 14 - Khaled Zaher ; 16 - Nader Jokhadar . S-LOC O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER I-PER E-PER O O O B-PER E-PER O O O O O B-PER E-PER O O O B-PER E-PER O
|
||||
-DOCSTART- O
|
||||
FREESTYLE SKIING-WORLD CUP MOGUL RESULTS . O B-MISC E-MISC O O O
|
||||
TIGNES , France 1996-12-06 S-LOC O S-LOC O
|
||||
Results of the World Cup O O O B-MISC E-MISC
|
||||
freestyle skiing moguls competition on Friday : O O O O O O O
|
||||
Men O
|
||||
1. Jesper Ronnback ( Sweden ) 25.76 points O B-PER E-PER O S-LOC O O O
|
||||
2. Andrei Ivanov ( Russia ) 24.88 O B-PER E-PER O S-LOC O O
|
||||
3. Ryan Johnson ( Canada ) 24.57 O B-PER E-PER O S-LOC O O
|
||||
4. Jean-Luc Brassard ( Canada ) 24.40 O B-PER E-PER O S-LOC O O
|
||||
5. Korneilus Hole ( Norway ) 23.92 O B-PER E-PER O S-LOC O O
|
||||
6. Jeremie Collomb-Patton ( France ) 23.87 O B-PER E-PER O S-LOC O O
|
||||
7. Jim Moran ( U.S. ) 23.25 O B-PER E-PER O S-LOC O O
|
||||
8. Dominick Gauthier ( Canada ) 22.73 O B-PER E-PER O S-LOC O O
|
||||
9. Johann Gregoire ( France ) 22.58 O B-PER E-PER O S-LOC O O
|
||||
10. Troy Benson ( U.S. ) 22.56 O B-PER E-PER O S-LOC O O
|
||||
Women O
|
||||
1. Tatjana Mittermayer ( Germany ) 24.32 O B-PER E-PER O S-LOC O O
|
||||
2. Candice Gilg ( France ) 24.31 O B-PER E-PER O S-LOC O O
|
||||
3. Minna Karhu ( Finland ) 24.05 O B-PER E-PER O S-LOC O O
|
||||
4. Tae Satoya ( Japan ) 23.75 O B-PER E-PER O S-LOC O O
|
||||
5. Ann Battellle ( U.S. ) 23.56 O B-PER E-PER O S-LOC O O
|
||||
6. Donna Weinbrecht ( U.S. ) 22.48 O B-PER E-PER O S-LOC O O
|
||||
7. Liz McIntyre ( U.S. ) 22.00 O B-PER E-PER O S-LOC O O
|
||||
8. Elena Koroleva ( Russia ) 21.77 O B-PER E-PER O S-LOC O O
|
||||
9. Ljudmila Dymchenko ( Russia ) 21.59 O B-PER E-PER O S-LOC O O
|
||||
10. Katleen Allais ( France ) 21.58 O B-PER E-PER O S-LOC O O
|
||||
-DOCSTART- O
|
||||
SOCCER - ASIAN CUP GROUP C RESULTS . O O B-MISC E-MISC O O O O
|
||||
AL-AIN , United Arab Emirates 1996-12-06 S-LOC O B-LOC I-LOC E-LOC O
|
||||
Results of Asian Cup group C matches played on Friday : O O B-MISC E-MISC O O O O O O O
|
||||
Japan 2 Syria 1 ( halftime 0-1 ) S-LOC O S-LOC O O O O O
|
||||
Scorers : O O
|
||||
Japan - Hassan Abbas 84 own goal , Takuya Takagi 88 . S-LOC O B-PER E-PER O O O O B-PER E-PER O O
|
||||
Syria - Nader Jokhadar 7 S-LOC O B-PER E-PER O
|
||||
Attendance : 10,000 . O O O O
|
||||
China 0 Uzbekistan 2 ( halftime 0-0 ) S-LOC O S-LOC O O O O O
|
||||
Scorers : Shkvyrin Igor 78 , Shatskikh Oleg 90 O O B-PER E-PER O O B-PER E-PER O
|
||||
Attendence : 3,000 O O O
|
||||
Standings ( tabulate under played , won , drawn , lost , goals O O O O O O O O O O O O O
|
||||
for , goals against , points ) : O O O O O O O O
|
||||
Uzbekistan 1 1 0 0 2 0 3 S-LOC O O O O O O O
|
||||
Japan 1 1 0 0 2 1 3 S-LOC O O O O O O O
|
||||
Syria 1 0 0 1 1 2 0 S-LOC O O O O O O O
|
||||
China 1 0 0 1 0 2 0 S-LOC O O O O O O O
|
||||
-DOCSTART- O
|
||||
CRICKET - PAKISTAN V NEW ZEALAND ONE-DAY SCOREBOARD . O O S-LOC O B-LOC E-LOC O O O
|
||||
[ CORRECTED 14:06 GMT ] O O O S-MISC O
|
||||
SIALKOT , Pakistan 1996-12-06 S-LOC O S-LOC O
|
||||
Scoreboard in the second O O O O
|
||||
one-day cricket international between Pakistan and New Zealand O O O O S-LOC O B-LOC E-LOC
|
||||
on Friday : O O O
|
||||
Pakistan S-LOC
|
||||
Saeed Anwar run out 91 ( corrects from 90 ) B-PER E-PER O O O O O O O O
|
||||
Zahoor Elahi b Cairns 86 ( corrects from 87 ) B-PER E-PER O S-PER O O O O O O
|
||||
Ijaz Ahmad c Spearman b Vaughan 59 B-PER E-PER O S-PER O S-PER O
|
||||
Inzamamul Haq st Germon b Astle 2 B-PER E-PER O S-PER O S-PER O
|
||||
Wasim Akram b Harris 4 B-PER E-PER O S-PER O
|
||||
Shahid Afridi b Harris 2 B-PER E-PER O S-PER O
|
||||
Moin Khan c Astle b Harris 1 B-PER E-PER O S-PER O S-PER O
|
||||
Waqar Younis st Germon b Harris 0 B-PER E-PER O S-PER O S-PER O
|
||||
Saqlain Mushtaq b Harris 2 B-PER E-PER O S-PER O
|
||||
Mushtaq Ahmad not out 5 B-PER E-PER O O O
|
||||
Salim Malik not out 1 B-PER E-PER O O O
|
||||
Extras ( lb-8 nb-2 w-14 ) 24 O O O O O O O
|
||||
Total ( for 9 wickets in 47 overs ) 277 O O O O O O O O O O
|
||||
Fall of wicket : 1-177 ( corrects from 1-178 ) 2-225 3-240 4-247 5-252 6-260 7-261 8-269 9-276 O O O O O O O O O O O O O O O O O O
|
||||
Bowling : Doull 8-1-60-0 ( w-3 ) , Kennedy 3-0-24-0 ( w-7 nb-1 ) , O O S-PER O O O O O S-PER O O O O O O
|
||||
Cairns 8-1-35-1 ( w-2 ) , Vaughan 9-1-55-1 , Harris 10-0-42-5 ( w-1 ) , S-PER O O O O O S-PER O O S-PER O O O O O
|
||||
Astle 9-0-53-1 ( w-1 nb-1 ) S-PER O O O O O
|
||||
New Zealand innings B-LOC E-LOC O
|
||||
B. Young c Moin Khan b Waqar 5 B-PER E-PER O B-PER E-PER O S-PER O
|
||||
C. Spearman c Moin Khan b Wasim 0 B-PER E-PER O B-PER E-PER O S-PER O
|
||||
A. Parore c Ijaz Ahmad b Saqlain 37 B-PER E-PER O B-PER E-PER O S-PER O
|
||||
S. Fleming c and b Afridi 88 B-PER E-PER O O O S-PER O
|
||||
C. Cairns b Saqlain 10 B-PER E-PER O S-PER O
|
||||
N. Astle c Ijaz Ahmad b Salim Malik 20 B-PER E-PER O B-PER E-PER O B-PER E-PER O
|
||||
C. Harris lbw b Wasim 22 B-PER E-PER O O S-PER O
|
||||
L. Germon lbw b Afridi 2 B-PER E-PER O O S-PER O
|
||||
J. Vaughan c Moin Khan b Wasim 13 B-PER E-PER O B-PER E-PER O S-PER O
|
||||
S. Doull c subs ( M. Wasim ) b Waqar 1 B-PER E-PER O O O B-PER E-PER O O S-PER O
|
||||
R. Kennedy not out 7 B-PER E-PER O O O
|
||||
Extras ( b-9 lb-3 w-12 nb-2 ) 26 O O O O O O O O
|
||||
Total ( all out in 42.1 overs ) 231 O O O O O O O O O
|
||||
Fall of wickets : 1-3 2-7 3-125 4-146 5-170 6-190 7-195 O O O O O O O O O O O
|
||||
8-213 9-216 . O O O
|
||||
Bowling : Wasim Akram 8.1-0-43-3 ( 9w , 1nb ) , Waqar Younis O O B-PER E-PER O O O O O O O B-PER E-PER
|
||||
6-0-32-2 ( 2w , 1nb ) , Saqlain Mushtaq 8-0-54-2 , Mushtaq Ahmad O O O O O O O B-PER E-PER O O B-PER E-PER
|
||||
10-0-42-0 ( 1w ) , Shahid Afridi 7-0-40-2 , Salim Malik 2.5-0-8-1 , O O O O O B-PER E-PER O O B-PER E-PER O O
|
||||
Ijaz Ahmad 0.1-0-0-0 . B-PER E-PER O O
|
||||
Result : Pakistan won by 46 runs . O O S-LOC O O O O O
|
||||
Third one-day match : December 8 , in Karachi . O O O O O O O O S-LOC O
|
||||
-DOCSTART- O
|
||||
SOCCER - ENGLISH F.A. CUP SECOND ROUND RESULT . O O B-MISC I-MISC E-MISC O O O O
|
||||
LONDON 1996-12-06 S-LOC O
|
||||
Result of an English F.A. Challenge O O O B-MISC I-MISC E-MISC
|
||||
Cup second round match on Friday : S-MISC O O O O O O
|
||||
Plymouth 4 Exeter 1 S-ORG O S-ORG O
|
||||
-DOCSTART- O
|
||||
SOCCER - BLINKER BAN LIFTED . O O S-PER O O O
|
||||
LONDON 1996-12-06 S-LOC O
|
||||
Dutch forward Reggie Blinker had his indefinite suspension lifted by FIFA on Friday and was set to make his Sheffield Wednesday comeback against Liverpool on Saturday . S-MISC O B-PER E-PER O O O O O O S-ORG O O O O O O O O B-ORG E-ORG O O S-ORG O O O
|
||||
Blinker missed his club 's last two games after FIFA slapped a worldwide ban on him for appearing to sign contracts for both Wednesday and Udinese while he was playing for Feyenoord . S-PER O O O O O O O O S-ORG O O O O O O O O O O O O O S-ORG O S-ORG O O O O O S-ORG O
|
||||
FIFA 's players ' status committee , meeting in Barcelona , decided that although the Udinese document was basically valid , it could not be legally protected . S-ORG O O O O O O O O S-LOC O O O O O S-ORG O O O O O O O O O O O O
|
||||
The committee said the Italian club had violated regulations by failing to inform Feyenoord , with whom the player was under contract . O O O O S-MISC O O O O O O O O S-ORG O O O O O O O O O
|
||||
Blinker was fined 75,000 Swiss francs ( $ 57,600 ) for failing to inform the Engllsh club of his previous commitment to Udinese . S-PER O O O S-MISC O O O O O O O O O O S-MISC O O O O O O S-ORG O
|
||||
-DOCSTART- O
|
||||
SOCCER - LEEDS ' BOWYER FINED FOR PART IN FAST-FOOD FRACAS . O O S-ORG O S-PER O O O O O O O
|
||||
LONDON 1996-12-06 S-LOC O
|
||||
Leeds ' England under-21 striker Lee Bowyer was fined 4,500 pounds ( $ 7,400 ) on Friday for hurling chairs at restaurant staff during a disturbance at a McDonald 's fast-food restaurant . S-ORG O S-LOC O O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O B-ORG E-ORG O O O
|
||||
Bowyer , 19 , who was caught in the act by security cameras , pleaded guilty to a charge of affray at a court in London . S-PER O O O O O O O O O O O O O O O O O O O O O O O O S-LOC O
|
||||
He was fined and ordered to pay a total of 175 pounds to two members of staff injured in the fracas in an east London restaurant in October . O O O O O O O O O O O O O O O O O O O O O O O O S-LOC O O O O
|
||||
Leeds had already fined Bowyer 4,000 pounds ( $ 6,600 ) and warned him a repeat of his criminal behaviour could cost him his place in the side . S-ORG O O O S-PER O O O O O O O O O O O O O O O O O O O O O O O O
|
||||
Bowyer , who moved to the Yorkshire club in August for 3.5 million pounds ( $ 5.8 million ) , was expected to play against Middlesbrough on Saturday . S-PER O O O O O S-LOC O O O O O O O O O O O O O O O O O O S-ORG O O O
|
||||
-DOCSTART- O
|
||||
BASKETBALL - EUROLEAGUE STANDINGS . O O S-MISC O O
|
||||
LONDON 1996-12-06 S-LOC O
|
||||
Standings in the men 's EuroLeague O O O O O S-MISC
|
||||
basketball championship after Thursday 's matches ( tabulate under O O O O O O O O O
|
||||
played , won , lost , points ) : O O O O O O O O O
|
||||
Group A O O
|
||||
CSKA Moscow ( Russia 9 6 3 15 B-ORG E-ORG O S-LOC O O O O
|
||||
Stefanel Milan ( Italy ) 9 6 3 15 B-ORG E-ORG O S-LOC O O O O O
|
||||
Maccabi Tel Aviv ( Israel ) 9 5 4 14 B-ORG I-ORG E-ORG O S-LOC O O O O O
|
||||
Ulker Spor ( Turkey ) 9 4 5 13 B-ORG E-ORG O S-LOC O O O O O
|
||||
Limoges ( France ) 9 3 6 12 S-ORG O S-LOC O O O O O
|
||||
Panionios ( Greece ) 9 3 6 12 S-ORG O S-LOC O O O O O
|
||||
Group B O O
|
||||
Teamsystem Bologna ( Italy ) 9 7 2 16 B-ORG E-ORG O S-LOC O O O O O
|
||||
Olympiakos ( Greece ) 9 5 4 14 S-ORG O S-LOC O O O O O
|
||||
Cibona Zagreb ( Croatia ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
|
||||
Alba Berlin ( Germany ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
|
||||
Estudiantes Madrid ( Spain ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
|
||||
Charleroi ( Belgium ) 9 0 9 9 S-ORG O S-LOC O O O O O
|
||||
Group C O O
|
||||
Panathinaikos ( Greece ) 9 7 2 16 S-ORG O S-LOC O O O O O
|
||||
Ljubljana ( Slovenia ) 9 6 3 15 S-ORG O S-LOC O O O O O
|
||||
Villeurbanne ( France ) 9 6 3 15 S-ORG O S-LOC O O O O O
|
||||
Barcelona ( Spain ) 9 4 5 13 S-ORG O S-LOC O O O O O
|
||||
Split ( Croatia ) 9 4 5 13 S-ORG O S-LOC O O O O O
|
||||
Bayer Leverkusen ( Germany ) 9 0 9 9 B-ORG E-ORG O S-LOC O O O O O
|
||||
Group D O O
|
||||
Efes Pilsen ( Turkey ) 9 7 2 16 B-ORG E-ORG O S-LOC O O O O O
|
||||
Pau-Orthez ( France ) 9 5 4 14 S-ORG O S-LOC O O O O O
|
||||
Partizan Belgrade ( Yugoslavia ) 9 5 4 14 B-ORG E-ORG O S-LOC O O O O O
|
||||
Kinder Bologna ( Italy ) 9 4 5 13 B-ORG E-ORG O S-LOC O O O O O
|
||||
Sevilla ( Spain ) 9 4 5 13 S-ORG O S-LOC O O O O O
|
||||
Dynamo Moscow ( Russia ) 9 2 7 11 B-ORG E-ORG O S-LOC O O O O O
|
||||
-DOCSTART- O
|
||||
RUGBY UNION - LITTLE TO MISS CAMPESE FAREWELL . B-ORG E-ORG O S-PER O O S-PER O O
|
||||
Robert Kitson B-PER E-PER
|
||||
LONDON 1996-12-06 S-LOC O
|
||||
Centre Jason Little will miss Australia 's end-of-tour fixture against the Barbarians at Twickenham on Saturday . O B-PER E-PER O O S-LOC O O O O O S-ORG O S-LOC O O O
|
||||
Little has opted not to risk aggravating the knee injury which ruled him out of a large chunk of the tour and is replaced by fellow Queenslander Daniel Herbert . S-PER O O O O O O O O O O O O O O O O O O O O O O O O O S-MISC B-PER E-PER O
|
||||
Owen Finegan has recovered from the knocks he took in last weekend 's test against Wales and retains his place in the back-row ahead of Daniel Manu . B-PER E-PER O O O O O O O O O O O O O S-LOC O O O O O O O O O B-PER E-PER O
|
||||
The Wallabies have their sights set on a 13th successive victory to end their European tour with a 100 percent record but also want to turn on the style and provide David Campese with a fitting send-off in his final match in Australian colours . O S-ORG O O O O O O O O O O O O S-MISC O O O O O O O O O O O O O O O O B-PER E-PER O O O O O O O O O S-MISC O O
|
||||
The Wallabies currently have no plans to make any special presentation to the 34-year-old winger but a full house of 75,000 spectators will still gather in the hope of witnessing one last moment of magic . O S-ORG O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
|
||||
Campese will be up against a familiar foe in the shape of Barbarians captain Rob Andrew , the man who kicked Australia to defeat with a last-ditch drop-goal in the World Cup quarter-final in Cape Town . S-PER O O O O O O O O O O O S-ORG O B-PER E-PER O O O O O S-LOC O O O O O O O O B-MISC E-MISC O O B-LOC E-LOC O
|
||||
" Campo has a massive following in this country and has had the public with him ever since he first played here in 1984 , " said Andrew , also likely to be making his final Twickenham appearance . O S-PER O O O O O O O O O O O O O O O O O O O O O O O O O S-PER O O O O O O O O S-LOC O O
|
||||
On tour , Australia have won all four tests against Italy , Scotland , Ireland and Wales , and scored 414 points at an average of almost 35 points a game . O O O S-LOC O O O O O O S-LOC O S-LOC O S-LOC O S-LOC O O O O O O O O O O O O O O O
|
||||
League duties restricted the Barbarians ' selectorial options but they still boast 13 internationals including England full-back Tim Stimpson and recalled wing Tony Underwood , plus All Black forwards Ian Jones and Norm Hewitt . O O O O S-ORG O O O O O O O O O O S-LOC O B-PER E-PER O O O B-PER E-PER O O B-ORG E-ORG O B-PER E-PER O B-PER E-PER O
|
||||
Teams : O O
|
||||
Barbarians - 15 - Tim Stimpson ( England ) ; 14 - Nigel Walker ( Wales ) , 13 - Allan Bateman ( Wales ) , 12 - Gregor Townsend ( Scotland ) , 11 - Tony Underwood ( England ) ; 10 - Rob Andrew ( England ) , 9 - Rob Howley ( Wales ) ; 8 - Scott Quinnell ( Wales ) , 7 - Neil Back ( England ) , 6 - Dale McIntosh ( Pontypridd ) , 5 - Ian Jones ( New Zealand ) , 4 - Craig Quinnell ( Wales ) , 3 - Darren Garforth ( Leicester ) , 2 - Norm Hewitt ( New Zealand ) , 1 - Nick Popplewell ( Ireland ) . S-ORG O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O B-LOC E-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O B-LOC E-LOC O O O O B-PER E-PER O S-LOC O O
|
||||
Australia - 15 - Matthew Burke ; 14 - Joe Roff , 13 - Daniel Herbert , 12 - Tim Horan ( captain ) , 11 - David Campese ; 10 - Pat Howard , 9 - Sam Payne ; 8 - Michael Brial , 7 - David Wilson , 6 - Owen Finegan , 5 - David Giffin , 4 - Tim Gavin , 3 - Andrew Blades , 2 - Marco Caputo , 1 - Dan Crowley . S-LOC O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O
|
||||
-DOCSTART- O
|
||||
GOLF - ZIMBABWE OPEN SECOND ROUND SCORES . O O B-MISC E-MISC O O O O
|
||||
HARARE 1996-12-06 S-LOC O
|
||||
Leading second round scores in the Zimbabwe Open at the par-72 Chapman Golf Club on Friday ( South African unless stated ) : 132 Des Terblanche 65 67 133 Mark McNulty ( Zimbabwe ) 72 61 134 Steve van Vuuren 65 69 136 Nick Price ( Zimbabwe ) 68 68 , Justin Hobday 71 65 , O O O O O O B-MISC E-MISC O O O B-LOC I-LOC E-LOC O O O B-MISC E-MISC O O O O O B-PER E-PER O O O B-PER E-PER O S-LOC O O O O B-PER I-PER E-PER O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O O O
|
||||
Andrew Pitts ( U.S. ) 69 67 138 Mark Cayeux ( Zimbabwe ) 69 69 , Mark Murless 71 67 139 Hennie Swart 75 64 , Andrew Park 72 67 140 Schalk van der Merwe ( Namibia ) 67 73 , Desvonde B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O O O B-PER E-PER O O O B-PER E-PER O O O B-PER I-PER I-PER E-PER O S-LOC O O O O S-PER
|
||||
Botes 72 68 , Greg Reid 72 68 , Clinton Whitelaw 70 S-PER O O O B-PER E-PER O O O B-PER E-PER O
|
||||
70 , Brett Liddle 75 65 , Hugh Baiocchi 73 67 141 Adilson da Silva ( Brazil ) 72 69 , Sammy Daniels 73 O O B-PER E-PER O O O B-PER E-PER O O O B-PER I-PER E-PER O S-LOC O O O O B-PER E-PER O
|
||||
68 , Trevor Dodds ( Namibia ) 72 69 142 Don Robertson ( U.S. ) 73 69 , Dion Fourie 69 73 , O O B-PER E-PER O S-LOC O O O O B-PER E-PER O S-LOC O O O O B-PER E-PER O O O
|
||||
Steve Waltman 72 70 , Ian Dougan 73 69 B-PER E-PER O O O B-PER E-PER O O
|
||||
-DOCSTART- O
|
||||
SOCCER - UNCAPPED PLAYERS CALLED TO FACE MACEDONIA . O O O O O O O S-LOC O
|
||||
BUCHAREST 1996-12-06 S-LOC O
|
||||
Romania trainer Anghel Iordanescu called up three uncapped players on Friday in his squad to face Macedonia next week in a World Cup qualifier . S-LOC O B-PER E-PER O O O O O O O O O O O O S-LOC O O O O B-MISC E-MISC O O
|
||||
Midfielder Valentin Stefan and striker Viorel Ion of Otelul Galati and defender Liviu Ciobotariu of National Bucharest are the newcomers for the European group eight clash in Macedonia on December 14 . O B-PER E-PER O O B-PER E-PER O B-ORG E-ORG O O B-PER E-PER O B-ORG E-ORG O O O O O S-MISC O O O O S-LOC O O O O
|
||||
Iordanescu said he had picked them because of their good performances in the domestic championship in which National Bucharest are top and Otelul Galati third . " S-PER O O O O O O O O O O O O O O O O B-ORG E-ORG O O O B-ORG E-ORG O O O
|
||||
I think it 's fair to give them a chance , " he told reporters . O O O O O O O O O O O O O O O O
|
||||
League title-holders Steaua Bucharest , who finished bottom of their Champions ' League group in the European Cup , have only two players in the squad . O O B-ORG E-ORG O O O O O O B-MISC I-MISC E-MISC O O O B-MISC E-MISC O O O O O O O O O
|
||||
Attacking midfielder Adrian Ilie , who recently moved from Steaua to Turkish club Galatasaray , is ruled out after two yellow-card offences . O O B-PER E-PER O O O O O S-ORG O S-MISC O S-ORG O O O O O O O O O
|
||||
Squad : O O
|
||||
Goalkeepers - Bogdan Stelea , Florin Prunea . O O B-PER E-PER O B-PER E-PER O
|
||||
Defenders - Dan Petrescu , Daniel Prodan , Anton Dobos , Cornel Papura , Liviu Ciobotariu , Tibor Selymess , Iulian Filipescu . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
|
||||
Midfielders - Gheorghe Hagi , Gheorghe Popescu , Constantin Galca , Valentin Stefan , Basarab Panduru , Dorinel Munteanu , Ovidiu Stinga . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
|
||||
Forwards - Ioan Vladoiu , Gheorghe Craioveanu , Ionel Danciulescu , Viorel Ion . O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
|
||||
REUTER S-ORG
|
||||
-DOCSTART- O
|
||||
SOCCER - BRAZILIAN CHAMPIONSHIP RESULTS . O O S-MISC O O O
|
||||
RIO DE JANEIRO 1996-12-05 B-LOC I-LOC E-LOC O
|
||||
Results of Brazilian O O S-MISC
|
||||
soccer championship semifinal , first leg matches on Thursday . O O O O O O O O O O
|
||||
Goias 1 Gremio 3 S-ORG O S-ORG O
|
||||
Portuguesa 1 Atletico Mineiro 0 S-ORG O B-ORG E-ORG O
|
||||
-DOCSTART- O
|
||||
CRICKET - LARA ENDURES ANOTHER MISERABLE DAY . O O S-PER O O O O O
|
||||
Robert Galvin B-PER E-PER
|
||||
MELBOURNE 1996-12-06 S-LOC O
|
||||
Australia gave Brian Lara another reason to be miserable when they beat West Indies by five wickets in the opening World Series limited overs match on Friday . S-LOC O B-PER E-PER O O O O O O O O B-LOC E-LOC O O O O O O B-MISC E-MISC O O O O O O
|
||||
Lara , disciplined for misconduct on Wednesday , was dismissed for five to extend a disappointing run of form on tour . S-PER O O O O O O O O O O O O O O O O O O O O O
|
||||
Australia , who hold a 2-0 lead in the five-match test series , overhauled West Indies ' total of 172 all out with eight balls to spare to end a run of six successive one-day defeats . S-LOC O O O O O O O O O O O O O B-LOC E-LOC O O O O O O O O O O O O O O O O O O O O O
|
||||
All-rounder Greg Blewett steered his side to a comfortable victory with an unbeaten 57 in 90 balls to the delight of the 42,442 crowd . O B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O
|
||||
Man-of-the match Blewett came to the wicket with the total on 70 for two and hit three fours during an untroubled innings lasting 129 minutes . O O S-PER O O O O O O O O O O O O O O O O O O O O O O O
|
||||
His crucial fifth-wicket partnership with fellow all-rounder Stuart Law , who scored 21 , added 71 off 85 balls . O O O O O O O B-PER E-PER O O O O O O O O O O O
|
||||
Lara looked out of touch during his brief stay at the crease before chipping a simple catch to Shane Warne at mid-wicket . S-PER O O O O O O O O O O O O O O O O O B-PER E-PER O O O
|
||||
West Indies tour manager Clive Lloyd has apologised for Lara 's behaviour on Tuesday . B-LOC E-LOC O O B-PER E-PER O O O S-PER O O O O O
|
||||
He ( Lara ) had told Australia coach Geoff Marsh that wicketkeeper Ian Healy was unwelcome in the visitors ' dressing room . O O S-PER O O O S-LOC O B-PER E-PER O O B-PER E-PER O O O O O O O O O
|
||||
The Melbourne crowd were clearly angered by the incident , loudly jeering the West Indies vice-captain as he walked to the middle . O S-LOC O O O O O O O O O O O B-LOC E-LOC O O O O O O O O
|
||||
It was left to fellow left-hander Shivnarine Chanderpaul to hold the innings together with a gritty 54 despite the handicap of an injured groin . O O O O O O B-PER E-PER O O O O O O O O O O O O O O O O O
|
||||
Chanderpaul was forced to rely on a runner for most of his innings after hurting himself as he scurried back to his crease to avoid being run out . S-PER O O O O O O O O O O O O O O O O O O O O O O O O O O O O
|
||||
Pakistan , who arrive in Australia later this month , are the other team competing in the World Series tournament . S-LOC O O O O S-LOC O O O O O O O O O O O B-MISC E-MISC O O
|
||||
-DOCSTART- O
|
||||
CRICKET - AUSTRALIA V WEST INDIES WORLD SERIES SCOREBOARD . O O S-LOC O B-LOC E-LOC B-MISC E-MISC O O
|
||||
MELBOURNE 1996-12-06 S-LOC O
|
||||
Scoreboard in the World Series O O O B-MISC E-MISC
|
||||
limited overs match between Australia and West Indies on Friday : O O O O S-LOC O B-LOC E-LOC O O O
|
||||
West Indies B-LOC E-LOC
|
||||
S. Campbell c Healy b Gillespie 31 B-PER E-PER O S-PER O S-PER O
|
||||
R. Samuels c M. Waugh b Gillespie 7 B-PER E-PER O B-PER E-PER O S-PER O
|
||||
B. Lara c Warne b Moody 5 B-PER E-PER O S-PER O S-PER O
|
||||
S. Chanderpaul c Healy b Blewett 54 B-PER E-PER O S-PER O S-PER O
|
||||
C. Hooper run out 7 B-PER E-PER O O O
|
||||
J. Adams lbw b Moody 5 B-PER E-PER O O S-PER O
|
||||
J. Murray c Blewett b Warne 24 B-PER E-PER O S-PER O S-PER O
|
||||
N. McLean c and b M. Waugh 7 B-PER E-PER O O O B-PER E-PER O
|
||||
K. Benjamin b Warne 8 B-PER E-PER O S-PER O
|
||||
C. Ambrose run out 2 B-PER E-PER O O O
|
||||
C. Walsh not out 8 B-PER E-PER O O O
|
||||
Extras ( lb-10 w-1 nb-3 ) 14 O O O O O O O
|
||||
Total ( 49.2 overs ) 172 O O O O O O
|
||||
Fall of wickets : 1-11 2-38 3-64 4-73 5-81 6-120 7-135 8-150 O O O O O O O O O O O O
|
||||
9-153 . O O
|
||||
Bowling : Reiffel 10-2-26-0 ( nb-3 ) , Gillespie 10-0-39-2 , O O S-PER O O O O O S-PER O O
|
||||
Moody 10-1-25-2 , Blewett 6.2-0-27-1 , Warne 10-0-34-2 ( w-1 ) , S-PER O O S-PER O O S-PER O O O O O
|
||||
M. Waugh 3-0-11-1 . B-PER E-PER O O
|
||||
Australia S-LOC
|
||||
M. Taylor b McLean 29 B-PER E-PER O S-PER O
|
||||
M. Waugh c Murray b Benjamin 27 B-PER E-PER O S-PER O S-PER O
|
||||
R. Ponting lbw McLean 5 B-PER E-PER O S-PER O
|
||||
G. Blewett not out 57 B-PER E-PER O O O
|
||||
M. Bevan st Murray b Hooper 3 B-PER E-PER O S-PER O S-PER O
|
||||
S. Law b Hooper 21 B-PER E-PER O S-PER O
|
||||
T. Moody not out 3 B-PER E-PER O O O
|
||||
Extras ( lb-17 nb-8 w-3 ) 28 O O O O O O O
|
||||
Total ( for five wickets , 48.4 overs ) 173 O O O O O O O O O O
|
||||
Fall of wickets : 1-59 2-70 3-78 4-90 5-160 . O O O O O O O O O O
|
||||
Did not bat : I. Healy , P. Reiffel , S. Warne , J. Gillespie . O O O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O
|
||||
Bowling : Ambrose 10-3-19-0 ( 2nb 1w ) , Walsh 9-0-34-0 ( 4nb ) , O O S-PER O O O O O O S-PER O O O O O
|
||||
Benjamin 9.4-0-43-1 ( 1nb 1w ) , Hooper 10-0-27-2 ( 1nb ) , McLean S-PER O O O O O O S-PER O O O O O S-PER
|
||||
10-1-33-2 ( 1w ) . O O O O O
|
||||
Result : Australia won by five wickets . O O S-LOC O O O O O
|
||||
-DOCSTART- O
|
||||
CRICKET - AUSTRALIA BEAT WEST INDIES BY FIVE WICKETS . O O S-LOC O B-LOC E-LOC O O O O
|
||||
MELBOURNE 1996-12-06 S-LOC O
|
||||
Australia beat West Indies by five wickets in a World Series limited overs match at the Melbourne Cricket Ground on Friday . S-LOC O B-LOC E-LOC O O O O O B-MISC E-MISC O O O O O B-LOC I-LOC E-LOC O O O
|
||||
Scores : West Indies 172 all out in 49.2 overs ( Shivnarine Chanderpaul 54 ) ; Australia 173-5 in 48.4 overs ( Greg Blewett 57 not out ) . O O B-LOC E-LOC O O O O O O O B-PER E-PER O O O S-LOC O O O O O B-PER E-PER O O O O O
|
||||
-DOCSTART- O
|
||||
CRICKET - WEST INDIES 172 ALL OUT IN 49.2 OVERS V AUSTRALIA . O O B-LOC E-LOC O O O O O O O S-LOC O
|
||||
MELBOURNE 1996-12-06 S-LOC O
|
||||
West Indies were all out for 172 off 49.2 overs in the World Series limited overs match against Australia on Friday . B-LOC E-LOC O O O O O O O O O O B-MISC E-MISC O O O O S-LOC O O O
|
||||
-DOCSTART- O
|
||||
CRICKET - SHEFFIELD SHIELD SCORE . O O B-MISC E-MISC O O
|
||||
HOBART , Australia 1996-12-06 S-LOC O S-LOC O
|
||||
Score on the first day of the four-day Sheffield Shield match between Tasmania and Victoria at Bellerive Oval on Friday : O O O O O O O O B-MISC E-MISC O O S-LOC O S-LOC O B-LOC E-LOC O O O
|
||||
Tasmania 352 for three ( David Boon 106 not out , Shaun Young 86 not out , Michael DiVenuto 119 ) v Victoria . S-LOC O O O O B-PER E-PER O O O O B-PER E-PER O O O O B-PER E-PER O O O S-ORG O
|
||||
-DOCSTART- O
|
||||
CRICKET - LARA SUFFERS MORE AUSTRALIAN TOUR MISERY . O O S-PER O O O O O O
|
||||
MELBOURNE 1996-12-06 S-LOC O
|
||||
West Indies batsman Brian Lara suffered another blow to his Australian tour , after already being disciplined for misconduct , when he was dismissed cheaply in the first limited overs match against Australia on Friday . B-LOC E-LOC O B-PER E-PER O O O O O S-MISC O O O O O O O O O O O O O O O O O O O O O S-LOC O O O
|
||||
Lara , who earned a stern rebuke from his own tour management after an angry outburst against Australia wicketkeeper Ian Healy , scored five to prolong a run of poor form with the bat . S-PER O O O O O O O O O O O O O O O O S-LOC O B-PER E-PER O O O O O O O O O O O O O O
|
||||
The West Indies vice-captain struggled for timing during his 36-minute stay at the crease before chipping a ball from medium pacer Tom Moody straight to Shane Warne at mid-wicket . O B-LOC E-LOC O O O O O O O O O O O O O O O O O O B-PER E-PER O O B-PER E-PER O O O
|
||||
West Indies were 53 for two in 15 overs when rain stopped play at the Melbourne Cricket Ground after captain Courtney Walsh won the toss and elected to bat . B-LOC E-LOC O O O O O O O O O O O O O B-LOC I-LOC E-LOC O O B-PER E-PER O O O O O O O O
|
||||
Lara 's outburst three days ago has clearly turned some of the Australian public against him . S-PER O O O O O O O O O O O S-MISC O O O O
|
||||
As he walked to the wicket he was greeted by loud jeers from sections of the crowd . O O O O O O O O O O O O O O O O O O
|
||||
On several occasions during his innings , the crowd joined together in a series of obscene chants against him . O O O O O O O O O O O O O O O O O O O O
|
||||
Tour manager Clive Lloyd on Wednesday apologised for Lara 's behaviour in confronting Australia coach Geoff Marsh in the opposition dressing room to protest against his dismissal in the second test on Tuesday . O O B-PER E-PER O O O O S-PER O O O O S-LOC O B-PER E-PER O O O O O O O O O O O O O O O O O
|
||||
Lloyd did not say what form the discipline would take . S-PER O O O O O O O O O O
|
||||
Lara , who holds the record for the highest score in test and first-class cricket , was unhappy about Healy 's role in the incident and questioned whether the ball had carried to the Australia keeper . S-PER O O O O O O O O O O O O O O O O O O S-PER O O O O O O O O O O O O O O S-LOC O O
|
||||
Australia went on to win the match at the Sydney Cricket Ground by 124 runs to take a two-nil lead in the five-test series after Lara failed in both innings . S-LOC O O O O O O O O B-LOC I-LOC E-LOC O O O O O O O O O O O O O S-PER O O O O O
|
||||
Lara has yet to score a century since West Indies arrived in Australia five weeks ago . S-PER O O O O O O O B-LOC E-LOC O O S-LOC O O O O
|
||||
Both West Indies and Australia team management have played down the incident , stressing that relations between the two sides have not been adversely affected . O B-LOC E-LOC O S-LOC O O O O O O O O O O O O O O O O O O O O O
|
||||
Pakistan , who arrive next week , are the third team in the triangular World Series tournament . S-LOC O O O O O O O O O O O O O B-MISC E-MISC O O
|
||||
-DOCSTART- O
|
||||
CRICKET - WEST INDIES TO BAT AFTER WINNING THE TOSS . O O B-LOC E-LOC O O O O O O O
|
||||
MELBOURNE 1996-12-06 S-LOC O
|
||||
West Indies captain Courtney Walsh elected to bat after winning the toss in the first match in the World Series limited overs competition against Australia at the Melbourne Cricket Ground on Friday . B-LOC E-LOC O B-PER E-PER O O O O O O O O O O O O O B-MISC E-MISC O O O O S-LOC O O S-LOC O O O O O
|
||||
Teams : O O
|
||||
Australia - Mark Taylor ( captain ) , Mark Waugh , Ricky Ponting , Greg Blewett , Michael Bevan , Stuart Law , Tom Moody , Ian Healy , Paul Reiffel , Shane Warne , Jason Gillespie , Glenn McGrath 12th man . S-LOC O B-PER E-PER O O O O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O O O
|
||||
West Indies - Sherwin Campbell , Robert Samuels , Brian Lara , Shivnarine Chanderpaul , Carl Hooper , Jimmy Adams , Junior Murray , Nixon McLean , Kenneth Benjamin , Curtly Ambrose , Courtney Walsh ( captain ) , Roland Holder 12th man . B-LOC E-LOC O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O B-PER E-PER O O O O B-PER E-PER O O O
|
||||
-DOCSTART- O
|
||||
BADMINTON - WORLD GRAND PRIX RESULTS . O O B-MISC I-MISC E-MISC O O
|
||||
BALI 1996-12-06 S-LOC O
|
||||
Results in last of the group matches at the World Grand Prix badminton finals on Friday : O O O O O O O O O B-MISC I-MISC E-MISC O O O O O
|
||||
Men 's singles O O O
|
||||
Group B O O
|
||||
Chen Gang ( China ) beat Martin Londgaard Hansen ( Denmark ) 15-12 15-6 B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O O
|
||||
Dong Jiong ( China ) beat Thomas Stuer-Lauridsen ( Denmark ) 15-10 15-6 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
|
||||
Indra Wijaya ( Indonesia ) beat Ong Ewe Hock ( Malaysia ) 5-15 15-11 15-11 B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O O O
|
||||
Group C O O
|
||||
Sun Jun ( China ) beat Rashid Sidek ( Malaysia ) 15-12 17-14 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
|
||||
Hermawan Susanto ( Indonesia ) beat Soren B. Nielsen ( Denmark ) 15-8 15-2 B-PER E-PER O S-LOC O O B-PER I-PER E-PER O S-LOC O O O
|
||||
Group D O O
|
||||
Allan Budi Kuksuma ( Indonesia ) beat Poul-Erik Hoyer-Larsen ( Denmark ) 15-7 15-4 B-PER I-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
|
||||
Budi Santoso ( Indonesia ) beat Hu Zhilan ( China ) 15-4 15-5 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
|
||||
Semifinals ( on Saturday ) : Fung Permadi ( Taiwan ) v Indra O O O O O O B-PER E-PER O S-LOC O O S-PER
|
||||
Wijaya ( Indonesia ) ; Sun Jun ( China ) v Allan Budi Kusuma S-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER I-PER E-PER
|
||||
( Indonesia ) O S-LOC O
|
||||
Women 's singles O O O
|
||||
Group A O O
|
||||
Gong Zhichao ( China ) beat Mia Audina ( Indonesia ) 11-2 12-10 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
|
||||
Group B O O
|
||||
Ye Zhaoying ( China ) beat Meiluawati ( Indonesia ) 11-6 12-10 B-PER E-PER O S-LOC O O S-PER O S-LOC O O O
|
||||
Group C O O
|
||||
Camilla Martin ( Denmark ) beat Wang Chen ( China ) 11-0 12-10 B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O
|
||||
Group D O O
|
||||
Susi Susanti ( Indonesia ) beat Han Jingna ( China ) 11-5 11-4 . B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O O O
|
||||
Semifinals ( on Saturday ) : Susi Susanti ( Indonesia ) v Camilla Martin ( Denmark ) ; Ye Zhaoying ( China ) v Gong Zichao ( China ) . O O O O O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O B-PER E-PER O S-LOC O O
|
||||
-DOCSTART- O
|
||||
SOCCER - ARAB CONTRACTORS WIN AFRICAN CUP WINNERS ' CUP . O O S-MISC O O B-MISC I-MISC I-MISC I-MISC E-MISC O
|
||||
CAIRO 1996-12-06 S-LOC O
|
||||
Result of the second leg of the African Cup Winners ' Cup final at the National stadium on Friday : Arab Contractors ( Egypt ) 4 Sodigraf ( Zaire ) 0 ( halftime 2-0 ) O O O O O O O B-MISC I-MISC I-MISC I-MISC E-MISC O O O B-LOC E-LOC O O O B-ORG E-ORG O S-LOC O O S-ORG O S-LOC O O O O O O
|
||||
Scorers : O O
|
||||
Aly Ashour 7 , 56 penalty , Mohamed Ouda 24 , 73 B-PER E-PER O O O O O B-PER E-PER O O O
|
||||
Contractors won 4-0 on aggregate . O O O O O O
|
||||
-DOCSTART- O
|
||||
NHL ICE HOCKEY - STANDINGS AFTER THURSDAY 'S GAMES . S-ORG O O O O O O O O O
|
||||
NEW YORK 1996-12-06 B-LOC E-LOC O
|
||||
Standings of National Hockey O O B-ORG E-ORG
|
||||
League teams after games played on Thursday ( tabulate under S-ORG O O O O O O O O O
|
||||
won , lost , tied , goals for , goals against , points ) : O O O O O O O O O O O O O O O
|
||||
EASTERN CONFERENCE O O
|
||||
NORTHEAST DIVISION O O
|
||||
W L T GF GA PTS O O O O O O
|
||||
HARTFORD 12 7 6 77 76 30 S-ORG O O O O O O
|
||||
BUFFALO 13 12 1 77 76 27 S-ORG O O O O O O
|
||||
BOSTON 10 11 4 74 84 24 S-ORG O O O O O O
|
||||
MONTREAL 10 14 4 96 103 24 S-ORG O O O O O O
|
||||
PITTSBURGH 9 13 3 81 91 21 S-ORG O O O O O O
|
||||
OTTAWA 7 11 6 62 72 20 S-ORG O O O O O O
|
||||
ATLANTIC DIVISION S-LOC O
|
||||
W L T GF GA PTS O O O O O O
|
||||
FLORIDA 17 4 6 83 53 40 S-ORG O O O O O O
|
||||
PHILADELPHIA 14 12 2 75 75 30 S-ORG O O O O O O
|
||||
NEW JERSEY 14 10 1 61 61 29 B-ORG E-ORG O O O O O O
|
||||
WASHINGTON 13 12 1 69 66 27 S-ORG O O O O O O
|
||||
NY RANGERS 10 13 5 91 81 25 B-ORG E-ORG O O O O O O
|
||||
NY ISLANDERS 7 11 8 65 72 22 B-ORG E-ORG O O O O O O
|
||||
TAMPA BAY 8 15 2 69 81 18 B-ORG E-ORG O O O O O O
|
||||
WESTERN CONFERENCE O O
|
||||
CENTRAL DIVISION B-MISC E-MISC
|
||||
W L T GF GA PTS O O O O O O
|
||||
DETROIT 15 9 4 81 53 34 S-ORG O O O O O O
|
||||
DALLAS 16 9 1 74 60 33 S-ORG O O O O O O
|
||||
CHICAGO 12 12 3 71 67 27 S-ORG O O O O O O
|
||||
ST LOUIS 13 14 0 78 81 26 B-ORG E-ORG O O O O O O
|
||||
TORONTO 11 15 0 76 89 22 S-ORG O O O O O O
|
||||
PHOENIX 9 13 4 61 74 22 S-ORG O O O O O O
|
||||
PACIFIC DIVISION S-LOC O
|
||||
W L T GF GA PTS O O O O O O
|
||||
COLORADO 17 6 4 97 56 38 S-ORG O O O O O O
|
||||
VANCOUVER 14 11 1 84 83 29 S-ORG O O O O O O
|
||||
EDMONTON 13 14 1 94 88 27 S-ORG O O O O O O
|
||||
LOS ANGELES 11 13 3 72 83 25 B-ORG E-ORG O O O O O O
|
||||
SAN JOSE 10 13 4 69 87 24 B-ORG E-ORG O O O O O O
|
||||
CALGARY 10 16 2 65 77 22 S-ORG O O O O O O
|
||||
ANAHEIM 9 14 4 73 86 22 S-ORG O O O O O O
|
||||
FRIDAY , DECEMBER 6 O O O O
|
||||
ANAHEIM AT BUFFALO S-ORG O S-LOC
|
||||
TORONTO AT NY RANGERS S-ORG O B-ORG E-ORG
|
||||
PITTSBURGH AT WASHINGTON S-ORG O S-LOC
|
||||
MONTREAL AT CHICAGO S-ORG O S-LOC
|
||||
PHILADELPHIA AT DALLAS S-ORG O S-LOC
|
||||
ST LOUIS AT COLORADO B-ORG E-ORG O S-LOC
|
||||
OTTAWA AT EDMONTON S-ORG O S-LOC
|
||||
-DOCSTART- O
|
||||
NHL ICE HOCKEY - THURSDAY 'S RESULTS . S-ORG O O O O O O O
|
||||
[ CORRECTED 08:40 GMT ] O O O S-MISC O
|
||||
NEW YORK 1996-12-06 B-LOC E-LOC O
|
||||
( Corrects headline from NBA to NHL and corrects team name in second result from La Clippers to Ny Islanders . O O O O S-ORG O S-ORG O O O O O O O O B-ORG E-ORG O B-ORG E-ORG O
|
||||
) O
|
||||
Results of National Hockey O O B-ORG E-ORG
|
||||
League games on Thursday ( home team in CAPS ) : S-ORG O O O O O O O O O O
|
||||
Hartford 4 BOSTON 2 S-ORG O S-ORG O
|
||||
FLORIDA 4 Ny Islanders 2 S-ORG O B-ORG E-ORG O
|
||||
NEW JERSEY 2 Calgary 1 B-ORG E-ORG O S-ORG O
|
||||
Phoenix 3 ST LOUIS 0 S-ORG O B-ORG E-ORG O
|
||||
Tampa Bay 2 LOS ANGELES 1 B-ORG E-ORG O B-ORG E-ORG O
|
||||
-DOCSTART- O
|
||||
NFL AMERICAN FOOTBALL-COLTS CLOBBER EAGLES TO STAY IN PLAYOFF HUNT . S-ORG O O O S-ORG O O O O O O
|
||||
INDIANAPOLIS 1996-12-06 S-LOC O
|
||||
The injury-plagued Indianapolis Colts lost another quarterback on Thursday but last year 's AFC finalists rallied together to shoot down the Philadelphia Eagles 37-10 in a showdown of playoff contenders . O O B-ORG E-ORG O O O O O O O O O O O O O O O O O B-ORG E-ORG O O O O O O O O
|
||||
Marshall Faulk rushed for 101 yards and two touchdowns and Jason Belser returned an interception 44 yards for a score as the Colts improved to 8-6 , the same mark as the Eagles , who lost for the fourth time in five games . B-PER E-PER O O O O O O O O B-PER E-PER O O O O O O O O O O S-ORG O O O O O O O O O S-ORG O O O O O O O O O O O
|
||||
Paul Justin , starting for the sidelined Jim Harbaugh , was 14-of-23 for 144 yards and a touchdown for the the Colts , who played their last home game of the season . B-PER E-PER O O O O O B-PER E-PER O O O O O O O O O O O O S-ORG O O O O O O O O O O O
|
||||
Indianapolis closes with games at Kansas City and Cincinnati . S-LOC O O O O B-LOC E-LOC O S-LOC O
|
||||
The Eagles were held without a touchdown until the final five seconds . O S-ORG O O O O O O O O O O O
|
||||
Philadelphia , which fell from an NFC East tie with the Dallas Cowboys and Washington Redskins , go on the road against the New York Jets and then entertain Arizona . S-LOC O O O O O O O O O O B-ORG E-ORG O B-ORG E-ORG O O O O O O O B-ORG I-ORG E-ORG O O O S-ORG O
|
||||
The loss by Philadelphia allowed the idle Green Bay Packers ( 10-3 ) to clinch the first NFC playoff berth . O O O S-ORG O O O B-ORG I-ORG E-ORG O O O O O O O O O O O
|
||||
The Colts won despite the absence of injured starting defensive tackle Tony Siragusa , cornerback Ray Buchanan and linebacker Quentin Coryatt . O S-ORG O O O O O O O O O B-PER E-PER O O B-PER E-PER O O B-PER E-PER O
|
||||
Faulk carried 16 times , including a 13-yard TD run in the first quarter and a seven-yard score early in the final period . S-PER O O O O O O O O O O O O O O O O O O O O O O O
|
||||
Justin made his second straight start for Harbaugh , who has a knee injury . S-PER O O O O O O S-PER O O O O O O O
|
||||
Justin suffered a sprained right shoulder in the third quarter and did not return . S-PER O O O O O O O O O O O O O O
|
||||
Third-stringer Kerwin Bell , a 1988 draft choice of the Miami Dolphins , made his NFL debut and was 5-of-5 for 75 yards , including a 20-yard scoring strike to Marvin Harrison in the third period . O B-PER E-PER O O O O O O O B-ORG E-ORG O O O S-ORG O O O O O O O O O O O O O O B-PER E-PER O O O O O
|
||||
A 39-yard interference penalty against Philadelphia 's Troy Vincent set up Faulk 's first score around left end that capped an 80-yard march 5:17 into the game and the rout was on . O O O O O S-LOC O B-PER E-PER O O S-PER O O O O O O O O O O O O O O O O O O O O O
|
||||
Eagles quarterback Ty Detmer was 17-of-34 for 182 yards before he was benched . S-ORG O B-PER E-PER O O O O O O O O O O
|
||||
Ricky Watters , who leads the NFC in rushing , left the game after getting kneed to the helmet after gaining 33 yards on seven carries . B-PER E-PER O O O O O O O O O O O O O O O O O O O O O O O O O
|
||||
-DOCSTART- O
|
||||
NBA BASKETBALL - STANDINGS AFTER THURSDAY 'S GAMES . S-ORG O O O O O O O O
|
||||
NEW YORK 1996-12-06 B-LOC E-LOC O
|
||||
Standings of National O O S-ORG
|
||||
Basketball Association teams after games played on Thursday B-ORG E-ORG O O O O O O
|
||||
( tabulate under won , lost , percentage , games behind ) : O O O O O O O O O O O O O
|
||||
EASTERN CONFERENCE O O
|
||||
ATLANTIC DIVISION S-LOC O
|
||||
W L PCT GB O O O O
|
||||
MIAMI 14 4 .778 - S-ORG O O O O
|
||||
NEW YORK 10 6 .625 3 B-ORG E-ORG O O O O
|
||||
ORLANDO 8 6 .571 4 S-ORG O O O O
|
||||
WASHINGTON 7 9 .438 6 S-ORG O O O O
|
||||
PHILADELPHIA 7 10 .412 6 1/2 S-ORG O O O O O
|
||||
BOSTON 4 12 .250 9 S-ORG O O O O
|
||||
NEW JERSEY 3 10 .231 8 1/2 B-ORG E-ORG O O O O O
|
||||
CENTRAL DIVISION O O
|
||||
W L PCT GB O O O O
|
||||
CHICAGO 17 1 .944 - S-ORG O O O O
|
||||
DETROIT 13 3 .813 3 S-ORG O O O O
|
||||
CLEVELAND 11 5 .688 5 S-ORG O O O O
|
||||
ATLANTA 10 8 .556 7 S-ORG O O O O
|
||||
CHARLOTTE 8 8 .500 8 S-ORG O O O O
|
||||
MILWAUKEE 8 8 .500 8 S-ORG O O O O
|
||||
INDIANA 7 8 .467 8 1/2 S-ORG O O O O O
|
||||
TORONTO 6 11 .353 10 1/2 S-ORG O O O O O
|
||||
WESTERN CONFERENCE O O
|
||||
MIDWEST DIVISION O O
|
||||
W L PCT GB O O O O
|
||||
HOUSTON 16 2 .889 - S-ORG O O O O
|
||||
UTAH 14 2 .875 1 S-ORG O O O O
|
||||
MINNESOTA 7 10 .412 8 1/2 S-ORG O O O O O
|
||||
DALLAS 6 11 .353 9 1/2 S-ORG O O O O O
|
||||
DENVER 5 14 .263 11 1/2 S-ORG O O O O O
|
||||
SAN ANTONIO 3 13 .188 12 B-ORG E-ORG O O O O
|
||||
VANCOUVER 2 16 .111 14 S-ORG O O O O
|
||||
PACIFIC DIVISION S-LOC O
|
||||
W L PCT GB O O O O
|
||||
SEATTLE 15 5 .750 - S-ORG O O O O
|
||||
LA LAKERS 13 7 .650 2 B-ORG E-ORG O O O O
|
||||
PORTLAND 11 8 .579 3 1/2 S-ORG O O O O O
|
||||
LA CLIPPERS 7 11 .389 7 B-ORG E-ORG O O O O
|
||||
GOLDEN STATE 6 12 .333 8 B-ORG E-ORG O O O O
|
This file cannot be displayed because it contains an unexpected character at line 33, column 46.

The diff for this file is not shown because of its large size.
@ -0,0 +1,71 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.autograd as autograd
|
||||
|
||||
|
||||
class CRFLoss(nn.Module):
|
||||
"""CRFLoss
|
||||
use for crf output layer for sequence tagging task.
|
||||
"""
|
||||
def __init__(self):
|
||||
super(CRFLoss, self).__init__()
|
||||
|
||||
def _score_sentence(self, scores, mask, tags, transitions, crf_layer_conf):
|
||||
"""
|
||||
input:
|
||||
scores: variable (seq_len, batch, tag_size, tag_size)
|
||||
mask: (batch, seq_len)
|
||||
tags: tensor (batch, seq_len)
|
||||
output:
|
||||
score: sum of score for gold sequences within whole batch
|
||||
"""
|
||||
# Gives the score of a provided tag sequence
|
||||
batch_size = scores.size(1)
|
||||
seq_len = scores.size(0)
|
||||
tag_size = scores.size(2)
|
||||
# convert tag value into a new format, recorded label bigram information to index
|
||||
new_tags = autograd.Variable(torch.LongTensor(batch_size, seq_len))
|
||||
if crf_layer_conf.use_gpu:
|
||||
new_tags = new_tags.cuda()
|
||||
for idx in range(seq_len):
|
||||
if idx == 0:
|
||||
# start -> first score
|
||||
new_tags[:, 0] = (tag_size-2)*tag_size + tags[:, 0]
|
||||
else:
|
||||
new_tags[:, idx] = tags[:, idx-1]*tag_size + tags[:, idx]
|
||||
|
||||
# transition for label to STOP_TAG
|
||||
end_transition = transitions[:, crf_layer_conf.target_dict[crf_layer_conf.STOP_TAG]].contiguous().view(1, tag_size).expand(batch_size, tag_size)
|
||||
# length for batch, last word position = length - 1
|
||||
length_mask = torch.sum(mask.long(), dim=1).view(batch_size, 1).long()
|
||||
# index the label id of last word
|
||||
end_ids = torch.gather(tags, 1, length_mask - 1)
|
||||
|
||||
# index the transition score for end_id to STOP_TAG
|
||||
end_energy = torch.gather(end_transition, 1, end_ids)
|
||||
|
||||
# convert tag as (seq_len, batch_size, 1)
|
||||
new_tags = new_tags.transpose(1, 0).contiguous().view(seq_len, batch_size, 1)
|
||||
# convert tag ids so the corresponding cells can be gathered from scores
|
||||
tg_energy = torch.gather(scores.view(seq_len, batch_size, -1), 2, new_tags).view(seq_len, batch_size) # seq_len * batch_size
|
||||
# mask transpose to (seq_len, batch_size)
|
||||
tg_energy = tg_energy.masked_select(mask.transpose(1, 0))
|
||||
|
||||
# add all scores together
|
||||
gold_score = tg_energy.sum() + end_energy.sum()
|
||||
return gold_score
|
||||
|
||||
def forward(self, forward_score, scores, masks, tags, transitions, crf_layer_conf):
|
||||
"""
|
||||
|
||||
:param forward_score: scalar Tensor
|
||||
:param scores: Tensor [seq_len, batch_size, target_size, target_size]
|
||||
:param masks: Tensor [batch_size, seq_len]
|
||||
:param tags: Tensor [batch_size, seq_len]
|
||||
:return: forward_score - gold_score (the sequence-level negative log-likelihood)
|
||||
"""
|
||||
gold_score = self._score_sentence(scores, masks, tags, transitions, crf_layer_conf)
|
||||
return forward_score - gold_score
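
The trickiest part of `_score_sentence` above is the `new_tags` encoding: each (previous tag, current tag) bigram is flattened into a single index `prev * tag_size + cur`, so one `torch.gather` over the flattened `(tag_size, tag_size)` grid reads the matching scores; the first token pairs with START, which this code assumes lives at index `tag_size - 2`. A minimal sketch of just that index arithmetic (toy tensors, hypothetical tag inventory, not the NeuronBlocks API):

```python
import torch

tag_size = 5                      # assume START at index 3 (tag_size - 2), STOP at 4
tags = torch.tensor([[1, 2, 2]])  # one sentence; gold tag ids per token

new_tags = torch.empty_like(tags)
# first token: bigram (START, tags[0])
new_tags[:, 0] = (tag_size - 2) * tag_size + tags[:, 0]
# remaining tokens: bigram (previous tag, current tag)
for idx in range(1, tags.size(1)):
    new_tags[:, idx] = tags[:, idx - 1] * tag_size + tags[:, idx]

print(new_tags)  # tensor([[16,  7, 12]]) -- each value indexes one (prev, cur) cell
# scores.view(seq_len, batch_size, -1).gather(2, ...) then reads exactly these cells.
```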
|
|
@ -8,7 +8,7 @@ import logging
|
|||
sys.path.append('../')
|
||||
from settings import LossOperationType
|
||||
from torch.nn import CrossEntropyLoss, L1Loss, MSELoss, NLLLoss, PoissonNLLLoss, NLLLoss2d, KLDivLoss, BCELoss, BCEWithLogitsLoss, MarginRankingLoss, HingeEmbeddingLoss, MultiLabelMarginLoss, SmoothL1Loss, SoftMarginLoss, MultiLabelSoftMarginLoss, CosineEmbeddingLoss, MultiMarginLoss, TripletMarginLoss
|
||||
|
||||
from .CRFLoss import CRFLoss
|
||||
|
||||
class Loss(nn.Module):
|
||||
'''
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
from .FocalLoss import FocalLoss
|
||||
from .CRFLoss import CRFLoss
|
||||
from .Loss import Loss
|
||||
from torch.nn import CrossEntropyLoss, L1Loss, MSELoss, NLLLoss, PoissonNLLLoss, NLLLoss2d, KLDivLoss, BCELoss, BCEWithLogitsLoss, MarginRankingLoss, HingeEmbeddingLoss, MultiLabelMarginLoss, SmoothL1Loss, SoftMarginLoss, MultiLabelSoftMarginLoss, CosineEmbeddingLoss, MultiMarginLoss, TripletMarginLoss
|
|
@ -4,6 +4,7 @@
|
|||
from sklearn import metrics
|
||||
from sklearn.metrics import mean_squared_error
|
||||
from .conlleval import countChunks, evaluate, to_conll_format
|
||||
from .slot_tagging_metrics import get_ner_BIOES, get_ner_BIO
|
||||
from settings import TaggingSchemes
|
||||
import numpy as np
|
||||
import re
|
||||
|
@ -138,24 +139,72 @@ class Evaluator(object):
|
|||
def accuracy(self, y_true, y_pred):
|
||||
return metrics.accuracy_score(y_true, y_pred)
|
||||
|
||||
def seq_tag_f1(self, y_true, y_pred):
    '''
    For sequence tagging tasks, calculate the chunk-level F1-score (e.g. CoNLL-2003).

    :param y_true: list of gold tag sequences
    :param y_pred: list of predicted tag sequences
    :return: micro-averaged F1 over chunks
    '''
    sent_num = len(y_pred)
    golden_full = []
    predict_full = []
    right_full = []
    for idx in range(0, sent_num):
        golden_list = y_true[idx]
        predict_list = y_pred[idx]
        if self.__tagging_scheme == "BMES" or self.__tagging_scheme == "BIOES":
            gold_matrix = get_ner_BIOES(golden_list)
            pred_matrix = get_ner_BIOES(predict_list)
        elif self.__tagging_scheme == "BIO":
            gold_matrix = get_ner_BIO(golden_list)
            pred_matrix = get_ner_BIO(predict_list)
        else:
            # raise Exception("DETECT UNKNOWN TAGGING SCHEMES! YOU CAN USE OUR SCRIPT TO CONVERT TAG SCHEME!")
            raise Exception("DETECT UNKNOWN TAGGING SCHEMES!")
        right_ner = list(set(gold_matrix).intersection(set(pred_matrix)))
        golden_full += gold_matrix
        predict_full += pred_matrix
        right_full += right_ner
    right_num = len(right_full)
    golden_num = len(golden_full)
    predict_num = len(predict_full)
    if predict_num == 0:
        precision = -1
    else:
        precision = (right_num + 0.0) / predict_num
    if golden_num == 0:
        recall = -1
    else:
        recall = (right_num + 0.0) / golden_num
    if (precision == -1) or (recall == -1) or (precision + recall) <= 0.:
        f_measure = -1
    else:
        f_measure = 2 * precision * recall / (precision + recall)
    return f_measure
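
The F1 above is micro-averaged over chunks: precision is correct spans over predicted spans, recall is correct spans over gold spans. A tiny worked example with hypothetical '[start,end]TYPE' span lists (the format get_ner_BIO / get_ner_BIOES emit, see below):

```python
gold = ['[0,1]PER', '[3,3]LOC']               # 2 gold chunks
pred = ['[0,1]PER', '[3,4]LOC', '[6,6]ORG']   # 3 predicted chunks

right = set(gold) & set(pred)                 # exact span+type matches: {'[0,1]PER'}
precision = len(right) / len(pred)            # 1/3
recall = len(right) / len(gold)               # 1/2
print(2 * precision * recall / (precision + recall))  # 0.4
```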
|
||||
|
||||
|
||||
def seq_tag_accuracy(self, y_true, y_pred):
|
||||
'''
|
||||
|
||||
:param y_true:
|
||||
:param y_pred:
|
||||
:return:
|
||||
'''
|
||||
sent_num = len(y_pred)
|
||||
right_tag = 0
|
||||
all_tag = 0
|
||||
for idx in range(0, sent_num):
|
||||
golden_list = y_true[idx]
|
||||
predict_list = y_pred[idx]
|
||||
for idy in range(len(golden_list)):
|
||||
if golden_list[idy] == predict_list[idy]:
|
||||
right_tag += 1
|
||||
all_tag += len(golden_list)
|
||||
accuracy = (right_tag + 0.0) / all_tag
|
||||
return accuracy
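
seq_tag_accuracy is plain token-level accuracy, i.e. the fraction of positions whose predicted tag equals the gold tag. A quick equivalent check with hypothetical sequences:

```python
gold = [['B-PER', 'I-PER', 'O'], ['O', 'B-LOC']]
pred = [['B-PER', 'O',     'O'], ['O', 'B-LOC']]

right = sum(g == p for gs, ps in zip(gold, pred) for g, p in zip(gs, ps))
total = sum(len(gs) for gs in gold)
print(right / total)  # 0.8 -- 4 of the 5 tags match
```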
|
||||
|
||||
|
||||
def macro_f1(self, y_true, y_pred):
|
||||
""" For classification task, calculate f1-score for each label, and find their unweighted mean. This does not take label imbalance into account.
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
|
||||
def get_ner_BIOES(label_list):
|
||||
list_len = len(label_list)
|
||||
begin_label = 'B-'
|
||||
end_label = 'E-'
|
||||
single_label = 'S-'
|
||||
whole_tag = ''
|
||||
index_tag = ''
|
||||
tag_list = []
|
||||
stand_matrix = []
|
||||
for i in range(0, list_len):
|
||||
current_label = label_list[i].upper()
|
||||
if begin_label in current_label:
|
||||
if index_tag != '':
|
||||
tag_list.append(whole_tag + ',' + str(i-1))
|
||||
whole_tag = current_label.replace(begin_label, "", 1) + '[' + str(i)
|
||||
index_tag = current_label.replace(begin_label, "", 1)
|
||||
|
||||
elif single_label in current_label:
|
||||
if index_tag != '':
|
||||
tag_list.append(whole_tag + ',' + str(i-1))
|
||||
whole_tag = current_label.replace(single_label, "", 1) + '[' + str(i)
|
||||
tag_list.append(whole_tag)
|
||||
whole_tag = ""
|
||||
index_tag = ""
|
||||
elif end_label in current_label:
|
||||
if index_tag != '':
|
||||
tag_list.append(whole_tag + ',' + str(i))
|
||||
whole_tag = ''
|
||||
index_tag = ''
|
||||
else:
|
||||
continue
|
||||
if (whole_tag != '') and (index_tag != ''):
|
||||
tag_list.append(whole_tag)
|
||||
tag_list_len = len(tag_list)
|
||||
|
||||
for i in range(0, tag_list_len):
|
||||
if len(tag_list[i]) > 0:
|
||||
tag_list[i] = tag_list[i] + ']'
|
||||
insert_list = reverse_style(tag_list[i])
|
||||
stand_matrix.append(insert_list)
|
||||
return stand_matrix
|
||||
|
||||
|
||||
def get_ner_BIO(label_list):
|
||||
list_len = len(label_list)
|
||||
begin_label = 'B-'
|
||||
inside_label = 'I-'
|
||||
whole_tag = ''
|
||||
index_tag = ''
|
||||
tag_list = []
|
||||
stand_matrix = []
|
||||
for i in range(0, list_len):
|
||||
current_label = label_list[i].upper()
|
||||
if begin_label in current_label:
|
||||
if index_tag == '':
|
||||
whole_tag = current_label.replace(begin_label, "", 1) + '[' + str(i)
|
||||
index_tag = current_label.replace(begin_label, "", 1)
|
||||
else:
|
||||
tag_list.append(whole_tag + ',' + str(i-1))
|
||||
whole_tag = current_label.replace(begin_label, "", 1) + '[' + str(i)
|
||||
index_tag = current_label.replace(begin_label, "", 1)
|
||||
|
||||
elif inside_label in current_label:
|
||||
if current_label.replace(inside_label, "", 1) == index_tag:
|
||||
pass  # same entity continues; keep extending the current chunk
|
||||
else:
|
||||
if (whole_tag != '') and (index_tag != ''):
|
||||
tag_list.append(whole_tag + ',' + str(i-1))
|
||||
whole_tag = ''
|
||||
index_tag = ''
|
||||
else:
|
||||
if (whole_tag != '') and (index_tag != ''):
|
||||
tag_list.append(whole_tag + ',' + str(i-1))
|
||||
whole_tag = ''
|
||||
index_tag = ''
|
||||
|
||||
if (whole_tag != '') and (index_tag != ''):
|
||||
tag_list.append(whole_tag)
|
||||
tag_list_len = len(tag_list)
|
||||
|
||||
for i in range(0, tag_list_len):
|
||||
if len(tag_list[i]) > 0:
|
||||
tag_list[i] = tag_list[i] + ']'
|
||||
insert_list = reverse_style(tag_list[i])
|
||||
stand_matrix.append(insert_list)
|
||||
return stand_matrix
|
||||
|
||||
|
||||
def reverse_style(input_string):
|
||||
target_position = input_string.index('[')
|
||||
input_len = len(input_string)
|
||||
output_string = input_string[target_position:input_len] + input_string[0:target_position]
|
||||
return output_string
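
Taken together, the extractors accumulate 'TYPE[start,end' while scanning, append the closing ']', and `reverse_style` rotates the bracketed span to the front, yielding '[start,end]TYPE' strings that can be compared with a set intersection. A quick check of the BIO extractor (assuming both helpers are importable from this module):

```python
labels = ['B-PER', 'I-PER', 'O', 'B-LOC', 'I-LOC', 'O']
print(get_ner_BIO(labels))        # ['[0,1]PER', '[3,4]LOC']
print(reverse_style('PER[0,1]'))  # '[0,1]PER'
```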
|
|
@ -49,7 +49,8 @@
|
|||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1,
|
||||
"max_vocabulary": 100000
|
||||
"max_vocabulary": 100000,
|
||||
"max_building_lines": 1000000
|
||||
},
|
||||
"optimizer": {
|
||||
"name": "Adam",
|
||||
|
@ -57,6 +58,7 @@
|
|||
"lr": 0.001
|
||||
}
|
||||
},
|
||||
"chunk_size": 1000000,
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.0001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
|
@ -65,7 +67,7 @@
|
|||
"batch_size": 30,
|
||||
"batch_num_to_show_results": 10,
|
||||
"max_epoch": 3,
|
||||
"valid_times_per_epoch": 1,
|
||||
"steps_per_validation": 10,
|
||||
"text_preprocessing": ["DBC2SBC"],
|
||||
"max_lengths":{
|
||||
"question": 30,
|
||||
|
@ -75,6 +77,7 @@
|
|||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": false,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["question_text", "answer_text"],
|
||||
|
@ -89,11 +92,11 @@
|
|||
"cols": ["question_char", "answer_char"],
|
||||
"type": "CNNCharEmbedding",
|
||||
"dropout": 0.2,
|
||||
"dim": 30,
|
||||
"embedding_matrix_dim": 8,
|
||||
"stride":1,
|
||||
"window_size": 5,
|
||||
"activation": null
|
||||
"dim": [30, 20, 100],
|
||||
"embedding_matrix_dim": 50,
|
||||
"stride":[1, 2, 3],
|
||||
"window_size": [3,3,5],
|
||||
"activation": "ReLU"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
@ -53,7 +53,7 @@
|
|||
"batch_size": 256,
|
||||
"batch_num_to_show_results": 10,
|
||||
"max_epoch": 30,
|
||||
"valid_times_per_epoch": 10,
|
||||
"steps_per_validation": 10,
|
||||
"fixed_lengths":{
|
||||
"query": 30
|
||||
}
|
||||
|
|
|
@ -0,0 +1,241 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for question answer matching task, and it achieved auc: 0.7508 in WikiQACorpus test set",
|
||||
"language": "English",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "classification",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
|
||||
"valid_data_path": "./dataset/WikiQACorpus/WikiQA-dev.tsv",
|
||||
"test_data_path": "./dataset/WikiQACorpus/WikiQA-test.tsv",
|
||||
"pre_trained_emb": "./dataset/Glove/glove.840B.300d.txt"
|
||||
},
|
||||
"file_with_col_header": true,
|
||||
"add_start_end_for_seq": true,
|
||||
"file_header": {
|
||||
"question_id": 0,
|
||||
"question_text": 1,
|
||||
"document_id": 2,
|
||||
"document_title": 3,
|
||||
"passage_id": 4,
|
||||
"passage_text": 5,
|
||||
"label": 6
|
||||
},
|
||||
"model_inputs": {
|
||||
"question": ["question_text"],
|
||||
"passage": ["passage_text"]
|
||||
},
|
||||
"target": ["label"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/wikiqa_arci/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.wikiqa_arci/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"optimizer": {
|
||||
"name": "Adam",
|
||||
"params": {
|
||||
"lr": 0.001
|
||||
}
|
||||
},
|
||||
"fixed_lengths": {
|
||||
"question": 200,
|
||||
"passage": 200
|
||||
},
|
||||
"lr_decay": 0.90,
|
||||
"minimum_lr": 0.00005,
|
||||
"epoch_start_lr_decay": 20,
|
||||
"use_gpu": true,
|
||||
"cpu_num_workers": 1,
|
||||
"batch_size": 64,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 10,
|
||||
"valid_times_per_epoch": 2
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["question_text", "passage_text"],
|
||||
"dim": 300,
|
||||
"fix_weight": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["question"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["passage"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_conv_1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"window_size": 3,
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME",
|
||||
"remind_lengths": false
|
||||
},
|
||||
"inputs": ["s1_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_pool_1",
|
||||
"layer": "Pooling1D",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 2
|
||||
},
|
||||
"inputs": ["s1_conv_1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_conv_2",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"window_size": 3,
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["s1_pool_1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_pool_2",
|
||||
"layer": "Pooling1D",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 2
|
||||
},
|
||||
"inputs": ["s1_conv_2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_flatten",
|
||||
"layer": "Flatten",
|
||||
"conf": {
|
||||
|
||||
},
|
||||
"inputs": ["s1_pool_2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_conv_1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"window_size": 3,
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME",
|
||||
"remind_lengths": false
|
||||
},
|
||||
"inputs": ["s2_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_pool_1",
|
||||
"layer": "Pooling1D",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 2
|
||||
},
|
||||
"inputs": ["s2_conv_1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_conv_2",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"window_size": 3,
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["s2_pool_1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_pool_2",
|
||||
"layer": "Pooling1D",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 2
|
||||
},
|
||||
"inputs": ["s2_conv_2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_flatten",
|
||||
"layer": "Flatten",
|
||||
"conf": {
|
||||
|
||||
},
|
||||
"inputs": ["s2_pool_2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "comb",
|
||||
"layer": "Combination",
|
||||
"conf": {
|
||||
"operations": ["origin"]
|
||||
},
|
||||
"inputs": ["s1_flatten", "s2_flatten"]
|
||||
},
|
||||
{
|
||||
"layer_id": "comb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["comb"]
|
||||
},
|
||||
{
|
||||
"layer_id": "mlp",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [64, 32],
|
||||
"activation": "ReLU",
|
||||
"batch_norm": true,
|
||||
"last_hidden_activation": true
|
||||
},
|
||||
"inputs": ["comb_dropout"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [-1],
|
||||
"activation": "ReLU",
|
||||
"batch_norm": true,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": true
|
||||
},
|
||||
"inputs": ["mlp"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CrossEntropyLoss",
|
||||
"conf": {
|
||||
"weight": [0.1,0.9],
|
||||
"size_average": true
|
||||
},
|
||||
"inputs": ["output","label"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["auc", "accuracy"]
|
||||
}
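
Configs like the one above are plain JSON, so a structural sanity check before launching training is cheap; training itself is typically started with `python train.py --conf_path=<path_to_conf.json>` from the project root. A minimal sketch of such a check (the config path is hypothetical):

```python
import json

# Hypothetical path to a model-zoo style config such as the one above.
with open('model_zoo/wikiqa_arci.json') as f:
    conf = json.load(f)

# Every architecture block except the Embedding layer must name its inputs.
for block in conf['architecture']:
    if block.get('layer') != 'Embedding':
        assert 'inputs' in block, 'block %s is missing "inputs"' % block.get('layer_id')
print('layers:', [b.get('layer_id', b['layer']) for b in conf['architecture']])
```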
|
|
@ -0,0 +1,212 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for question answer matching task, and it achieved auc: 0.7612 in WikiQACorpus test set",
|
||||
"language": "English",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "classification",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/WikiQACorpus/WikiQA-train.tsv",
|
||||
"valid_data_path": "./dataset/WikiQACorpus/WikiQA-dev.tsv",
|
||||
"test_data_path": "./dataset/WikiQACorpus/WikiQA-test.tsv",
|
||||
"pre_trained_emb": "./dataset/Glove/glove.840B.300d.txt"
|
||||
},
|
||||
"file_with_col_header": true,
|
||||
"add_start_end_for_seq": true,
|
||||
"file_header": {
|
||||
"question_id": 0,
|
||||
"question_text": 1,
|
||||
"document_id": 2,
|
||||
"document_title": 3,
|
||||
"passage_id": 4,
|
||||
"passage_text": 5,
|
||||
"label": 6
|
||||
},
|
||||
"model_inputs": {
|
||||
"question": ["question_text"],
|
||||
"passage": ["passage_text"]
|
||||
},
|
||||
"target": ["label"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/wikiqa_arcii/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.wikiqa_arcii/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"optimizer": {
|
||||
"name": "Adam",
|
||||
"params": {
|
||||
"lr": 0.001
|
||||
}
|
||||
},
|
||||
"fixed_lengths": {
|
||||
"question": 200,
|
||||
"passage": 200
|
||||
},
|
||||
"lr_decay": 0.9,
|
||||
"minimum_lr": 0.00005,
|
||||
"epoch_start_lr_decay": 20,
|
||||
"use_gpu": true,
|
||||
"cpu_num_workers": 1,
|
||||
"batch_size": 64,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 10,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["question_text", "passage_text"],
|
||||
"dim": 300,
|
||||
"fix_weight": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["question"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["passage"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s1_conv_1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"window_size": 3,
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME",
|
||||
"remind_lengths": false
|
||||
},
|
||||
"inputs": ["s1_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "s2_conv_1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"window_size": 3,
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME",
|
||||
"remind_lengths": false
|
||||
},
|
||||
"inputs": ["s2_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "match",
|
||||
"layer": "Expand_plus",
|
||||
"conf": {
|
||||
},
|
||||
"inputs": ["s1_conv_1", "s2_conv_1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "conv2D_1",
|
||||
"layer": "Conv2D",
|
||||
"conf": {
|
||||
"window_size": [3,3],
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["match"]
|
||||
},
|
||||
{
|
||||
"layer_id": "pool2D_1",
|
||||
"layer": "Pooling2D",
|
||||
"conf": {
|
||||
"window_size": [2,2]
|
||||
},
|
||||
"inputs": ["conv2D_1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "conv2D_2",
|
||||
"layer": "Conv2D",
|
||||
"conf": {
|
||||
"window_size": [3,3],
|
||||
"output_channel_num": 32,
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["pool2D_1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "pool2D_2",
|
||||
"layer": "Pooling2D",
|
||||
"conf": {
|
||||
"window_size": [2,2]
|
||||
},
|
||||
"inputs": ["conv2D_2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "flatten",
|
||||
"layer": "Flatten",
|
||||
"conf": {
|
||||
|
||||
},
|
||||
"inputs": ["pool2D_2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["flatten"]
|
||||
},
|
||||
{
|
||||
"layer_id": "mlp",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [64, 32],
|
||||
"activation": "ReLU",
|
||||
"batch_norm": true,
|
||||
"last_hidden_activation": true
|
||||
},
|
||||
"inputs": ["dropout"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [-1],
|
||||
"activation": "ReLU",
|
||||
"batch_norm": true,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": true
|
||||
},
|
||||
"inputs": ["mlp"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CrossEntropyLoss",
|
||||
"conf": {
|
||||
"weight": [0.1,0.9],
|
||||
"size_average": true
|
||||
},
|
||||
"inputs": ["output","label"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["auc", "accuracy"]
|
||||
}
|
|
@ -57,7 +57,7 @@
|
|||
"valid_times_per_epoch": 5,
|
||||
"fixed_lengths":{
|
||||
"question": 30,
|
||||
"passage": 120
|
||||
"passage": 200
|
||||
}
|
||||
},
|
||||
"architecture":[
|
||||
|
@ -92,7 +92,6 @@
|
|||
"layer": "Interaction",
|
||||
"conf": {
|
||||
"dropout": 0.2,
|
||||
"hidden_dim": 300,
|
||||
"matching_type": "general"
|
||||
},
|
||||
"inputs": ["question_dropout", "passage_dropout"]
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for sequence tagging task. It achieved a f1-score of 88.50 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"sequence": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["sequence"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.015,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 2,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["sequence"],
|
||||
"dim": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "sentence_BiLSTM",
|
||||
"layer": "BiLSTM",
|
||||
"conf": {
|
||||
"hidden_dim": 100,
|
||||
"num_layers": 1
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "lstm_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["sentence_BiLSTM"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [-1],
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["lstm_dropout"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CrossEntropyLoss",
|
||||
"conf": {
|
||||
"size_average": false,
|
||||
"ignore_index": 0
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
|
|
@ -0,0 +1,208 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for sequence tagging task. It achieved a f1-score of 90.36 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"word": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["word", "word_char"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_ccnn_wcnn/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_ccnn_wcnn/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.005,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 100,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["word"],
|
||||
"dim": 100
|
||||
},
|
||||
"char":{
|
||||
"cols":["word_char"],
|
||||
"type":"CNNCharEmbedding",
|
||||
"dropout": 0.5,
|
||||
"dim": 50,
|
||||
"embedding_matrix_dim": 30,
|
||||
"stride":1,
|
||||
"window_size": 3,
|
||||
"activation": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Linear1",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [200],
|
||||
"activation": "Tanh",
|
||||
"batch_norm": true,
|
||||
"last_hidden_activation": true,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Linear1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout1",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["Conv1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv2",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Dropout1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout2",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["Conv2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv3",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Dropout2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout3",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["Conv3"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv4",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Dropout3"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout4",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["Conv4"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [-1],
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["Dropout4"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CrossEntropyLoss",
|
||||
"conf": {
|
||||
"size_average": false,
|
||||
"ignore_index": 0
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
|
|
@ -0,0 +1,185 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for sequence tagging task. It achieved a f1-score of 90.36 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"word": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["word", "word_char"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_wcnn_ccnn_crf/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_wcnn_ccnn_crf/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.005,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 100,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["word"],
|
||||
"dim": 100
|
||||
},
|
||||
"char":{
|
||||
"cols":["word_char"],
|
||||
"type":"CNNCharEmbedding",
|
||||
"dropout": 0.5,
|
||||
"dim": 50,
|
||||
"embedding_matrix_dim": 30,
|
||||
"stride":1,
|
||||
"window_size": 3,
|
||||
"activation": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Linear1",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [200],
|
||||
"activation": "Tanh",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": true,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Linear1"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "Conv2",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Conv1"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "Conv3",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Conv2"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "Conv4",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Conv3"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "rep2tag",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": "#target#",
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["Conv4"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "CRF",
|
||||
"conf": {},
|
||||
"inputs": ["rep2tag"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CRFLoss",
|
||||
"conf": {
|
||||
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
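
Note what distinguishes this config from the CrossEntropyLoss variants above: a "CRF" output layer is paired with "CRFLoss", and the layer feeding it projects to "#target#", i.e. one score per tag, which the CRF then decodes jointly. A minimal sketch of that tail-end pairing in Python dict form (a hypothetical fragment mirroring the JSON above):

```python
# Hypothetical fragment: the CRF tail that every CRF-based config above shares.
crf_tail = [
    {"layer_id": "rep2tag", "layer": "Linear",
     "conf": {"hidden_dim": "#target#"}, "inputs": ["Conv4"]},
    {"output_layer_flag": True, "layer_id": "output",
     "layer": "CRF", "conf": {}, "inputs": ["rep2tag"]},
]
crf_loss = {"type": "CRFLoss", "conf": {}, "inputs": ["output", "tag"]}
```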
|
|
@ -0,0 +1,130 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for sequence tagging task. It achieved a f1-score of 90.63 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"word": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["word", "word_char"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_ccnn_wlstm/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_ccnn_wlstm/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.015,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 100,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["word"],
|
||||
"dim": 100
|
||||
},
|
||||
"char":{
|
||||
"cols":["word_char"],
|
||||
"type":"CNNCharEmbedding",
|
||||
"dropout": 0.5,
|
||||
"dim": 50,
|
||||
"embedding_matrix_dim": 30,
|
||||
"stride":1,
|
||||
"window_size": 3,
|
||||
"activation": null
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "sentence_BiLSTM",
|
||||
"layer": "BiLSTM",
|
||||
"conf": {
|
||||
"hidden_dim": 100,
|
||||
"num_layers": 1
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "lstm_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["sentence_BiLSTM"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [-1],
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["lstm_dropout"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CrossEntropyLoss",
|
||||
"conf": {
|
||||
"size_average": false,
|
||||
"ignore_index": 0
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for slot sequence task. It achieved a f1-score of 91.38 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"word": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["word", "word_char"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_ccnn_wlstm_crf/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_ccnn_wlstm_crf/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.015,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 100,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["word"],
|
||||
"dim": 100
|
||||
},
|
||||
"char":{
|
||||
"cols":["word_char"],
|
||||
"type":"CNNCharEmbedding",
|
||||
"dropout": 0.5,
|
||||
"dim": 50,
|
||||
"embedding_matrix_dim": 30,
|
||||
"stride":1,
|
||||
"window_size": 3,
|
||||
"activation": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "sentence_BiLSTM",
|
||||
"layer": "BiLSTM",
|
||||
"conf": {
|
||||
"hidden_dim": 100,
|
||||
"num_layers": 1
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "lstm_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.5
|
||||
},
|
||||
"inputs": ["sentence_BiLSTM"]
|
||||
},
|
||||
{
|
||||
"layer_id": "rep2tag",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": "#target#",
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["lstm_dropout"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "CRF",
|
||||
"conf": {},
|
||||
"inputs": ["rep2tag"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CRFLoss",
|
||||
"conf": {
|
||||
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
|
|
@ -0,0 +1,191 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for sequence tagging task. It achieved a f1-score of 90.27 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"word": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["word", "word_char"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_clstm_wcnn_crf/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_clstm_wcnn_crf/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.005,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 100,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["word"],
|
||||
"dim": 100
|
||||
},
|
||||
"char":{
|
||||
"cols":["word_char"],
|
||||
"type":"LSTMCharEmbedding",
|
||||
"dropout": 0.5,
|
||||
"dim": 50,
|
||||
"embedding_matrix_dim": 30,
|
||||
"bidirect_flag": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Linear1",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [200],
|
||||
"activation": "Tanh",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": true,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME",
|
||||
"dropout": 0.2,
|
||||
"remind_lengths": true
|
||||
},
|
||||
"inputs": ["Linear1"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "Conv2",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME",
|
||||
"dropout": 0.2,
|
||||
"remind_lengths": true
|
||||
},
|
||||
"inputs": ["Conv1"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "Conv3",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME",
|
||||
"dropout": 0.2,
|
||||
"remind_lengths": true
|
||||
},
|
||||
"inputs": ["Conv2"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "Conv4",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME",
|
||||
"dropout": 0.2,
|
||||
"remind_lengths": true
|
||||
},
|
||||
"inputs": ["Conv3"]
|
||||
},
|
||||
|
||||
{
|
||||
"layer_id": "rep2tag",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": "#target#",
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["Conv4"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "CRF",
|
||||
"conf": {},
|
||||
"inputs": ["rep2tag"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CRFLoss",
|
||||
"conf": {
|
||||
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
|
|
@ -0,0 +1,132 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for sequence tagging task. It achieved a f1-score of 90.83 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"word": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["word", "word_char"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_clstm_wlstm_crf/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_clstm_wlstm_crf/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.015,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 100,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["word"],
|
||||
"dim": 100
|
||||
},
|
||||
"char":{
|
||||
"cols":["word_char"],
|
||||
"type":"LSTMCharEmbedding",
|
||||
"dropout": 0.2,
|
||||
"dim": 50,
|
||||
"embedding_matrix_dim": 30,
|
||||
"bidirect_flag": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "sentence_BiLSTM",
|
||||
"layer": "BiLSTM",
|
||||
"conf": {
|
||||
"hidden_dim": 100,
|
||||
"num_layers": 1
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "lstm_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["sentence_BiLSTM"]
|
||||
},
|
||||
{
|
||||
"layer_id": "rep2tag",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": "#target#",
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["lstm_dropout"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "CRF",
|
||||
"conf": {},
|
||||
"inputs": ["rep2tag"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CRFLoss",
|
||||
"conf": {
|
||||
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
|
|
@ -23,13 +23,13 @@
|
|||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_encoder_decoder/",
|
||||
"model_name": "model_debug.nb",
|
||||
"train_log_name": "train_debug.log",
|
||||
"test_log_name": "test_debug.log",
|
||||
"predict_log_name": "predict_debug.log",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict_debug.tsv",
|
||||
"cache_dir": ".cache.atis/"
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_encoder_decoder/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
|
|
|
@ -0,0 +1,198 @@
|
|||
{
|
||||
"license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
|
||||
"tool_version": "1.1.0",
|
||||
"model_description": "This model is used for sequence tagging task. It achieved a f1-score of 88.51 on the dataset conll-2003",
|
||||
"inputs": {
|
||||
"use_cache": true,
|
||||
"dataset_type": "sequence_tagging",
|
||||
"tagging_scheme": "BIOES",
|
||||
"data_paths": {
|
||||
"train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
|
||||
"valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
|
||||
"test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
|
||||
"pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
|
||||
},
|
||||
"add_start_end_for_seq": false,
|
||||
"file_header": {
|
||||
"word": 0,
|
||||
"tag": 1
|
||||
},
|
||||
"model_inputs": {
|
||||
"words": ["word"]
|
||||
},
|
||||
"target": ["tag"]
|
||||
},
|
||||
"outputs":{
|
||||
"save_base_dir": "./models/slot_tagging_wcnn/",
|
||||
"model_name": "model.nb",
|
||||
"train_log_name": "train.log",
|
||||
"test_log_name": "test.log",
|
||||
"predict_log_name": "predict.log",
|
||||
"predict_fields": ["prediction"],
|
||||
"predict_output_name": "predict.tsv",
|
||||
"cache_dir": ".cache.slot_tagging_wcnn/"
|
||||
},
|
||||
"training_params": {
|
||||
"vocabulary": {
|
||||
"min_word_frequency": 1
|
||||
},
|
||||
"cpu_num_workers": 4,
|
||||
"optimizer": {
|
||||
"name": "SGD",
|
||||
"params": {
|
||||
"lr": 0.005,
|
||||
"weight_decay": 1e-8
|
||||
}
|
||||
},
|
||||
"lr_decay": 0.95,
|
||||
"minimum_lr": 0.00001,
|
||||
"epoch_start_lr_decay": 1,
|
||||
"use_gpu": true,
|
||||
"batch_size": 10,
|
||||
"batch_num_to_show_results": 500,
|
||||
"max_epoch": 100,
|
||||
"valid_times_per_epoch": 1
|
||||
},
|
||||
"architecture":[
|
||||
{
|
||||
"layer": "Embedding",
|
||||
"weight_on_gpu": true,
|
||||
"conf": {
|
||||
"word": {
|
||||
"cols": ["word"],
|
||||
"dim": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"layer_id": "emb_dropout",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["words"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Linear1",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [200],
|
||||
"activation": "Tanh",
|
||||
"batch_norm": true,
|
||||
"last_hidden_activation": true,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["emb_dropout"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv1",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Linear1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout1",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["Conv1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv2",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Dropout1"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout2",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["Conv2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv3",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Dropout2"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout3",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["Conv3"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Conv4",
|
||||
"layer": "Conv",
|
||||
"conf": {
|
||||
"stride": 1,
|
||||
"window_size": 3,
|
||||
"output_channel_num": 200,
|
||||
"batch_norm": true,
|
||||
"activation": "ReLU",
|
||||
"padding_type": "SAME"
|
||||
},
|
||||
"inputs": ["Dropout3"]
|
||||
},
|
||||
{
|
||||
"layer_id": "Dropout4",
|
||||
"layer": "Dropout",
|
||||
"conf": {
|
||||
"dropout": 0.2
|
||||
},
|
||||
"inputs": ["Conv4"]
|
||||
},
|
||||
{
|
||||
"output_layer_flag": true,
|
||||
"layer_id": "output",
|
||||
"layer": "Linear",
|
||||
"conf": {
|
||||
"hidden_dim": [-1],
|
||||
"activation": "PReLU",
|
||||
"batch_norm": false,
|
||||
"last_hidden_activation": false,
|
||||
"last_hidden_softmax": false
|
||||
},
|
||||
"inputs": ["Dropout4"]
|
||||
}
|
||||
],
|
||||
"loss": {
|
||||
"losses": [
|
||||
{
|
||||
"type": "CrossEntropyLoss",
|
||||
"conf": {
|
||||
"size_average": false,
|
||||
"ignore_index": 0
|
||||
},
|
||||
"inputs": ["output","tag"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"metrics": ["seq_tag_f1","seq_tag_accuracy"]
|
||||
}
|
|
New file (+183 lines): JSON configuration for the CoNLL-2003 slot tagging model with a word-level CNN encoder and a CRF output layer.

```json
{
  "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
  "tool_version": "1.1.0",
  "model_description": "This model is used for sequence tagging task. It achieved a f1-score of 88.72 on the dataset conll-2003",
  "inputs": {
    "use_cache": true,
    "dataset_type": "sequence_tagging",
    "tagging_scheme": "BIOES",
    "data_paths": {
      "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
      "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
      "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
      "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
    },
    "add_start_end_for_seq": false,
    "file_header": {
      "word": 0,
      "tag": 1
    },
    "model_inputs": {
      "words": ["word"]
    },
    "target": ["tag"]
  },
  "outputs": {
    "save_base_dir": "./models/slot_tagging_wcnn_crf/",
    "model_name": "model.nb",
    "train_log_name": "train.log",
    "test_log_name": "test.log",
    "predict_log_name": "predict.log",
    "predict_fields": ["prediction"],
    "predict_output_name": "predict.tsv",
    "cache_dir": ".cache.slot_tagging_wcnn_crf/"
  },
  "training_params": {
    "vocabulary": {
      "min_word_frequency": 1
    },
    "cpu_num_workers": 4,
    "optimizer": {
      "name": "SGD",
      "params": {
        "lr": 0.005,
        "weight_decay": 1e-8
      }
    },
    "lr_decay": 0.95,
    "minimum_lr": 0.00001,
    "epoch_start_lr_decay": 1,
    "use_gpu": true,
    "batch_size": 10,
    "batch_num_to_show_results": 500,
    "max_epoch": 100,
    "valid_times_per_epoch": 1
  },
  "architecture": [
    {
      "layer": "Embedding",
      "weight_on_gpu": true,
      "conf": {
        "word": {
          "cols": ["word"],
          "dim": 100
        }
      }
    },
    {
      "layer_id": "emb_dropout",
      "layer": "Dropout",
      "conf": {
        "dropout": 0.2
      },
      "inputs": ["words"]
    },
    {
      "layer_id": "Linear1",
      "layer": "Linear",
      "conf": {
        "hidden_dim": [200],
        "activation": "Tanh",
        "batch_norm": false,
        "last_hidden_activation": true,
        "last_hidden_softmax": false
      },
      "inputs": ["emb_dropout"]
    },
    {
      "layer_id": "Conv1",
      "layer": "Conv",
      "conf": {
        "stride": 1,
        "window_size": 3,
        "output_channel_num": 200,
        "batch_norm": true,
        "activation": "ReLU",
        "padding_type": "SAME",
        "dropout": 0.2,
        "remind_lengths": true
      },
      "inputs": ["Linear1"]
    },
    {
      "layer_id": "Conv2",
      "layer": "Conv",
      "conf": {
        "stride": 1,
        "window_size": 3,
        "output_channel_num": 200,
        "batch_norm": true,
        "activation": "ReLU",
        "padding_type": "SAME",
        "dropout": 0.2,
        "remind_lengths": true
      },
      "inputs": ["Conv1"]
    },
    {
      "layer_id": "Conv3",
      "layer": "Conv",
      "conf": {
        "stride": 1,
        "window_size": 3,
        "output_channel_num": 200,
        "batch_norm": true,
        "activation": "ReLU",
        "padding_type": "SAME",
        "dropout": 0.2,
        "remind_lengths": true
      },
      "inputs": ["Conv2"]
    },
    {
      "layer_id": "Conv4",
      "layer": "Conv",
      "conf": {
        "stride": 1,
        "window_size": 3,
        "output_channel_num": 200,
        "batch_norm": true,
        "activation": "ReLU",
        "padding_type": "SAME",
        "dropout": 0.2,
        "remind_lengths": true
      },
      "inputs": ["Conv3"]
    },
    {
      "layer_id": "rep2tag",
      "layer": "Linear",
      "conf": {
        "hidden_dim": "#target#",
        "activation": "PReLU",
        "batch_norm": false,
        "last_hidden_activation": false,
        "last_hidden_softmax": false
      },
      "inputs": ["Conv4"]
    },
    {
      "output_layer_flag": true,
      "layer_id": "output",
      "layer": "CRF",
      "conf": {},
      "inputs": ["rep2tag"]
    }
  ],
  "loss": {
    "losses": [
      {
        "type": "CRFLoss",
        "conf": {},
        "inputs": ["output", "tag"]
      }
    ]
  },
  "metrics": ["seq_tag_f1", "seq_tag_accuracy"]
}
```
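The `architecture` list above is effectively a DAG specification: each block names a `layer_id` and the `inputs` it consumes, and the toolkit wires them up in order. A small sketch that loads such a config and prints the graph (the file path is hypothetical; point it at wherever you save the JSON above):

```python
import json

with open('slot_tagging_wcnn_crf.json', 'r', encoding='utf-8') as f:  # hypothetical path
    conf = json.load(f)

for layer in conf['architecture']:
    # The Embedding entry carries no layer_id and feeds on the raw model_inputs.
    layer_id = layer.get('layer_id', layer['layer'])
    inputs = layer.get('inputs', list(conf['inputs']['model_inputs'].keys()))
    print('%-12s (%-9s) <- %s' % (layer_id, layer['layer'], ', '.join(inputs)))
```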
New file (+124 lines): JSON configuration for the CoNLL-2003 slot tagging model with a word-level BiLSTM encoder and a CRF output layer.

```json
{
  "license": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT license.",
  "tool_version": "1.1.0",
  "model_description": "This model is used for sequence tagging task. It achieved a f1-score of 89.34 on the dataset conll-2003",
  "inputs": {
    "use_cache": true,
    "dataset_type": "sequence_tagging",
    "tagging_scheme": "BIOES",
    "data_paths": {
      "train_data_path": "./dataset/slot_tagging/conll_2003/eng.train.tsv",
      "valid_data_path": "./dataset/slot_tagging/conll_2003/eng.testa.tsv",
      "test_data_path": "./dataset/slot_tagging/conll_2003/eng.testb.tsv",
      "pre_trained_emb": "./dataset/GloVe/glove.6B.100d.txt"
    },
    "add_start_end_for_seq": false,
    "file_header": {
      "word": 0,
      "tag": 1
    },
    "model_inputs": {
      "words": ["word"]
    },
    "target": ["tag"]
  },
  "outputs": {
    "save_base_dir": "./models/slot_tagging_wlstm_crf/",
    "model_name": "model.nb",
    "train_log_name": "train.log",
    "test_log_name": "test.log",
    "predict_log_name": "predict.log",
    "predict_fields": ["prediction"],
    "predict_output_name": "predict.tsv",
    "cache_dir": ".cache.slot_tagging_wlstm_crf/"
  },
  "training_params": {
    "vocabulary": {
      "min_word_frequency": 1
    },
    "cpu_num_workers": 4,
    "optimizer": {
      "name": "SGD",
      "params": {
        "lr": 0.015,
        "weight_decay": 1e-8
      }
    },
    "lr_decay": 0.95,
    "minimum_lr": 0.00001,
    "epoch_start_lr_decay": 1,
    "use_gpu": true,
    "batch_size": 10,
    "batch_num_to_show_results": 500,
    "max_epoch": 100,
    "valid_times_per_epoch": 1
  },
  "architecture": [
    {
      "layer": "Embedding",
      "weight_on_gpu": true,
      "conf": {
        "word": {
          "cols": ["word"],
          "dim": 100
        }
      }
    },
    {
      "layer_id": "emb_dropout",
      "layer": "Dropout",
      "conf": {
        "dropout": 0.4
      },
      "inputs": ["words"]
    },
    {
      "layer_id": "sentence_BiLSTM",
      "layer": "BiLSTM",
      "conf": {
        "hidden_dim": 100,
        "num_layers": 1
      },
      "inputs": ["emb_dropout"]
    },
    {
      "layer_id": "lstm_dropout",
      "layer": "Dropout",
      "conf": {
        "dropout": 0.4
      },
      "inputs": ["sentence_BiLSTM"]
    },
    {
      "layer_id": "rep2tag",
      "layer": "Linear",
      "conf": {
        "hidden_dim": "#target#",
        "activation": "PReLU",
        "batch_norm": false,
        "last_hidden_activation": false,
        "last_hidden_softmax": false
      },
      "inputs": ["lstm_dropout"]
    },
    {
      "output_layer_flag": true,
      "layer_id": "output",
      "layer": "CRF",
      "conf": {},
      "inputs": ["rep2tag"]
    }
  ],
  "loss": {
    "losses": [
      {
        "type": "CRFLoss",
        "conf": {},
        "inputs": ["output", "tag"]
      }
    ]
  },
  "metrics": ["seq_tag_f1", "seq_tag_accuracy"]
}
```
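One detail worth calling out in both CRF configs: `"hidden_dim": "#target#"` in the `rep2tag` block is a placeholder that the toolkit resolves to the size of the tag vocabulary when the model is built, so the Linear layer emits one score per tag for the CRF to consume. A rough shape check in PyTorch (all sizes invented; the BiLSTM with `hidden_dim: 100` runs in both directions, hence 200 features per token):

```python
import torch
import torch.nn as nn

batch, seq_len, num_tags = 10, 35, 20              # num_tags stands in for "#target#"
bilstm_out = torch.randn(batch, seq_len, 2 * 100)  # BiLSTM hidden_dim=100, bidirectional
rep2tag = nn.Linear(2 * 100, num_tags)
print(rep2tag(bilstm_out).shape)                   # torch.Size([10, 35, 20]), per-token tag scores
```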
predict.py (39 changed lines)

```diff
@@ -34,13 +34,46 @@ def main(params):
     lm = LearningMachine('predict', conf, problem, vocab_info=None, initialize=False, use_gpu=conf.use_gpu)
     lm.load_model(conf.previous_model_path)
 
-    logging.info('Predicting %s with the model saved at %s' % (conf.predict_data_path, conf.previous_model_path))
-    lm.predict(conf.predict_data_path, conf.predict_output_path, conf.predict_file_columns, conf.predict_fields)
-    logging.info("Predict done! The predict result: %s" % conf.predict_output_path)
+    logging.info('Predicting %s with the model saved at %s' % (conf.predict_data_path, conf.previous_model_path))
+    if params.predict_mode == 'batch':
+        lm.predict(conf.predict_data_path, conf.predict_output_path, conf.predict_file_columns, conf.predict_fields)
+        logging.info("Predict done! The predict result: %s" % conf.predict_output_path)
+    elif params.predict_mode == 'interactive':
+        print('='*80)
+        task_type = str(ProblemTypes[problem.problem_type]).split('.')[1]
+        sample_format = list(conf.predict_file_columns.keys())
+        target_ = conf.conf['inputs'].get('target', None)
+        target_list = list(target_) if target_ else []
+        for single_element in sample_format[:]:
+            if single_element in target_list:
+                sample_format.remove(single_element)
+        predict_file_columns = {}
+        for index, single in enumerate(sample_format):
+            predict_file_columns[single] = index
+        print('Enabling Interactive Inference Mode for %s Task...' % (task_type.upper()))
+        print('%s Task Interactive. The sample format is <%s>' % (task_type.upper(), ', '.join(sample_format)))
+        case_cnt = 1
+        while True:
+            print('Case%d:' % case_cnt)
+            sample = []
+            for single in sample_format:
+                temp_ = input('\t%s: ' % single)
+                if temp_.lower() == 'exit':
+                    exit(0)
+                sample.append(temp_)
+            sample = '\t'.join(sample)
+            result = lm.interactive([sample], predict_file_columns, conf.predict_fields, params.predict_mode)
+            print('\tInference result: %s' % result)
+            case_cnt += 1
+    else:
+        raise Exception('Predict mode support interactive|batch, get %s' % params.predict_mode)
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Prediction')
     parser.add_argument("--conf_path", type=str, help="configuration path")
+    parser.add_argument("--predict_mode", type=str, default='batch', help='interactive|batch')
     parser.add_argument("--predict_data_path", type=str, help='specify another predict data path, instead of the one defined in configuration file')
     parser.add_argument("--previous_model_path", type=str, help='load model trained previously.')
     parser.add_argument("--predict_output_path", type=str, help='specify another prediction output path, instead of conf[outputs][save_base_dir] + conf[outputs][predict_output_name] defined in configuration file')
```
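In interactive mode the script builds its prompt from `predict_file_columns` minus the target columns, then tab-joins the answers into one sample line, the same row format that batch prediction reads from a TSV. A condensed sketch of that bookkeeping (column names and the typed answer are invented for illustration):

```python
file_columns = {'word': 0, 'tag': 1}        # as in the slot tagging configs above
target_list = ['tag']

sample_format = [c for c in file_columns if c not in target_list]   # ['word']
answers = {'word': 'show flights from boston to new york'}          # what the user typed
sample = '\t'.join(answers[c] for c in sample_format)

# The diff then hands this to the learning machine, roughly:
# result = lm.interactive([sample], {c: i for i, c in enumerate(sample_format)},
#                         conf.predict_fields, 'interactive')
```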
problem.py (291 changed lines)

```diff
@@ -12,9 +12,9 @@ nltk.download('stopwords', quiet=True)
 from utils.BPEEncoder import BPEEncoder
 import os
 import pickle as pkl
-from utils.common_utils import load_from_pkl, dump_to_pkl
+from utils.common_utils import load_from_pkl, dump_to_pkl, load_from_json, dump_to_json, prepare_dir, md5
 
-from settings import ProblemTypes
+from settings import ProblemTypes, Setting as st
 import math
 from utils.ProcessorsScheduler import ProcessorsScheduler
 
@@ -65,6 +65,11 @@ class Problem():
         target_with_start, target_with_end, target_with_unk, target_with_pad, same_length = (False, ) * 5
         with_bos_eos = False
 
+        if ProblemTypes[problem_type] == ProblemTypes.sequence_tagging:
+            target_with_start = False
+            target_with_end = False
+            target_with_unk = False
+
         self.lowercase = lowercase
         self.problem_type = problem_type
         self.tagging_scheme = tagging_scheme
@@ -107,24 +112,21 @@ class Problem():
         else:
             return None
 
-    def get_data_generator_from_file(self, data_path_list, file_with_col_header, chunk_size=1000000):
-        # NOTE: file_path is a list type
-        for single_path in data_path_list:
-            data_list = list()
-            if single_path is not None:
-                with open(single_path, "r", encoding='utf-8') as f:
-                    if file_with_col_header:
-                        f.readline()
-                    for index, line in enumerate(f):
-                        line = line.rstrip()
-                        if not line:
-                            break
-                        data_list.append(line)
-                        if (index + 1) % chunk_size == 0:
-                            yield data_list
-                            data_list = list()
-                if len(data_list) > 0:
-                    yield data_list
+    def get_data_generator_from_file(self, data_path, file_with_col_header, chunk_size=1000000):
+        data_list = list()
+        with open(data_path, "r", encoding='utf-8') as f:
+            if file_with_col_header:
+                f.readline()
+            for index, line in enumerate(f):
+                line = line.rstrip()
+                if not line:
+                    break
+                data_list.append(line)
+                if (index + 1) % chunk_size == 0:
+                    yield data_list
+                    data_list = list()
+            if len(data_list) > 0:
+                yield data_list
 
     def build_training_data_list(self, training_data_list, file_columns, input_types, answer_column_name, bpe_encoder=None):
         docs = dict()  # docs of each type of input
@@ -162,7 +164,10 @@ class Problem():
                     line_split[i] = self.text_preprocessor.preprocess(line_split[i])
 
                 if col_index_types[i] == 'word':
-                    token_list = self.tokenizer.tokenize(line_split[i])
+                    if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+                        token_list = line_split[i].split(" ")
+                    else:
+                        token_list = self.tokenizer.tokenize(line_split[i])
                     docs[col_index_types[i]].append(token_list)
                 if 'char' in docs:
                     # add char
@@ -218,11 +223,11 @@ class Problem():
 
     def build(self, data_path_list, file_columns, input_types, file_with_col_header, answer_column_name, word2vec_path=None, word_emb_dim=None,
               format=None, file_type=None, involve_all_words=None, file_format="tsv", show_progress=True,
-              cpu_num_workers=-1, max_vocabulary=800000, word_frequency=3):
+              cpu_num_workers=-1, max_vocabulary=800000, word_frequency=3, max_building_lines=1000*1000):
         """
 
         Args:
-            training_data_path:
+            data_path_list:
             file_columns: {
                 "word1": 0,
                 "word2": 1,
@@ -260,39 +265,29 @@ class Problem():
 
         """
         # parameter check
         if not word2vec_path:
             word_emb_dim, format, file_type, involve_all_words = None, None, None, None
 
-        if 'bpe' in input_types:
-            try:
-                bpe_encoder = BPEEncoder(input_types['bpe']['bpe_path'])
-            except KeyError:
-                raise Exception('Please define a bpe path at the embedding layer.')
-        else:
-            bpe_encoder = None
-
+        bpe_encoder = self._check_bpe_encoder(input_types)
         self.file_column_num = len(file_columns)
-        progress = self.get_data_generator_from_file(data_path_list, file_with_col_header)
-        preprocessed_data_generator = self.build_training_multi_processor(progress, cpu_num_workers, file_columns, input_types, answer_column_name, bpe_encoder=bpe_encoder)
-
-        # update symbol universe
-        total_cnt_legal, total_cnt_illegal = 0, 0
-        for docs, target_docs, cnt_legal, cnt_illegal in tqdm(preprocessed_data_generator):
-            total_cnt_legal += cnt_legal
-            total_cnt_illegal += cnt_illegal
-
-            # input_type
-            for input_type in input_types:
-                self.input_dicts[input_type].update(docs[input_type])
-
-            # problem_type
-            if ProblemTypes[self.problem_type] == ProblemTypes.classification or \
-                ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
-                self.output_dict.update(list(target_docs.values())[0])
-            elif ProblemTypes[self.problem_type] == ProblemTypes.regression or \
-                ProblemTypes[self.problem_type] == ProblemTypes.mrc:
-                pass
-        logging.info("Corpus imported: %d legal lines, %d illegal lines." % (total_cnt_legal, total_cnt_illegal))
+        for data_path in data_path_list:
+            if data_path:
+                progress = self.get_data_generator_from_file(data_path, file_with_col_header, chunk_size=max_building_lines)
+                preprocessed_data_generator = self.build_training_multi_processor(progress, cpu_num_workers, file_columns, input_types, answer_column_name, bpe_encoder=bpe_encoder)
+
+                # update symbol universe
+                docs, target_docs, cnt_legal, cnt_illegal = next(preprocessed_data_generator)
+
+                # input_type
+                for input_type in input_types:
+                    self.input_dicts[input_type].update(docs[input_type])
+
+                # problem_type
+                if ProblemTypes[self.problem_type] == ProblemTypes.classification or \
+                    ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+                    self.output_dict.update(list(target_docs.values())[0])
+                elif ProblemTypes[self.problem_type] == ProblemTypes.regression or \
+                    ProblemTypes[self.problem_type] == ProblemTypes.mrc:
+                    pass
+                logging.info("[Building Dictionary] in %s at most %d lines imported: %d legal lines, %d illegal lines." % (data_path, max_building_lines, cnt_legal, cnt_illegal))
 
         # build dictionary
         for input_type in input_types:
@@ -300,6 +295,11 @@ class Problem():
             logging.info("%d types in %s column" % (self.input_dicts[input_type].cell_num(), input_type))
         if self.output_dict:
             self.output_dict.build(threshold=0)
+            if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+                self.output_dict.cell_id_map["<start>"] = len(self.output_dict.cell_id_map)
+                self.output_dict.id_cell_map[len(self.output_dict.id_cell_map)] = "<start>"
+                self.output_dict.cell_id_map["<eos>"] = len(self.output_dict.cell_id_map)
+                self.output_dict.id_cell_map[len(self.output_dict.id_cell_map)] = "<eos>"
             logging.info("%d types in target column" % (self.output_dict.cell_num()))
         logging.debug("training data dict built")
 
@@ -313,7 +313,12 @@ class Problem():
             self.input_dicts['word'].update([list(word_emb_dict.keys())])
             self.input_dicts['word'].build(threshold=0, max_vocabulary_num=len(word_emb_dict))
         else:
-            word_emb_dict = load_embedding(word2vec_path, word_emb_dim, format, file_type, with_head=False, word_set=self.input_dicts['word'].cell_id_map.keys())
+            extend_vocabulary = set()
+            for single_word in self.input_dicts['word'].cell_id_map.keys():
+                extend_vocabulary.add(single_word)
+                if single_word.lower() != single_word:
+                    extend_vocabulary.add(single_word.lower())
+            word_emb_dict = load_embedding(word2vec_path, word_emb_dim, format, file_type, with_head=False, word_set=extend_vocabulary)
 
         for word in word_emb_dict:
             loaded_emb_dim = len(word_emb_dict[word])
@@ -329,11 +334,15 @@ class Problem():
 
             word_emb_matrix = []
             unknown_word_count = 0
+            scale = np.sqrt(3.0 / word_emb_dim)
             for i in range(self.input_dicts['word'].cell_num()):
-                if self.input_dicts['word'].id_cell_map[i] in word_emb_dict:
-                    word_emb_matrix.append(word_emb_dict[self.input_dicts['word'].id_cell_map[i]])
+                single_word = self.input_dicts['word'].id_cell_map[i]
+                if single_word in word_emb_dict:
+                    word_emb_matrix.append(word_emb_dict[single_word])
+                elif single_word.lower() in word_emb_dict:
+                    word_emb_matrix.append(word_emb_dict[single_word.lower()])
                 else:
-                    word_emb_matrix.append(word_emb_dict['<unk>'])
+                    word_emb_matrix.append(np.random.uniform(-scale, scale, word_emb_dim))
                     unknown_word_count += 1
             word_emb_matrix = np.array(word_emb_matrix)
             logging.info("word embedding matrix shape:(%d, %d); unknown word count: %d;" %
@@ -382,8 +391,6 @@ class Problem():
 
     def encode_data_multi_processor(self, data_generator, cpu_num_workers, file_columns, input_types, object_inputs,
                                     answer_column_name, min_sentence_len, extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", bpe_encoder=None):
-
-
         for data in data_generator:
             scheduler = ProcessorsScheduler(cpu_num_workers)
             func_args = (data, file_columns, input_types, object_inputs,
@@ -403,7 +410,7 @@ class Problem():
         yield output_data, lengths, target, cnt_legal, cnt_illegal
 
     def encode_data_list(self, data_list, file_columns, input_types, object_inputs, answer_column_name, min_sentence_len,
-                         extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", bpe_encoder=None):
+                         extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", bpe_encoder=None, predict_mode='batch'):
         data = dict()
         lengths = dict()
         char_emb = True if 'char' in [single_input_type.lower() for single_input_type in input_types] else False
@@ -423,6 +430,9 @@ class Problem():
 
         type_branches = dict()  # branch of input type, e.g. type_branches['query_index'] = 'query'
 
+        # for char: don't split these word
+        word_no_split = ['<start>', '<pad>', '<eos>', '<unk>']
+
         for branch in object_inputs:
             data[branch] = dict()
             lengths[branch] = dict()
@@ -461,11 +471,14 @@ class Problem():
             line_split = line.rstrip().split('\t')
             cnt_all += 1
             if len(line_split) != len(file_columns):
                 # logging.warning("Current line is inconsistent with configuration/inputs/file_header. Ingore now. %s" % line)
-                cnt_illegal += 1
-                if cnt_illegal / cnt_all > 0.33:
-                    raise PreprocessError('The illegal data is too much. Please check the number of data columns or text token version.')
-                continue
+                if predict_mode == 'batch':
+                    cnt_illegal += 1
+                    if cnt_illegal / cnt_all > 0.33:
+                        raise PreprocessError('The illegal data is too much. Please check the number of data columns or text token version.')
+                    continue
+                else:
+                    print('\tThe case is illegal! Please check your case and input again!')
+                    return [None]*5
             # cnt_legal += 1
             length_appended_set = set()  # to store branches whose length have been appended to lengths[branch]
 
@@ -496,7 +509,7 @@ class Problem():
                         data[extra_info_type]['extra_passage_text'].append(line_split[i])
                         data[extra_info_type]['extra_passage_token_offsets'].append(passage_token_offsets)
                     else:
-                        if extra_feature == False:
+                        if extra_feature == False and ProblemTypes[self.problem_type] != ProblemTypes.sequence_tagging:
                             tokens = self.tokenizer.tokenize(line_split[i])
                         else:
                             tokens = line_split[i].split(' ')
@@ -505,6 +518,28 @@ class Problem():
                     else:
                         tokens = line_split[i].split(' ')
 
+                    # for sequence labeling task, the length must be record the corpus truth length
+                    if ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+                        if not branch in length_appended_set:
+                            lengths[branch]['sentence_length'].append(len(tokens))
+                            length_appended_set.add(branch)
+                        else:
+                            if len(tokens) != lengths[branch]['sentence_length'][-1]:
+                                # logging.warning(
+                                #     "The length of inputs are not consistent. Ingore now. %s" % line)
+                                cnt_illegal += 1
+                                if cnt_illegal / cnt_all > 0.33:
+                                    raise PreprocessError(
+                                        "The illegal data is too much. Please check the number of data columns or text token version.")
+                                lengths[branch]['sentence_length'].pop()
+                                true_len = len(lengths[branch]['sentence_length'])
+                                # need delete the last example
+                                check_list = ['data', 'lengths', 'target']
+                                for single_check in check_list:
+                                    single_check = eval(single_check)
+                                    self.delete_example(single_check, true_len)
+                                break
+
                     if fixed_lengths and type_branches[input_type[0]] in fixed_lengths:
                         if len(tokens) >= fixed_lengths[type_branches[input_type[0]]]:
                             tokens = tokens[:fixed_lengths[type_branches[input_type[0]]]]
@@ -520,32 +555,45 @@ class Problem():
                     if self.with_bos_eos is True:
                         tokens = ['<start>'] + tokens + ['<eos>']  # so that source_with_start && source_with_end should be True
 
-                    if not branch in length_appended_set:
-                        lengths[branch]['sentence_length'].append(len(tokens))
-                        length_appended_set.add(branch)
-                    else:
-                        if len(tokens) != lengths[branch]['sentence_length'][-1]:
-                            # logging.warning(
-                            #     "The length of inputs are not consistent. Ingore now. %s" % line)
-                            cnt_illegal += 1
-                            if cnt_illegal / cnt_all > 0.33:
-                                raise PreprocessError("The illegal data is too much. Please check the number of data columns or text token version.")
-                            lengths[branch]['sentence_length'].pop()
-                            true_len = len(lengths[branch]['sentence_length'])
-                            # need delete the last example
-                            check_list = ['data', 'lengths', 'target']
-                            for single_check in check_list:
-                                single_check = eval(single_check)
-                                self.delete_example(single_check, true_len)
-                            break
+                    # for other tasks, length must be same as data length because fix/max_length operation
+                    if not ProblemTypes[self.problem_type] == ProblemTypes.sequence_tagging:
+                        if not branch in length_appended_set:
+                            lengths[branch]['sentence_length'].append(len(tokens))
+                            length_appended_set.add(branch)
+                        else:
+                            if len(tokens) != lengths[branch]['sentence_length'][-1]:
+                                # logging.warning(
+                                #     "The length of inputs are not consistent. Ingore now. %s" % line)
+                                cnt_illegal += 1
+                                if cnt_illegal / cnt_all > 0.33:
+                                    raise PreprocessError(
+                                        "The illegal data is too much. Please check the number of data columns or text token version.")
+                                lengths[branch]['sentence_length'].pop()
+                                true_len = len(lengths[branch]['sentence_length'])
+                                # need delete the last example
+                                check_list = ['data', 'lengths', 'target']
+                                for single_check in check_list:
+                                    single_check = eval(single_check)
+                                    self.delete_example(single_check, true_len)
+                                break
 
                     for single_input_type in input_type:
                         if 'char' in single_input_type:
                             temp_word_char = []
                             temp_word_length = []
                             for single_token in tokens:
-                                temp_word_char.append(self.input_dicts[type2cluster[single_input_type]].lookup(single_token))
-                                temp_word_length.append(len(single_token))
+                                if single_token in word_no_split:
+                                    # temp_word_length.append(1)
+                                    temp_id = [self.input_dicts[type2cluster[single_input_type]].id(single_token)]
+                                else:
+                                    temp_id = self.input_dicts[type2cluster[single_input_type]].lookup(single_token)
+                                    if fixed_lengths and 'word' in fixed_lengths:
+                                        if len(temp_id) >= fixed_lengths['word']:
+                                            temp_id = temp_id[:fixed_lengths['word']]
+                                        else:
+                                            temp_id = temp_id + [self.input_dicts[type2cluster[single_input_type]].id('<pad>')] * (fixed_lengths['word'] - len(temp_id))
+                                temp_word_char.append(temp_id)
+                                temp_word_length.append(len(temp_id))
                             data[branch][single_input_type].append(temp_word_char)
                             lengths[branch]['word_length'].append(temp_word_length)
                         else:
@@ -625,7 +673,7 @@ class Problem():
 
     def encode(self, data_path, file_columns, input_types, file_with_col_header, object_inputs, answer_column_name,
                min_sentence_len, extra_feature, max_lengths=None, fixed_lengths=None, file_format="tsv", show_progress=True,
-               cpu_num_workers = -1):
+               cpu_num_workers=-1, chunk_size=1000*1000):
         """
 
         Args:
@@ -701,22 +749,16 @@ class Problem():
                 target: [...]
 
         """
-        if 'bpe' in input_types:
-            try:
-                bpe_encoder = BPEEncoder(input_types['bpe']['bpe_path'])
-            except KeyError:
-                raise Exception('Please define a bpe path at the embedding layer.')
-        else:
-            bpe_encoder = None
+        bpe_encoder = self._check_bpe_encoder(input_types)
 
-        progress = self.get_data_generator_from_file([data_path], file_with_col_header)
-        encoder_generator = self.encode_data_multi_processor(progress, cpu_num_workers,
+        progress = self.get_data_generator_from_file(data_path, file_with_col_header, chunk_size=chunk_size)
+        encode_generator = self.encode_data_multi_processor(progress, cpu_num_workers,
             file_columns, input_types, object_inputs, answer_column_name, min_sentence_len, extra_feature, max_lengths,
             fixed_lengths, file_format, bpe_encoder=bpe_encoder)
 
         data, lengths, target = dict(), dict(), dict()
         cnt_legal, cnt_illegal = 0, 0
-        for temp_data, temp_lengths, temp_target, temp_cnt_legal, temp_cnt_illegal in tqdm(encoder_generator):
+        for temp_data, temp_lengths, temp_target, temp_cnt_legal, temp_cnt_illegal in tqdm(encode_generator):
             data = self._merge_encode_data(data, temp_data)
             lengths = self._merge_encode_lengths(lengths, temp_lengths)
             target = self._merge_target(target, temp_target)
@@ -726,6 +768,59 @@ class Problem():
         logging.info("%s: %d legal samples, %d illegal samples" % (data_path, cnt_legal, cnt_illegal))
         return data, lengths, target
 
+    def build_encode_cache(self, conf, file_format="tsv"):
+        logging.info("[Cache] building encoding cache")
+        build_encode_cache_generator = self.get_encode_generator(conf, build_cache=True, file_format=file_format)
+        for _ in build_encode_cache_generator:
+            continue
+        logging.info("[Cache] encoding is saved to %s" % conf.encoding_cache_dir)
+
+    def get_encode_generator(self, conf, build_cache=True, file_format="tsv"):
+        # parameter check
+        if build_cache:
+            assert conf.encoding_cache_dir, 'There is no property encoding_cache_dir in object conf'
+            assert conf.encoding_cache_index_file_path, 'There is no property encoding_cache_index_file_path in object conf'
+            assert conf.encoding_cache_index_file_md5_path, 'There is no property encoding_cache_index_file_md5_path in object conf'
+
+        bpe_encoder = self._check_bpe_encoder(conf.input_types)
+        data_generator = self.get_data_generator_from_file(conf.train_data_path, conf.file_with_col_header, chunk_size=conf.chunk_size)
+        encode_generator = self.encode_data_multi_processor(data_generator, conf.cpu_num_workers,
+            conf.file_columns, conf.input_types, conf.object_inputs, conf.answer_column_name,
+            conf.min_sentence_len, conf.extra_feature, conf.max_lengths,
+            conf.fixed_lengths, file_format, bpe_encoder=bpe_encoder)
+
+        file_index = []
+        total_cnt_legal, total_cnt_illegal = 0, 0
+        for part_number, encode_data in enumerate(encode_generator):
+            data, lengths, target, cnt_legal, cnt_illegal = encode_data
+            if build_cache:
+                total_cnt_legal = total_cnt_legal + cnt_legal
+                total_cnt_illegal = total_cnt_illegal + cnt_illegal
+                file_name = st.cencoding_file_name_pattern % (part_number)
+                file_path = os.path.join(conf.encoding_cache_dir, file_name)
+                dump_to_pkl((data, lengths, target), file_path)
+                file_index.append([file_name, md5([file_path])])
+                logging.info("Up to now, in %s: %d legal samples, %d illegal samples" % (conf.train_data_path, total_cnt_legal, total_cnt_illegal))
+            yield data, lengths, target
+
+        if build_cache:
+            cache_index = dict()
+            cache_index[st.cencoding_key_index] = file_index
+            cache_index[st.cencoding_key_legal_cnt] = total_cnt_legal
+            cache_index[st.cencoding_key_illegal_cnt] = total_cnt_illegal
+            dump_to_json(cache_index, conf.encoding_cache_index_file_path)
+            dump_to_json(md5([conf.encoding_cache_index_file_path]), conf.encoding_cache_index_file_md5_path)
+
+    @staticmethod
+    def _check_bpe_encoder(input_types):
+        bpe_encoder = None
+        if 'bpe' in input_types:
+            try:
+                bpe_encoder = BPEEncoder(input_types['bpe']['bpe_path'])
+            except KeyError:
+                raise Exception('Please define a bpe path at the embedding layer.')
+        return bpe_encoder
+
     def decode(self, model_output, lengths=None, batch_data=None):
         """ decode the model output, either a batch of output or a single output
```
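The rewritten `get_data_generator_from_file` takes one path instead of a list and streams the file in fixed-size chunks; `build()` now loops over the paths itself and, via `chunk_size=max_building_lines`, can stop the dictionary pass after the first chunk. A stand-alone sketch of the chunking pattern (function name and sizes are illustrative):

```python
def chunked_lines(path, skip_header=False, chunk_size=1000000):
    """Yield lists of up to chunk_size stripped lines, stopping at the first blank line."""
    chunk = []
    with open(path, 'r', encoding='utf-8') as f:
        if skip_header:
            f.readline()
        for index, line in enumerate(f):
            line = line.rstrip()
            if not line:
                break
            chunk.append(line)
            if (index + 1) % chunk_size == 0:
                yield chunk
                chunk = []
    if chunk:
        yield chunk

# e.g. feed only the first million lines to the dictionary builder:
# first_chunk = next(chunked_lines('eng.train.tsv', chunk_size=1000000))
```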
register_block.py (new file, +63 lines):

```python
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import os
import argparse


def get_block_path(block_name, path='./block_zoo'):
    ''' find the block_name.py file in block_zoo
    Args:
        block_name: the name need to be registered. eg. BiLSTM/ CRF
    '''
    get_dir = os.listdir(path)
    for single in get_dir:
        sub_dir = os.path.join(path, single)
        if os.path.isdir(sub_dir):
            result = get_block_path(block_name, path=sub_dir)
            if result:
                return result
        else:
            if block_name + '.py' == single:
                return sub_dir
    return None


def write_file(new_block_path, file_path):
    init_path = os.path.join(file_path, '__init__.py')
    diff = new_block_path[len(file_path):].split('/')
    if diff[0] == '':
        diff.pop(0)
    # delete '.py' in the last str
    diff[-1] = diff[-1][:-3]
    line = 'from .' + diff[0] + ' import ' + diff[-1] + ', ' + diff[-1] + 'Conf'
    with open(init_path, 'a', encoding='utf-8') as fin:
        fin.write('\n' + line + '\n')


def register(block_name, new_block_path):
    ''' Add import code in the corresponding file. eg. block_zoo/__init__.py or block_zoo/subdir/__init__.py

    '''
    # check if block exist or not
    if new_block_path:
        block_path_split = new_block_path.split('/')
        for i in range(len(block_path_split)-1, 1, -1):
            # need_add_file.append(os.path.join('/'.join(block_path_split[:i])))
            write_file(new_block_path, os.path.join('/'.join(block_path_split[:i])))
        print('The block %s is registered successfully.' % block_name)
    else:
        raise Exception('The %s.py file does not exist! Please check your program or file name.' % block_name)


def main(params):
    new_block_path = get_block_path(params.block_name)
    register(params.block_name, new_block_path)


if __name__ == '__main__':
    parse = argparse.ArgumentParser(description='Register Block')
    parse.add_argument("--block_name", type=str, help="block name want to be registered")
    params, _ = parse.parse_known_args()
    assert params.block_name, 'Please specify a block_name via --block_name'
    main(params)
```
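To make the path arithmetic in `write_file` concrete, here is what it appends for a block placed one directory deep (the block name and path are hypothetical):

```python
new_block_path = './block_zoo/op/MyBlock.py'   # hypothetical new block
file_path = './block_zoo'                      # one of the dirs register() walks

diff = new_block_path[len(file_path):].split('/')   # ['', 'op', 'MyBlock.py']
if diff[0] == '':
    diff.pop(0)
diff[-1] = diff[-1][:-3]                            # strip '.py' -> 'MyBlock'
line = 'from .' + diff[0] + ' import ' + diff[-1] + ', ' + diff[-1] + 'Conf'
print(line)   # from .op import MyBlock, MyBlockConf
```

Since `register()` calls `write_file` once per ancestor directory, both `block_zoo/__init__.py` and `block_zoo/op/__init__.py` would gain an import line in this example.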
settings.py (28 changed lines)

```diff
@@ -18,11 +18,11 @@ LanguageTypes = Enum('LanguageTypes', ('english', 'chinese'))
 ProblemTypes = Enum('ProblemTypes', ('sequence_tagging', 'classification', 'regression', 'mrc'))
 
 # Supported sequence tagging scheme
-TaggingSchemes = Enum('TaggingSchemes', ('BIO'))
+TaggingSchemes = Enum('TaggingSchemes', ('BIO', 'BIOES'))
 
 # supported metrics
 SupportedMetrics = {
-    ProblemTypes.sequence_tagging: set(['seq_tag_f1', 'accuracy']),
+    ProblemTypes.sequence_tagging: set(['seq_tag_f1', 'seq_tag_accuracy']),
     ProblemTypes.classification: set(['auc', 'accuracy', 'f1', 'macro_f1', 'macro_precision', 'macro_recall', 'micro_f1', 'micro_precision', 'micro_recall', 'weighted_f1', 'weighted_precision', 'weighted_recall']),
     # In addition, for auc in multi-type classification,
     # if there is a type named 1, auc@1 means use 1 as the positive label
@@ -53,3 +53,27 @@ DefaultPredictionFields = {
 # nltk's models
 nltk.data.path.append(os.path.join(os.getcwd(), 'dataset', 'nltk_data'))
 
+
+class Constant(type):
+    def __setattr__(self, name, value):
+        raise AttributeError("Class %s can not be modified" % (self.__name__))
+
+
+class ConstantStatic(metaclass=Constant):
+    def __init__(self, *args, **kwargs):
+        raise Exception("Class %s can not be instantiated" % (self.__class__.__name__))
+
+
+class Setting(ConstantStatic):
+    # cache
+    ## cencoding (cache_encoding)
+    cencodig_index_file_name = 'index.json'
+    cencoding_index_md5_file_name = 'index_md5.json'
+    cencoding_file_name_pattern = 'encoding_cache_%s.pkl'
+    cencoding_key_finish = 'finish'
+    cencoding_key_index = 'index'
+    cencoding_key_legal_cnt = 'legal_line_cnt'
+    cencoding_key_illegal_cnt = 'illegal_line_cnt'
```
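The `Constant` metaclass makes `Setting` a read-only namespace: any attempt to rebind one of the `cencoding_*` constants raises immediately. A quick demonstration:

```python
from settings import Setting as st

print(st.cencoding_file_name_pattern % 0)   # encoding_cache_0.pkl

try:
    st.cencoding_key_index = 'something-else'
except AttributeError as e:
    print(e)                                # Class Setting can not be modified
```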
test.py (2 changed lines)

```diff
@@ -19,7 +19,7 @@ def main(params):
     problem = Problem("test", conf.problem_type, conf.input_types, conf.answer_column_name,
                       with_bos_eos=conf.add_start_end_for_seq, tagging_scheme=conf.tagging_scheme, tokenizer=conf.tokenizer,
                       remove_stopwords=conf.remove_stopwords, DBC2SBC=conf.DBC2SBC, unicode_fix=conf.unicode_fix)
 
 
     if os.path.isfile(conf.saved_problem_path):
         problem.load_problem(conf.saved_problem_path)
         logging.info("Problem loaded!")
```
taggingSchemes_Converter.py (new file, +112 lines):

```python
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import sys


def BIO2BIOES(input_labels_list):
    output_labels_list = []
    for labels in input_labels_list:
        new_labels = []
        sent_len = len(labels)
        for idx in range(sent_len):
            if "-" not in labels[idx]:
                new_labels.append(labels[idx])
            else:
                label_type = labels[idx].split('-')[-1]
                if "B-" in labels[idx]:
                    if (idx == sent_len - 1) or ("I-" not in labels[idx + 1]):
                        new_labels.append("S-" + label_type)
                    else:
                        new_labels.append("B-" + label_type)
                elif "I-" in labels[idx]:
                    if (idx == sent_len - 1) or ("I-" not in labels[idx + 1]):
                        new_labels.append("E-" + label_type)
                    else:
                        new_labels.append("I-" + label_type)
        assert len(labels) == len(new_labels)
        output_labels_list.append(new_labels)
    return output_labels_list


def BIOES2BIO(input_labels_list):
    output_labels_list = []
    for labels in input_labels_list:
        new_labels = []
        sent_len = len(labels)
        for idx in range(sent_len):
            if "-" not in labels[idx]:
                new_labels.append(labels[idx])
            else:
                label_type = labels[idx].split('-')[-1]
                if "E-" in labels[idx]:
                    new_labels.append("I-" + label_type)
                elif "S-" in labels[idx]:
                    new_labels.append("B-" + label_type)
                else:
                    new_labels.append(labels[idx])
        assert len(labels) == len(new_labels)
        output_labels_list.append(new_labels)
    return output_labels_list


def IOB2BIO(input_labels_list):
    output_labels_list = []
    for labels in input_labels_list:
        new_labels = []
        sent_len = len(labels)
        for idx in range(sent_len):
            if "I-" in labels[idx]:
                label_type = labels[idx].split('-')[-1]
                if (idx == 0) or (labels[idx - 1] == "O") or (label_type != labels[idx - 1].split('-')[-1]):
                    new_labels.append("B-" + label_type)
                else:
                    new_labels.append(labels[idx])
            else:
                new_labels.append(labels[idx])
        assert len(labels) == len(new_labels)
        output_labels_list.append(new_labels)
    return output_labels_list


if __name__ == '__main__':
    '''Convert NER tagging schemes among IOB/BIO/BIOES.
    For example: if you want to convert the IOB tagging scheme to BIO, then you run as following:
        python taggingSchemes_Converter.py IOB2BIO input_iob_file output_bio_file
    Input data format is tsv format.
    '''
    input_file_name, output_file_name = sys.argv[2], sys.argv[3]
    words_list, labels_list, new_labels_list = [], [], []
    with open(input_file_name, 'r') as input_file:
        for line in input_file:
            item = line.rstrip().split('\t')
            assert len(item) == 2
            words, labels = item[0].split(' '), item[1].split(' ')
            if len(words) != len(labels):
                print("Error line: " + line.rstrip())
                continue
            words_list.append(words)
            labels_list.append(labels)

    if sys.argv[1].upper() == "IOB2BIO":
        print("Convert IOB -> BIO...")
        new_labels_list = IOB2BIO(labels_list)
    elif sys.argv[1].upper() == "BIO2BIOES":
        print("Convert BIO -> BIOES...")
        new_labels_list = BIO2BIOES(labels_list)
    elif sys.argv[1].upper() == "BIOES2BIO":
        print("Convert BIOES -> BIO...")
        new_labels_list = BIOES2BIO(labels_list)
    elif sys.argv[1].upper() == "IOB2BIOES":
        print("Convert IOB -> BIOES...")
        tmp_labels_list = IOB2BIO(labels_list)
        new_labels_list = BIO2BIOES(tmp_labels_list)
    else:
        print("Argument error: sys.argv[1] should belongs to \"IOB2BIO/BIO2BIOES/BIOES2BIO/IOB2BIOES\"")

    with open(output_file_name, 'w') as output_file:
        for index in range(len(words_list)):
            words, labels = words_list[index], new_labels_list[index]
            line = " ".join(words) + '\t' + " ".join(labels) + '\n'
            output_file.write(line)
```
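A quick sanity check of the converters above on a toy sentence (labels invented), mirroring the IOB2BIOES branch of the `__main__` block:

```python
labels = [['I-PER', 'I-PER', 'O', 'I-LOC']]   # IOB input

bio = IOB2BIO(labels)
print(bio)     # [['B-PER', 'I-PER', 'O', 'B-LOC']]

bioes = BIO2BIOES(bio)
print(bioes)   # [['B-PER', 'E-PER', 'O', 'S-LOC']]
```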
train.py (152 changed lines)

```diff
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT license.
 
-from settings import ProblemTypes, version
+from settings import ProblemTypes, version, Setting as st
 
 import os
 import argparse
@@ -15,7 +15,7 @@ import torch
 import torch.nn as nn
 from ModelConf import ModelConf
 from problem import Problem
-from utils.common_utils import dump_to_pkl, load_from_pkl, prepare_dir
+from utils.common_utils import dump_to_pkl, load_from_pkl, load_from_json, dump_to_json, prepare_dir, md5
 from utils.philly_utils import HDFSDirectTransferer
 from losses import *
 from optimizers import *
@@ -34,33 +34,76 @@ class Cache:
         self.dictionary_invalid = True
         self.embedding_invalid = True
 
-        # cache_conf
-        cache_conf = None
-        cache_conf_path = os.path.join(conf.cache_dir, 'conf_cache.json')
-        if os.path.isfile(cache_conf_path):
-            params_cache = copy.deepcopy(params)
-            try:
-                cache_conf = ModelConf('cache', cache_conf_path, version, params_cache)
-            except Exception as e:
-                cache_conf = None
-        if cache_conf is None or not self._verify_conf(cache_conf, conf):
-            return False
-
-        # problem
-        if not os.path.isfile(conf.problem_path):
-            return False
-
-        # embedding
-        if conf.emb_pkl_path:
-            if not os.path.isfile(conf.emb_pkl_path):
-                return False
-            self.embedding_invalid = False
-
-        self.dictionary_invalid = False
+        if not conf.pretrained_model_path:
+            # cache_conf
+            cache_conf = None
+            cache_conf_path = os.path.join(conf.cache_dir, 'conf_cache.json')
+            if os.path.isfile(cache_conf_path):
+                params_cache = copy.deepcopy(params)
+                try:
+                    cache_conf = ModelConf('cache', cache_conf_path, version, params_cache)
+                except Exception as e:
+                    cache_conf = None
+            if cache_conf is None or not self._verify_conf(cache_conf, conf):
+                return False
+
+            # problem
+            if not os.path.isfile(conf.problem_path):
+                return False
+
+            # embedding
+            if conf.emb_pkl_path:
+                if not os.path.isfile(conf.emb_pkl_path):
+                    return False
+                self.embedding_invalid = False
+
+        self.dictionary_invalid = False
+        logging.info('[Cache] dictionary found')
         return True
 
+    def _check_encoding(self, conf):
+        self.encoding_invalid = True
+        if not conf.pretrained_model_path and self.dictionary_invalid:
+            return False
+
+        # Calculate the MD5 of problem
+        problem_path = conf.problem_path if not conf.pretrained_model_path else conf.saved_problem_path
+        try:
+            conf.problem_md5 = md5([problem_path])
+        except Exception as e:
+            conf.problem_md5 = None
+            logging.info('Can not calculate md5 of problem.pkl from %s' % (problem_path))
+            return False
+
+        # check the valid of encoding cache
+        ## encoding cache dir
+        conf.encoding_cache_dir = os.path.join(conf.cache_dir, conf.train_data_md5 + conf.problem_md5)
+        logging.debug('[Cache] conf.encoding_cache_dir %s' % (conf.encoding_cache_dir))
+        if not os.path.exists(conf.encoding_cache_dir):
+            return False
+
+        ## encoding cache index
+        conf.encoding_cache_index_file_path = os.path.join(conf.encoding_cache_dir, st.cencodig_index_file_name)
+        conf.encoding_cache_index_file_md5_path = os.path.join(conf.encoding_cache_dir, st.cencoding_index_md5_file_name)
+        if not os.path.exists(conf.encoding_cache_index_file_path) or not os.path.exists(conf.encoding_cache_index_file_md5_path):
+            return False
+        if md5([conf.encoding_cache_index_file_path]) != load_from_json(conf.encoding_cache_index_file_md5_path):
+            return False
+        cache_index = load_from_json(conf.encoding_cache_index_file_path)
+
+        ## encoding cache content
+        for index in cache_index[st.cencoding_key_index]:
+            file_name, file_md5 = index[0], index[1]
+            if file_md5 != md5([os.path.join(conf.encoding_cache_dir, file_name)]):
+                return False
+
+        if (st.cencoding_key_legal_cnt in cache_index) and (st.cencoding_key_illegal_cnt in cache_index):
+            conf.encoding_cache_legal_line_cnt = cache_index[st.cencoding_key_legal_cnt]
+            conf.encoding_cache_illegal_line_cnt = cache_index[st.cencoding_key_illegal_cnt]
+
+        self.encoding_invalid = False
+        logging.info('[Cache] encoding found')
+        logging.info('%s: %d legal samples, %d illegal samples' % (conf.train_data_path, conf.encoding_cache_legal_line_cnt, conf.encoding_cache_illegal_line_cnt))
+        return True
+
     def check(self, conf, params):
@@ -70,7 +113,7 @@ class Cache:
             return
         # encoding
         if not self._check_encoding(conf):
-            self._renew_cache(params, conf.cache_dir)
+            self._renew_cache(params, conf.encoding_cache_dir)
 
     def load(self, conf, problem, emb_matrix):
         # load dictionary when (not finetune) and (cache valid)
@@ -81,13 +124,17 @@ class Cache:
             logging.info('[Cache] loading dictionary successfully')
 
         if not self.encoding_invalid:
-            pass
+            self._prepare_encoding_cache(conf, problem, build=False)
+            logging.info('[Cache] preparing encoding successfully')
         return problem, emb_matrix
 
     def save(self, conf, params, problem, emb_matrix):
         # make cache dir
         if not os.path.exists(conf.cache_dir):
             os.makedirs(conf.cache_dir)
         shutil.copy(params.conf_path, os.path.join(conf.cache_dir, 'conf_cache.json'))
 
         # dictionary
         if self.dictionary_invalid:
             if conf.mode == 'philly' and conf.emb_pkl_path.startswith('/hdfs/'):
                 with HDFSDirectTransferer(conf.problem_path, with_hdfs_command=True) as transferer:
@@ -101,10 +148,11 @@ class Cache:
                     transferer.pkl_dump(emb_matrix)
                 else:
                     dump_to_pkl(emb_matrix, conf.emb_pkl_path)
-                logging.info("Embedding matrix saved to %s" % conf.emb_pkl_path)
+                logging.info("[Cache] Embedding matrix saved to %s" % conf.emb_pkl_path)
 
         # encoding
         if self.encoding_invalid:
-            pass
+            self._prepare_encoding_cache(conf, problem, build=params.make_cache_only)
 
     def back_up(self, conf, problem):
         cache_bakup_path = os.path.join(conf.save_base_dir, 'necessary_cache/')
@@ -150,6 +198,34 @@ class Cache:
             flag = False
         return flag
 
+    def _prepare_encoding_cache(self, conf, problem, build=False):
+        # encoding cache dir
+        problem_path = conf.problem_path if not conf.pretrained_model_path else conf.saved_problem_path
+        conf.problem_md5 = md5([problem_path])
+        conf.encoding_cache_dir = os.path.join(conf.cache_dir, conf.train_data_md5 + conf.problem_md5)
+        if not os.path.exists(conf.encoding_cache_dir):
+            os.makedirs(conf.encoding_cache_dir)
+
+        # encoding cache files
+        conf.encoding_cache_index_file_path = os.path.join(conf.encoding_cache_dir, st.cencodig_index_file_name)
+        conf.encoding_cache_index_file_md5_path = os.path.join(conf.encoding_cache_dir, st.cencoding_index_md5_file_name)
+        conf.load_encoding_cache_generator = self._load_encoding_cache_generator
+
+        if build:
+            prepare_dir(conf.encoding_cache_dir, True, allow_overwrite=True, clear_dir_if_exist=True)
+            problem.build_encode_cache(conf)
+            self.encoding_invalid = False
+
+        if not self.encoding_invalid:
+            cache_index = load_from_json(conf.encoding_cache_index_file_path)
+            conf.encoding_file_index = cache_index[st.cencoding_key_index]
+
+    @staticmethod
+    def _load_encoding_cache_generator(cache_dir, file_index):
+        for index in file_index:
+            file_path = os.path.join(cache_dir, index[0])
+            yield load_from_pkl(file_path)
+
 def main(params):
     # init
     conf = ModelConf("train", params.conf_path, version, params, mode=params.mode)
@@ -172,6 +248,7 @@ def main(params):
     # data preprocessing
     ## build dictionary when (not in finetune model) and (not use cache or cache invalid)
     if (not conf.pretrained_model_path) and ((conf.use_cache == False) or cache.dictionary_invalid):
+        logging.info("="*100)
         logging.info("Preprocessing... Depending on your corpus size, this step may take a while.")
         # modify train_data_path to [train_data_path, valid_data_path, test_data_path]
         # remember the test_data may be None
@@ -181,11 +258,7 @@ def main(params):
                       word_emb_dim=conf.pretrained_emb_dim, format=conf.pretrained_emb_type,
                       file_type=conf.pretrained_emb_binary_or_text, involve_all_words=conf.involve_all_words_in_pretrained_emb,
                       show_progress=True if params.mode == 'normal' else False, cpu_num_workers=conf.cpu_num_workers,
-                      max_vocabulary=conf.max_vocabulary, word_frequency=conf.min_word_frequency)
-
-        ## encode rawdata when do not use cache
-        if conf.use_cache == False:
-            pass
+                      max_vocabulary=conf.max_vocabulary, word_frequency=conf.min_word_frequency, max_building_lines=conf.max_building_lines)
 
     # environment preparing
     ## cache save
@@ -234,9 +307,20 @@ def main(params):
 
     ### optimizer
     if isinstance(lm.model, nn.DataParallel):
-        optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()) + list(lm.model.module.layers['embedding'].get_parameters()), **conf.optimizer_params)
+        if isinstance(lm.model.module.layers['embedding'].embeddings, nn.ModuleDict):
+            optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()), **conf.optimizer_params)
+        else:
+            optimizer = eval(conf.optimizer_name)(
+                list(lm.model.parameters()) + list(lm.model.module.layers['embedding'].get_parameters()),
+                **conf.optimizer_params)
     else:
-        optimizer = eval(conf.optimizer_name)(list(lm.model.parameters()) + list(lm.model.layers['embedding'].get_parameters()), **conf.optimizer_params)
+        if isinstance(lm.model.layers['embedding'].embeddings, nn.ModuleDict):
+            optimizer = eval(conf.optimizer_name)(
+                list(lm.model.parameters()), **conf.optimizer_params)
+        else:
+            optimizer = eval(conf.optimizer_name)(
+                list(lm.model.parameters()) + list(lm.model.layers['embedding'].get_parameters()),
+                **conf.optimizer_params)
 
     ## train
     lm.train(optimizer, loss_fn)
```
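The cache check in `_check_encoding` reduces to: recompute the md5 of the index file and of every cached part, and accept the cache only if everything matches what was recorded when the cache was built. A condensed sketch (the function name is hypothetical; 'index.json' and 'index_md5.json' mirror the `Setting` constants above):

```python
import os
from utils.common_utils import load_from_json, md5

def encoding_cache_is_valid(cache_dir):
    index_path = os.path.join(cache_dir, 'index.json')
    index_md5_path = os.path.join(cache_dir, 'index_md5.json')
    if not (os.path.exists(index_path) and os.path.exists(index_md5_path)):
        return False
    # the index itself must be untampered ...
    if md5([index_path]) != load_from_json(index_md5_path):
        return False
    # ... and so must every cached encoding part it lists
    cache_index = load_from_json(index_path)
    return all(md5([os.path.join(cache_dir, name)]) == digest
               for name, digest in cache_index['index'])
```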
utils/common_utils.py (the helpers imported above)

```diff
@@ -3,6 +3,7 @@
 
 import logging
 import pickle as pkl
+import json
 import torch
 import torch.nn as nn
 import os
@@ -49,6 +50,17 @@ def dump_to_pkl(obj, pkl_path):
         pkl.dump(obj, fout, protocol=pkl.HIGHEST_PROTOCOL)
     logging.debug("Obj dumped to %s!" % pkl_path)
 
+def load_from_json(json_path):
+    data = None
+    with open(json_path, 'r', encoding='utf-8') as f:
+        data = json.loads(f.read())
+    logging.debug("%s loaded!" % json_path)
+    return data
+
+def dump_to_json(obj, json_path):
+    with open(json_path, 'w', encoding='utf-8') as f:
+        f.write(json.dumps(obj))
+    logging.debug("Obj dumped to %s!" % json_path)
+
 def get_trainable_param_num(model):
     """ get the number of trainable parameters
@@ -60,9 +72,15 @@ def get_trainable_param_num(model):
 
     """
     if isinstance(model, nn.DataParallel):
-        model_param = list(model.parameters()) + list(model.module.layers['embedding'].get_parameters())
+        if isinstance(model.module.layers['embedding'].embeddings, dict):
+            model_param = list(model.parameters()) + list(model.module.layers['embedding'].get_parameters())
+        else:
+            model_param = list(model.parameters())
     else:
-        model_param = list(model.parameters()) + list(model.layers['embedding'].get_parameters())
+        if isinstance(model.layers['embedding'].embeddings, dict):
+            model_param = list(model.parameters()) + list(model.layers['embedding'].get_parameters())
+        else:
+            model_param = list(model.parameters())
 
     return sum(p.numel() for p in model_param if p.requires_grad)
 
@@ -228,7 +246,7 @@ def md5(file_paths, chunk_size=1024*1024*1024):
     """ Calculate a md5 of lists of files.
 
     Args:
-        file_paths: an iterable object contains files. Files will be concatenated orderly if there are more than one file
+        file_paths: an iterable object contains file paths. Files will be concatenated orderly if there are more than one file
         chunk_size: unit is byte, default value is 1GB
     Returns:
         md5
@@ -242,4 +260,17 @@ def md5(file_paths, chunk_size=1024*1024*1024):
             if not data:
                 break
             md5.update(data)
-    return md5.hexdigest()
+    return md5.hexdigest()
+
+
+def get_layer_class(model, layer_id):
+    """get the layer class use layer_id
+
+    Args:
+        model: the model architecture, maybe nn.DataParallel type or model
+        layer_id: layer id from configuration
+    """
+    if isinstance(model, nn.DataParallel):
+        return model.module.layers[layer_id]
+    else:
+        return model.layers[layer_id]
```
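A round-trip of the new JSON helpers together with `md5`, which is exactly the write-then-verify pattern the encoding cache uses (the path and payload are invented):

```python
from utils.common_utils import dump_to_json, load_from_json, md5

index = {'index': [['encoding_cache_0.pkl', '<md5-of-part-0>']], 'legal_line_cnt': 100}

dump_to_json(index, '/tmp/index.json')
assert load_from_json('/tmp/index.json') == index

print(md5(['/tmp/index.json']))   # digest later compared against index_md5.json
```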
utils/corpus_utils.py (batching helpers)

```diff
@@ -16,6 +16,7 @@ import codecs
 import copy
 from settings import ProblemTypes
 import torch
+import time
 
 
 if sys.version_info < (3,):
@@ -173,7 +174,7 @@ def corpus_permutation(*corpora):
     return corpora_perm
 
 
-def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=None, permutate=False, transform_tensor=True):
+def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=None, permutate=False, transform_tensor=True, predict_mode='batch'):
     """
 
     Args:
@@ -232,13 +233,14 @@ def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=
         target_batches: ndarray/Variable shape: [number of batches, batch_size, targets]
 
     """
-    logging.info("Start making batches")
+    if predict_mode == 'batch':
+        logging.info("Start making batches")
     if permutate is True:
         # CAUTION! data and length would be revised
-        data = copy.deepcopy(data)
-        length = copy.deepcopy(length)
-        if target is not None:
-            target = copy.deepcopy(target)
+        # data = copy.deepcopy(data)
+        # length = copy.deepcopy(length)
+        # if target is not None:
+        #     target = copy.deepcopy(target)
 
         # shuffle the data
         permutation = np.random.permutation(len(list(target.values())[0]))
@@ -392,7 +394,8 @@ def get_batches(problem, data, length, target, batch_size, input_types, pad_ids=
 
         target_batches.append(target_batch)
 
-    logging.info("Batches got!")
+    if predict_mode == 'batch':
+        logging.info("Batches got!")
     return data_batches, length_batches, target_batches
```