fix the evaluation script

2022-05-27 22:37:54 -04:00 · 2022-05-27 22:37:54 -04:00 · 9f3429a93e
--- a/data2text/experiment/evaluation/eval.py
+++ b/data2text/experiment/evaluation/eval.py
@@ -1,10 +1,6 @@
 """Evaluation. """ 

-from .utils import (
-    beam_generate, 
-    select_prediction_set_by_bleu, 
-    select_prediction_set_by_parent, 
-) 
+from .utils import beam_generate
 from ..utils import bleu_scorer, parent_scorer 


@@ -15,27 +11,15 @@ def eval_with_bleu(args, testset, tokenizer, model):
        beam_generate(sample, tokenizer, model, args)
        for sample in testset
    ]
+    predictions = [sample[0]['tokens_clear'] for sample in raw_predictions]

    references = [
        [tokenizer.tokenize(sample['target'])]
        for sample in testset
    ]

-    pred_tokens_dict = {}
-    for idx in range(args.num_return_sequences):
-        pred_tokens_dict[idx] = [sample[idx]['tokens_clear'] for sample in raw_predictions]
-
-    for idx, predictions in pred_tokens_dict.items():
-        idx_results = bleu_scorer.compute(
-            predictions=predictions, 
-            references=references,
-        )
-        print(f"Idx#{idx} - BLEU: {idx_results['bleu']: .3f}")
-    
-    best_predictions = select_prediction_set_by_bleu(
-        raw_predictions, references, bleu_scorer)
    best_results = bleu_scorer.compute(
-        predictions=best_predictions, 
+        predictions=predictions, 
        references=references
    )
    print(f"BEST BLEU: {best_results['bleu']: .3f}")
@@ -49,6 +33,7 @@ def eval_with_parent(args, testset, tokenizer, model):

    raw_predictions = [ beam_generate(sample, tokenizer, model, args)
        for sample in testset]
+    predictions = [sample[0]['tokens_clear'] for sample in raw_predictions]
    references = [ [tokenizer.tokenize(sample['target'])]
        for sample in testset]
    tokenized_tables = []
@@ -60,23 +45,8 @@ def eval_with_parent(args, testset, tokenizer, model):
            tokenized_table_parent.append( ([attr], value_tokens) )
        tokenized_tables.append(tokenized_table_parent)

-    pred_tokens_dict = {}
-    for idx in range(args.num_return_sequences):
-        pred_tokens_dict[idx] = [sample[idx]['tokens_clear'] for sample in raw_predictions]
-
-    for idx, predictions in pred_tokens_dict.items():
-        (idx_p, idx_r, idx_f1, idx_all_f1) = parent_scorer(
-            predictions=predictions, 
-            references=references, 
-            tables=tokenized_tables, 
-            return_dict=False, 
-        )
-        print(f"Idx#{idx} - PARENT: {idx_p:.3f}, {idx_r:.3f}, {idx_f1:.3f}")
-    
-    best_predictions = select_prediction_set_by_parent(
-        raw_predictions, references, tokenized_tables)
    (avg_p, avg_r, avg_f, all_f) = parent_scorer(
-        predictions=best_predictions, 
+        predictions=predictions, 
        references=references, 
        tables=tokenized_tables, 
        return_dict=False