зеркало из https://github.com/microsoft/HiTab.git
fix the evaluation script
This commit is contained in:
Родитель
aad7c5ac82
Коммит
9f3429a93e
|
@ -1,10 +1,6 @@
|
|||
"""Evaluation. """
|
||||
|
||||
from .utils import (
|
||||
beam_generate,
|
||||
select_prediction_set_by_bleu,
|
||||
select_prediction_set_by_parent,
|
||||
)
|
||||
from .utils import beam_generate
|
||||
from ..utils import bleu_scorer, parent_scorer
|
||||
|
||||
|
||||
|
@ -15,27 +11,15 @@ def eval_with_bleu(args, testset, tokenizer, model):
|
|||
beam_generate(sample, tokenizer, model, args)
|
||||
for sample in testset
|
||||
]
|
||||
predictions = [sample[0]['tokens_clear'] for sample in raw_predictions]
|
||||
|
||||
references = [
|
||||
[tokenizer.tokenize(sample['target'])]
|
||||
for sample in testset
|
||||
]
|
||||
|
||||
pred_tokens_dict = {}
|
||||
for idx in range(args.num_return_sequences):
|
||||
pred_tokens_dict[idx] = [sample[idx]['tokens_clear'] for sample in raw_predictions]
|
||||
|
||||
for idx, predictions in pred_tokens_dict.items():
|
||||
idx_results = bleu_scorer.compute(
|
||||
predictions=predictions,
|
||||
references=references,
|
||||
)
|
||||
print(f"Idx#{idx} - BLEU: {idx_results['bleu']: .3f}")
|
||||
|
||||
best_predictions = select_prediction_set_by_bleu(
|
||||
raw_predictions, references, bleu_scorer)
|
||||
best_results = bleu_scorer.compute(
|
||||
predictions=best_predictions,
|
||||
predictions=predictions,
|
||||
references=references
|
||||
)
|
||||
print(f"BEST BLEU: {best_results['bleu']: .3f}")
|
||||
|
@ -49,6 +33,7 @@ def eval_with_parent(args, testset, tokenizer, model):
|
|||
|
||||
raw_predictions = [ beam_generate(sample, tokenizer, model, args)
|
||||
for sample in testset]
|
||||
predictions = [sample[0]['tokens_clear'] for sample in raw_predictions]
|
||||
references = [ [tokenizer.tokenize(sample['target'])]
|
||||
for sample in testset]
|
||||
tokenized_tables = []
|
||||
|
@ -60,23 +45,8 @@ def eval_with_parent(args, testset, tokenizer, model):
|
|||
tokenized_table_parent.append( ([attr], value_tokens) )
|
||||
tokenized_tables.append(tokenized_table_parent)
|
||||
|
||||
pred_tokens_dict = {}
|
||||
for idx in range(args.num_return_sequences):
|
||||
pred_tokens_dict[idx] = [sample[idx]['tokens_clear'] for sample in raw_predictions]
|
||||
|
||||
for idx, predictions in pred_tokens_dict.items():
|
||||
(idx_p, idx_r, idx_f1, idx_all_f1) = parent_scorer(
|
||||
predictions=predictions,
|
||||
references=references,
|
||||
tables=tokenized_tables,
|
||||
return_dict=False,
|
||||
)
|
||||
print(f"Idx#{idx} - PARENT: {idx_p:.3f}, {idx_r:.3f}, {idx_f1:.3f}")
|
||||
|
||||
best_predictions = select_prediction_set_by_parent(
|
||||
raw_predictions, references, tokenized_tables)
|
||||
(avg_p, avg_r, avg_f, all_f) = parent_scorer(
|
||||
predictions=best_predictions,
|
||||
predictions=predictions,
|
||||
references=references,
|
||||
tables=tokenized_tables,
|
||||
return_dict=False
|
||||
|
|
Загрузка…
Ссылка в новой задаче