updated logging and saving metrics (#10436)
* updated logging and saving metrics
* space removal

This commit is contained in:
Parent: f52a15897b
Commit: aca6288ff4
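
Every hunk below applies the same refactor: the example scripts' hand-rolled train_results.txt / eval_results_*.txt writers, each guarded by trainer.is_world_process_zero(), give way to the Trainer metric helpers (judging by the eval_results_clm.txt, eval_results_mlm.txt, eval_results_plm.txt, eval_results_swag.txt, eval_results_ner.txt, and eval_results_xnli.txt filenames, the touched files are the language-modeling, multiple-choice, question-answering, text/token-classification, and XNLI examples). A minimal sketch of the pattern the commit converges on, assuming a transformers version that ships Trainer.log_metrics, Trainer.save_metrics, and Trainer.save_state; train_and_report is a hypothetical wrapper, not part of the diff:

    # Sketch only: train_and_report is a hypothetical name for the repeated pattern.
    def train_and_report(trainer, checkpoint=None):
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()  # Saves the tokenizer too for easy upload
        metrics = train_result.metrics

        trainer.log_metrics("train", metrics)   # pretty-prints the metrics to the log
        trainer.save_metrics("train", metrics)  # persists them under training_args.output_dir
        trainer.save_state()                    # replaces the manual trainer_state.json dump

The helpers perform the main-process check internally, which is why the trainer.is_world_process_zero() guards disappear along with the file-writing loops.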
@@ -375,16 +375,11 @@ def main():
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(train_result.metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        metrics = train_result.metrics
 
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     results = {}

@@ -396,13 +391,8 @@ def main():
         perplexity = math.exp(eval_output["eval_loss"])
         results["perplexity"] = perplexity
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results_clm.txt")
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results *****")
-                for key, value in sorted(results.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
     return results

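On the eval side the scripts keep computing perplexity themselves and delegate only the reporting. The sketch below shows roughly what the removed eval_results_clm.txt writer did and what save_metrics("eval", results) does instead; the eval_results.json filename follows the library's {split}_results.json convention, which is an assumption here rather than something visible in the diff:

    import json
    import math
    import os

    # Stand-in values; a real run takes eval_loss from trainer.evaluate().
    results = {"eval_loss": 2.31}
    results["perplexity"] = math.exp(results["eval_loss"])  # ~10.07

    output_dir = "output"  # hypothetical output directory
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, "eval_results.json"), "w") as f:
        json.dump(results, f, indent=4, sort_keys=True)

The move from loose "key = value" text files to JSON is the main change for anything parsing these results downstream.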
@@ -411,17 +411,11 @@ def main():
             checkpoint = None
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
+        metrics = train_result.metrics
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(train_result.metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     results = {}

@@ -433,13 +427,8 @@ def main():
         perplexity = math.exp(eval_output["eval_loss"])
         results["perplexity"] = perplexity
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results_mlm.txt")
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results *****")
-                for key, value in sorted(results.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
     return results

@@ -392,17 +392,11 @@ def main():
             checkpoint = None
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
+        metrics = train_result.metrics
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(train_result.metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     results = {}

@@ -414,13 +408,8 @@ def main():
         perplexity = math.exp(eval_output["eval_loss"])
         results["perplexity"] = perplexity
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results_plm.txt")
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results *****")
-                for key, value in sorted(results.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
     return results

@@ -227,6 +227,8 @@ def main():
     # Set the verbosity to info of the Transformers logger (on main process only):
     if is_main_process(training_args.local_rank):
         transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()
     logger.info("Training/evaluation parameters %s", training_args)
 
     # Set seed before initializing model.

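The smaller hunks such as the one above align the remaining scripts on the shared logging setup; both added calls are existing transformers.utils.logging APIs. In context, as a self-contained sketch (the is_main_process import location matches the example scripts of this era):

    import transformers
    from transformers.trainer_utils import is_main_process

    def setup_transformers_logging(local_rank):
        # Only the main process raises the library verbosity to INFO.
        if is_main_process(local_rank):
            transformers.utils.logging.set_verbosity_info()
            # Attach the library's default stream handler ...
            transformers.utils.logging.enable_default_handler()
            # ... and switch to the explicit "[LEVEL|file:line] time >> message" style.
            transformers.utils.logging.enable_explicit_format()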
@@ -367,17 +369,11 @@ def main():
             checkpoint = None
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
+        metrics = train_result.metrics
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(train_result.metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     results = {}

@@ -386,13 +382,8 @@ def main():
 
         results = trainer.evaluate()
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results_swag.txt")
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results *****")
-                for key, value in sorted(results.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
     return results

@@ -206,14 +206,10 @@ def main():
 
         result = trainer.evaluate()
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        with open(output_eval_file, "w") as writer:
-            logger.info("***** Eval results *****")
-            for key, value in result.items():
-                logger.info(" %s = %s", key, value)
-                writer.write("%s = %s\n" % (key, value))
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
-        results.update(result)
+        results.update(result)
 
     return results

@@ -201,6 +201,8 @@ def main():
     # Set the verbosity to info of the Transformers logger (on main process only):
     if is_main_process(training_args.local_rank):
         transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()
     logger.info("Training/evaluation parameters %s", training_args)
 
     # Set seed before initializing model.

@@ -479,16 +481,10 @@ def main():
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(train_result.metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        metrics = train_result.metrics
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     results = {}

@@ -496,13 +492,8 @@ def main():
         logger.info("*** Evaluate ***")
         results = trainer.evaluate()
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results *****")
-                for key, value in sorted(results.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
     return results

@@ -200,6 +200,8 @@ def main():
    # Set the verbosity to info of the Transformers logger (on main process only):
     if is_main_process(training_args.local_rank):
         transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()
     logger.info("Training/evaluation parameters %s", training_args)
 
     # Set seed before initializing model.

@@ -518,16 +520,10 @@ def main():
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(train_result.metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        metrics = train_result.metrics
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     results = {}

@@ -535,13 +531,8 @@ def main():
         logger.info("*** Evaluate ***")
         results = trainer.evaluate()
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results *****")
-                for key, value in sorted(results.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
     return results

@@ -417,16 +417,9 @@ def main():
 
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     eval_results = {}

@@ -443,13 +436,8 @@ def main():
         for eval_dataset, task in zip(eval_datasets, tasks):
             eval_result = trainer.evaluate(eval_dataset=eval_dataset)
 
-            output_eval_file = os.path.join(training_args.output_dir, f"eval_results_{task}.txt")
-            if trainer.is_world_process_zero():
-                with open(output_eval_file, "w") as writer:
-                    logger.info(f"***** Eval results {task} *****")
-                    for key, value in sorted(eval_result.items()):
-                        logger.info(f" {key} = {value}")
-                        writer.write(f"{key} = {value}\n")
+            trainer.log_metrics("eval", eval_result)
+            trainer.save_metrics("eval", eval_result)
 
             eval_results.update(eval_result)

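In the multi-task script above, the helpers are simply invoked once per task inside the evaluation loop; a condensed sketch, wrapped as a hypothetical evaluate_all helper with eval_datasets and tasks standing in for the script's real objects:

    # Per-task evaluation through the shared metric helpers (condensed).
    def evaluate_all(trainer, eval_datasets, tasks):
        eval_results = {}
        for eval_dataset, task in zip(eval_datasets, tasks):
            # task remains available for script-specific handling.
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)
            trainer.log_metrics("eval", eval_result)
            trainer.save_metrics("eval", eval_result)
            eval_results.update(eval_result)
        return eval_results

If save_metrics derives its filename from the split name alone, successive tasks overwrite the same eval results file, so the aggregated eval_results dict is what keeps every task's numbers.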
@@ -247,18 +247,10 @@ def main():
     results = {}
     if training_args.do_eval:
         logger.info("*** Evaluate ***")
 
         result = trainer.evaluate()
-
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-
-        with open(output_eval_file, "w") as writer:
-            logger.info("***** Eval results *****")
-
-            for key, value in result.items():
-                logger.info(" %s = %s", key, value)
-                writer.write("%s = %s\n" % (key, value))
-
-        results.update(result)
+        trainer.log_metrics("eval", result)
+        trainer.save_metrics("eval", result)
+        results.update(result)
 
     return results

@@ -293,18 +293,10 @@ def main():
     results = {}
     if training_args.do_eval:
         logger.info("*** Evaluate ***")
 
         result = trainer.evaluate()
-
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
-
-        with open(output_eval_file, "w") as writer:
-            logger.info("***** Eval results *****")
-
-            for key, value in result.items():
-                logger.info(" %s = %s", key, value)
-                writer.write("%s = %s\n" % (key, value))
-
-        results.update(result)
+        trainer.log_metrics("eval", result)
+        trainer.save_metrics("eval", result)
+        results.update(result)
 
     return results

@@ -291,33 +291,19 @@ def main():
 
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     eval_results = {}
     if training_args.do_eval:
         logger.info("*** Evaluate ***")
 
         eval_result = trainer.evaluate(eval_dataset=eval_dataset)
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results_xnli.txt")
-
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results xnli *****")
-                for key, value in sorted(eval_result.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
+        trainer.log_metrics("eval", eval_result)
+        trainer.save_metrics("eval", eval_result)
         eval_results.update(eval_result)
 
     return eval_results

@@ -387,18 +387,12 @@ def main():
         else:
             checkpoint = None
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
+        metrics = train_result.metrics
         trainer.save_model()  # Saves the tokenizer too for easy upload
 
-        output_train_file = os.path.join(training_args.output_dir, "train_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_train_file, "w") as writer:
-                logger.info("***** Train results *****")
-                for key, value in sorted(train_result.metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
-
-            # Need to save the state, since Trainer.save_model saves only the tokenizer with the model
-            trainer.state.save_to_json(os.path.join(training_args.output_dir, "trainer_state.json"))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
 
     # Evaluation
     results = {}

@@ -407,13 +401,8 @@ def main():
 
         results = trainer.evaluate()
 
-        output_eval_file = os.path.join(training_args.output_dir, "eval_results_ner.txt")
-        if trainer.is_world_process_zero():
-            with open(output_eval_file, "w") as writer:
-                logger.info("***** Eval results *****")
-                for key, value in results.items():
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("eval", results)
+        trainer.save_metrics("eval", results)
 
     # Predict
     if training_args.do_predict:

@@ -429,12 +418,8 @@ def main():
             for prediction, label in zip(predictions, labels)
         ]
 
-        output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt")
-        if trainer.is_world_process_zero():
-            with open(output_test_results_file, "w") as writer:
-                for key, value in sorted(metrics.items()):
-                    logger.info(f" {key} = {value}")
-                    writer.write(f"{key} = {value}\n")
+        trainer.log_metrics("test", metrics)
+        trainer.save_metrics("test", metrics)
 
         # Save predictions
         output_test_predictions_file = os.path.join(training_args.output_dir, "test_predictions.txt")

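With train, eval, and test metrics all funneled through the same helpers, downstream tooling can read them back uniformly. A hedged consumer sketch; the {split}_results.json naming is the library convention assumed throughout, and "output" is a hypothetical directory:

    import json
    import os

    output_dir = "output"  # hypothetical
    for split in ("train", "eval", "test"):
        path = os.path.join(output_dir, f"{split}_results.json")
        if os.path.exists(path):
            with open(path) as f:
                print(split, json.load(f))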