diff --git a/tapex/data_utils/preprocess_bpe.py b/tapex/data_utils/preprocess_bpe.py index ec4f7d3..054c379 100644 --- a/tapex/data_utils/preprocess_bpe.py +++ b/tapex/data_utils/preprocess_bpe.py @@ -7,14 +7,14 @@ A wrapper to wrap the BPE preprocessing procedure for different tasks: 2. TableFT tasks - Class setting (TabFact) """ import argparse -import os -import shutil -import tarfile import logging -from fairseq.examples.roberta.multiprocessing_bpe_encoder import main as bpe_main -from common.download import download_model_weights, download_bpe_files +import os import sys +from fairseq.examples.roberta.multiprocessing_bpe_encoder import main as bpe_main + +from tapex.common.download import download_model_weights, download_bpe_files + logger = logging.getLogger(__name__) diff --git a/tapex/processor/table_truncate.py b/tapex/processor/table_truncate.py index d3334d8..ba5253a 100644 --- a/tapex/processor/table_truncate.py +++ b/tapex/processor/table_truncate.py @@ -111,7 +111,7 @@ class RowDeleteTruncate(TableTruncate): def estimate_delete_ratio(self, table_content: Dict, question: str): assert "header" in table_content and "rows" in table_content number_of_rows = len(table_content["rows"]) - # calculate the tokens of header, special tokens will only be pre-prended into question + # calculate the tokens of question, special tokens will only be prepended to the question question_tokens = self.tokenizer.tokenize(question, add_special_tokens=True) # calculate the tokens of header header_string = self.table_linearize.process_header(table_content["header"])