Fix bug where token ids were not converted to piece ids

This commit is contained in:
berlino 2020-07-14 10:22:33 +01:00
Родитель e34319cfdd
Коммит 56ec3bf5f8
1 изменённых файлов: 17 добавлений и 1 удалений

Просмотреть файл

@ -624,6 +624,22 @@ class Bertokens:
new_sc_link[m_type] = _match
return new_sc_link
def bert_cv_linking(self, schema):
    """Run cell-value linking on the normalized pieces and re-key the result.

    ``compute_cell_value_linking`` is called with ``self.normalized_pieces``
    and ``schema``. Its result is read as a mapping of match type to a
    mapping whose keys are ``"i,j"`` strings. In each key the first index is
    translated through ``self.idx_map``; the second index is kept (after an
    ``int`` round-trip) and the associated value is carried over unchanged.

    NOTE(review): presumably ``idx_map`` maps positions in
    ``normalized_pieces`` to positions in the BERT piece sequence -- confirm
    against where ``idx_map`` is built.

    Args:
        schema: Passed through unchanged to ``compute_cell_value_linking``.

    Returns:
        A new dict with the same match types, whose inner keys are
        ``"<idx_map[i]>,<j>"`` strings.
    """
    raw_links = compute_cell_value_linking(self.normalized_pieces, schema)

    def _rekey(pair_key):
        # Keys look like "i,j"; anything else fails in the unpacking or int().
        left, right = pair_key.split(",")
        piece_pos, col_tab_pos = int(left), int(right)
        return f"{self.idx_map[piece_pos]},{col_tab_pos}"

    return {
        match_type: {_rekey(key): value for key, value in matches.items()}
        for match_type, matches in raw_links.items()
    }
class SpiderEncoderBertPreproc(SpiderEncoderV2Preproc):
@ -678,7 +694,7 @@ class SpiderEncoderBertPreproc(SpiderEncoderV2Preproc):
if self.compute_cv_link:
question_bert_tokens = Bertokens(question)
cv_link = compute_cell_value_linking(question_bert_tokens.normalized_pieces, item.schema)
cv_link = question_bert_tokens.bert_cv_linking(item.schema)
else:
cv_link = {"num_date_match": {}, "cell_match": {}}