diff --git a/examples/named_entity_recognition/ner_wikigold_transformer.ipynb b/examples/named_entity_recognition/ner_wikigold_transformer.ipynb index 7f10fb3..84cddf9 100644 --- a/examples/named_entity_recognition/ner_wikigold_transformer.ipynb +++ b/examples/named_entity_recognition/ner_wikigold_transformer.ipynb @@ -53,7 +53,8 @@ "from utils_nlp.dataset.ner_utils import read_conll_file\n", "from utils_nlp.dataset.url_utils import maybe_download\n", "from utils_nlp.models.transformers.named_entity_recognition import (\n", - " TokenClassificationProcessor, TokenClassifier)\n" + " TokenClassificationProcessor, TokenClassifier)\n", + "from utils_nlp.models.transformers.named_entity_recognition import supported_models as SUPPORTED_MODELS" ] }, { @@ -126,7 +127,7 @@ "\n", "# model configurations\n", "NUM_TRAIN_EPOCHS = 5\n", - "MODEL_NAME = \"bert-base-cased\"\n", + "MODEL_NAME = \"distilbert-base-cased\"\n", "DO_LOWER_CASE = False\n", "MAX_SEQ_LENGTH = 200\n", "TRAILING_PIECE_TAG = \"X\"\n", @@ -139,6 +140,118 @@ " NUM_TRAIN_EPOCHS = 1" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Models that can be used for token classification task" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | supported models | \n", + "
---|---|
0 | \n", + "albert-base-v1 | \n", + "
1 | \n", + "albert-base-v2 | \n", + "
2 | \n", + "albert-large-v1 | \n", + "
3 | \n", + "albert-large-v2 | \n", + "
4 | \n", + "albert-xlarge-v1 | \n", + "
... | \n", + "... | \n", + "
65 | \n", + "xlm-roberta-large-finetuned-conll02-spanish | \n", + "
66 | \n", + "xlm-roberta-large-finetuned-conll03-english | \n", + "
67 | \n", + "xlm-roberta-large-finetuned-conll03-german | \n", + "
68 | \n", + "xlnet-base-cased | \n", + "
69 | \n", + "xlnet-large-cased | \n", + "
70 rows × 1 columns
\n", + "\n", + " | sentence | \n", + "labels | \n", + "
---|---|---|
0 | \n", + "[The, origin, of, Agotes, (, or, Cagots, ), is... | \n", + "[O, O, O, I-MISC, O, O, I-MISC, O, O, O, O] | \n", + "
1 | \n", + "[-DOCSTART-] | \n", + "[O] | \n", + "
2 | \n", + "[It, provides, full, -, and, part-time, polyte... | \n", + "[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ... | \n", + "
3 | \n", + "[Since, she, was, the, daughter, of, the, grea... | \n", + "[O, O, O, O, O, O, O, O, I-MISC, O, O, O, I-MI... | \n", + "
4 | \n", + "[The, goals, were, two, posts, ,, with, no, cr... | \n", + "[O, O, O, O, O, O, O, O, O, O] | \n", + "
5 | \n", + "[At, one, point, ,, so, many, orders, had, bee... | \n", + "[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ... | \n", + "
6 | \n", + "[Left, camp, in, July, 1972, ,, and, was, deal... | \n", + "[O, O, O, O, O, O, O, O, O, O, O, I-ORG, I-ORG... | \n", + "
7 | \n", + "[She, fled, again, to, Abra, ,, where, she, wa... | \n", + "[O, O, O, O, I-LOC, O, O, O, O, O, O] | \n", + "
8 | \n", + "[As, the, younger, sibling, ,, Ben, was, const... | \n", + "[O, O, O, O, O, I-PER, O, O, O, O, O, O, O, O,... | \n", + "
9 | \n", + "[Milepost, 1, :, granite, masonry, arch, over,... | \n", + "[O, O, O, O, O, O, O, I-LOC, I-LOC, O] | \n", + "