NER support for Albert in run_ner.py and NerPipeline (#2983)

* * Added support for Albert when fine-tuning for NER * Added support for Albert in NER pipeline * Added command-line options to examples/ner/run_ner.py to better control tokenization * Added class AlbertForTokenClassification * Changed output for NerPipeline to use .convert_ids_to_tokens(...) instead of .decode(...) to better reflect tokens * Added , * Now passes style guide enforcement * Changes from reviews. * Code now passes style enforcement * Added test for AlbertForTokenClassification * Added test for AlbertForTokenClassification
2020-02-27 10:22:55 -05:00
parent 6a37588041 f71157529e
commit 8bcb37bfb8
6 changed files with 139 additions and 5 deletions
--- a/tests/test_modeling_albert.py
+++ b/tests/test_modeling_albert.py
@@ -29,6 +29,7 @@ if is_torch_available():
        AlbertModel,
        AlbertForMaskedLM,
        AlbertForSequenceClassification,
+        AlbertForTokenClassification,
        AlbertForQuestionAnswering,
    )
    from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -207,6 +208,25 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
            self.check_loss_output(result)

+        def create_and_check_albert_for_token_classification(
+            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+        ):
+            config.num_labels = self.num_labels
+            model = AlbertForTokenClassification(config=config)
+            model.to(torch_device)
+            model.eval()
+            loss, logits = model(
+                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
+            )
+            result = {
+                "loss": loss,
+                "logits": logits,
+            }
+            self.parent.assertListEqual(
+                list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
+            )
+            self.check_loss_output(result)
+
        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (