NER support for Albert in run_ner.py and NerPipeline (#2983)

* * Added support for Albert when fine-tuning for NER

* Added support for Albert in NER pipeline

* Added command-line options to examples/ner/run_ner.py to better control tokenization

* Added class AlbertForTokenClassification

* Changed output for NerPipeline to use .convert_ids_to_tokens(...) instead of .decode(...) to better reflect tokens

* Added ,

* Now passes style guide enforcement

* Changes from reviews.

* Code now passes style enforcement

* Added test for AlbertForTokenClassification

* Added test for AlbertForTokenClassification
This commit is contained in:
Lysandre Debut
2020-02-27 10:22:55 -05:00
committed by GitHub
6 changed files with 139 additions and 5 deletions

View File

@@ -29,6 +29,7 @@ if is_torch_available():
AlbertModel,
AlbertForMaskedLM,
AlbertForSequenceClassification,
AlbertForTokenClassification,
AlbertForQuestionAnswering,
)
from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -207,6 +208,25 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
self.check_loss_output(result)
def create_and_check_albert_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = AlbertForTokenClassification(config=config)
model.to(torch_device)
model.eval()
loss, logits = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
)
self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(