ELECTRA (#3257)

* Electra wip * helpers * Electra wip * Electra v1 * ELECTRA may be saved/loaded * Generator & Discriminator * Embedding size instead of halving the hidden size * ELECTRA Tokenizer * Revert BERT helpers * ELECTRA Conversion script * Archive maps * PyTorch tests * Start fixing tests * Tests pass * Same configuration for both models * Compatible with base + large * Simplification + weight tying * Archives * Auto + Renaming to standard names * ELECTRA is uncased * Tests * Slight API changes * Update tests * wip * ElectraForTokenClassification * temp * Simpler arch + tests Removed ElectraForPreTraining which will be in a script * Conversion script * Auto model * Update links to S3 * Split ElectraForPreTraining and ElectraForTokenClassification * Actually test PreTraining model * Remove num_labels from configuration * wip * wip * From discriminator and generator to electra * Slight API changes * Better naming * TensorFlow ELECTRA tests * Accurate conversion script * Added to conversion script * Fast ELECTRA tokenizer * Style * Add ELECTRA to README * Modeling Pytorch Doc + Real style * TF Docs * Docs * Correct links * Correct model intialized * random fixes * style * Addressing Patrick's and Sam's comments * Correct links in docs
2020-04-03 14:10:54 -04:00
parent 8594dd80dd
commit d5d7d88612
16 changed files with 2279 additions and 5 deletions
--- a/src/transformers/init.py
+++ b/src/transformers/init.py
@@ -38,6 +38,7 @@ from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig
 from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
 from .configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig
 from .configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig
+from .configuration_electra import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, ElectraConfig
 from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
 from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
 from .configuration_mmbt import MMBTConfig
@@ -127,6 +128,7 @@ from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenize
 from .tokenization_camembert import CamembertTokenizer
 from .tokenization_ctrl import CTRLTokenizer
 from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast
+from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
 from .tokenization_flaubert import FlaubertTokenizer
 from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
 from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
@@ -297,6 +299,15 @@ if is_torch_available():
        FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
    )

+    from .modeling_electra import (
+        ElectraForPreTraining,
+        ElectraForMaskedLM,
+        ElectraForTokenClassification,
+        ElectraModel,
+        load_tf_weights_in_electra,
+        ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
+    )
+
    # Optimization
    from .optimization import (
        AdamW,
@@ -463,6 +474,15 @@ if is_tf_available():
        TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP,
    )

+    from .modeling_tf_electra import (
+        TFElectraPreTrainedModel,
+        TFElectraModel,
+        TFElectraForPreTraining,
+        TFElectraForMaskedLM,
+        TFElectraForTokenClassification,
+        TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
+    )
+
    # Optimization
    from .optimization_tf import WarmUp, create_optimizer, AdamWeightDecay, GradientAccumulator