* Electra wip

* helpers

* Electra wip

* Electra v1

* ELECTRA may be saved/loaded

* Generator & Discriminator

* Embedding size instead of halving the hidden size

* ELECTRA Tokenizer

* Revert BERT helpers

* ELECTRA Conversion script

* Archive maps

* PyTorch tests

* Start fixing tests

* Tests pass

* Same configuration for both models

* Compatible with base + large

* Simplification + weight tying

* Archives

* Auto + Renaming to standard names

* ELECTRA is uncased

* Tests

* Slight API changes

* Update tests

* wip

* ElectraForTokenClassification

* temp

* Simpler arch + tests

Removed ElectraForPreTraining which will be in a script

* Conversion script

* Auto model

* Update links to S3

* Split ElectraForPreTraining and ElectraForTokenClassification

* Actually test PreTraining model

* Remove num_labels from configuration

* wip

* wip

* From discriminator and generator to electra

* Slight API changes

* Better naming

* TensorFlow ELECTRA tests

* Accurate conversion script

* Added to conversion script

* Fast ELECTRA tokenizer

* Style

* Add ELECTRA to README

* Modeling Pytorch Doc + Real style

* TF Docs

* Docs

* Correct links

* Correct model intialized

* random fixes

* style

* Addressing Patrick's and Sam's comments

* Correct links in docs
This commit is contained in:
Lysandre Debut
2020-04-03 14:10:54 -04:00
committed by GitHub
parent 8594dd80dd
commit d5d7d88612
16 changed files with 2279 additions and 5 deletions

View File

@@ -38,6 +38,7 @@ from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig
from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
from .configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig
from .configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig
from .configuration_electra import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, ElectraConfig
from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
from .configuration_mmbt import MMBTConfig
@@ -127,6 +128,7 @@ from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenize
from .tokenization_camembert import CamembertTokenizer
from .tokenization_ctrl import CTRLTokenizer
from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
from .tokenization_flaubert import FlaubertTokenizer
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
@@ -297,6 +299,15 @@ if is_torch_available():
FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_electra import (
ElectraForPreTraining,
ElectraForMaskedLM,
ElectraForTokenClassification,
ElectraModel,
load_tf_weights_in_electra,
ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
)
# Optimization
from .optimization import (
AdamW,
@@ -463,6 +474,15 @@ if is_tf_available():
TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_electra import (
TFElectraPreTrainedModel,
TFElectraModel,
TFElectraForPreTraining,
TFElectraForMaskedLM,
TFElectraForTokenClassification,
TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
)
# Optimization
from .optimization_tf import WarmUp, create_optimizer, AdamWeightDecay, GradientAccumulator