ELECTRA (#3257)
* Electra wip * helpers * Electra wip * Electra v1 * ELECTRA may be saved/loaded * Generator & Discriminator * Embedding size instead of halving the hidden size * ELECTRA Tokenizer * Revert BERT helpers * ELECTRA Conversion script * Archive maps * PyTorch tests * Start fixing tests * Tests pass * Same configuration for both models * Compatible with base + large * Simplification + weight tying * Archives * Auto + Renaming to standard names * ELECTRA is uncased * Tests * Slight API changes * Update tests * wip * ElectraForTokenClassification * temp * Simpler arch + tests Removed ElectraForPreTraining which will be in a script * Conversion script * Auto model * Update links to S3 * Split ElectraForPreTraining and ElectraForTokenClassification * Actually test PreTraining model * Remove num_labels from configuration * wip * wip * From discriminator and generator to electra * Slight API changes * Better naming * TensorFlow ELECTRA tests * Accurate conversion script * Added to conversion script * Fast ELECTRA tokenizer * Style * Add ELECTRA to README * Modeling PyTorch Doc + Real style * TF Docs * Docs * Correct links * Correct model initialized * random fixes * style * Addressing Patrick's and Sam's comments * Correct links in docs
This commit is contained in:
@@ -38,6 +38,7 @@ from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig
|
||||
from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
|
||||
from .configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig
|
||||
from .configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig
|
||||
from .configuration_electra import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, ElectraConfig
|
||||
from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
|
||||
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
|
||||
from .configuration_mmbt import MMBTConfig
|
||||
@@ -127,6 +128,7 @@ from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenize
|
||||
from .tokenization_camembert import CamembertTokenizer
|
||||
from .tokenization_ctrl import CTRLTokenizer
|
||||
from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFast
|
||||
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
||||
from .tokenization_flaubert import FlaubertTokenizer
|
||||
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
||||
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
|
||||
@@ -297,6 +299,15 @@ if is_torch_available():
|
||||
FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||
)
|
||||
|
||||
from .modeling_electra import (
|
||||
ElectraForPreTraining,
|
||||
ElectraForMaskedLM,
|
||||
ElectraForTokenClassification,
|
||||
ElectraModel,
|
||||
load_tf_weights_in_electra,
|
||||
ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||
)
|
||||
|
||||
# Optimization
|
||||
from .optimization import (
|
||||
AdamW,
|
||||
@@ -463,6 +474,15 @@ if is_tf_available():
|
||||
TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||
)
|
||||
|
||||
from .modeling_tf_electra import (
|
||||
TFElectraPreTrainedModel,
|
||||
TFElectraModel,
|
||||
TFElectraForPreTraining,
|
||||
TFElectraForMaskedLM,
|
||||
TFElectraForTokenClassification,
|
||||
TF_ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||
)
|
||||
|
||||
# Optimization
|
||||
from .optimization_tf import WarmUp, create_optimizer, AdamWeightDecay, GradientAccumulator
|
||||
|
||||
|
||||
Reference in New Issue
Block a user