Longformer (#4352)

* first commit

* bug fixes

* better examples

* undo padding

* remove wrong VOCAB_FILES_NAMES

* License

* make style

* make isort happy

* unit tests

* integration test

* make `black` happy by undoing `isort` changes!!

* lint

* no need for the padding value

* batch_size not bsz

* remove unused type casting

* seqlen not seq_len

* staticmethod

* `bert` selfattention instead of `n2`

* uint8 instead of bool + lints

* pad inputs_embeds using embeddings not a constant

* black

* unit test with padding

* fix unit tests

* remove redundant unit test

* upload model weights

* resolve todo

* simpler _mask_invalid_locations without lru_cache + backward compatible masked_fill_

* increase unittest coverage
This commit is contained in:
Iz Beltagy
2020-05-19 07:04:43 -07:00
committed by GitHub
parent 31eedff5a0
commit 8f1d047148
10 changed files with 1113 additions and 3 deletions

View File

@@ -44,6 +44,7 @@ from .configuration_electra import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP, Electr
from .configuration_encoder_decoder import EncoderDecoderConfig
from .configuration_flaubert import FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, FlaubertConfig
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
from .configuration_longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig
from .configuration_marian import MarianConfig
from .configuration_mmbt import MMBTConfig
from .configuration_openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig
@@ -138,6 +139,7 @@ from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFas
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
from .tokenization_flaubert import FlaubertTokenizer
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
from .tokenization_longformer import LongformerTokenizer
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
from .tokenization_reformer import ReformerTokenizer
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
@@ -332,6 +334,8 @@ if is_torch_available():
REFORMER_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_longformer import LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP, LongformerModel, LongformerForMaskedLM
# Optimization
from .optimization import (
AdamW,