LongformerTokenizerFast (#4547)
This commit is contained in:
@@ -139,7 +139,7 @@ from .tokenization_distilbert import DistilBertTokenizer, DistilBertTokenizerFas
|
|||||||
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
from .tokenization_electra import ElectraTokenizer, ElectraTokenizerFast
|
||||||
from .tokenization_flaubert import FlaubertTokenizer
|
from .tokenization_flaubert import FlaubertTokenizer
|
||||||
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
from .tokenization_gpt2 import GPT2Tokenizer, GPT2TokenizerFast
|
||||||
from .tokenization_longformer import LongformerTokenizer
|
from .tokenization_longformer import LongformerTokenizer, LongformerTokenizerFast
|
||||||
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
|
from .tokenization_openai import OpenAIGPTTokenizer, OpenAIGPTTokenizerFast
|
||||||
from .tokenization_reformer import ReformerTokenizer
|
from .tokenization_reformer import ReformerTokenizer
|
||||||
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
|
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .tokenization_roberta import RobertaTokenizer
|
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -40,3 +40,12 @@ class LongformerTokenizer(RobertaTokenizer):
|
|||||||
"vocab_file": {m: vocab_url for m in _all_longformer_models},
|
"vocab_file": {m: vocab_url for m in _all_longformer_models},
|
||||||
"merges_file": {m: merges_url for m in _all_longformer_models},
|
"merges_file": {m: merges_url for m in _all_longformer_models},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class LongformerTokenizerFast(RobertaTokenizerFast):
|
||||||
|
# merges and vocab same as Roberta
|
||||||
|
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
|
||||||
|
pretrained_vocab_files_map = {
|
||||||
|
"vocab_file": {m: vocab_url for m in _all_longformer_models},
|
||||||
|
"merges_file": {m: merges_url for m in _all_longformer_models},
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user