LongformerTokenizerFast (#4547)
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
|
||||
import logging
|
||||
|
||||
from .tokenization_roberta import RobertaTokenizer
|
||||
from .tokenization_roberta import RobertaTokenizer, RobertaTokenizerFast
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -40,3 +40,12 @@ class LongformerTokenizer(RobertaTokenizer):
|
||||
"vocab_file": {m: vocab_url for m in _all_longformer_models},
|
||||
"merges_file": {m: merges_url for m in _all_longformer_models},
|
||||
}
|
||||
|
||||
|
||||
class LongformerTokenizerFast(RobertaTokenizerFast):
    """Longformer tokenizer built on top of ``RobertaTokenizerFast``.

    Only the class-level lookup tables below are overridden; no methods are
    redefined in this class.
    """

    # Longformer reuses RoBERTa's vocab and merges files, so every Longformer
    # model name is mapped to the same vocab URL and the same merges URL.
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    pretrained_vocab_files_map = {
        "vocab_file": dict.fromkeys(_all_longformer_models, vocab_url),
        "merges_file": dict.fromkeys(_all_longformer_models, merges_url),
    }
|
||||
|
||||
Reference in New Issue
Block a user