Add BlenderbotTokenizerFast (#13720)
* Add support for the fast (Rust) implementation of BlenderbotTokenizer
* Fix a converter and a typo in a doc
* Apply patil-suraj's suggestion
* (Nitpick) Fast tokenization -> Fast Tokenization in doc
* Apply SaulLu's suggestion
* Apply Narsil's suggestion to fix test pipelines
* Add encoder_no_repeat_ngram_size according to Narsil's suggestion
* Revert the last (unnecessary) commit
* Override pipeline config for Blenderbot to allow for larger pos. emb.
* make fix-copies
This commit is contained in:
@@ -124,6 +124,11 @@ class PipelineTestCaseMeta(type):
|
||||
def test(self):
|
||||
if ModelClass.__name__.endswith("ForCausalLM"):
|
||||
tiny_config.is_encoder_decoder = False
|
||||
if hasattr(tiny_config, "encoder_no_repeat_ngram_size"):
|
||||
# specific to Blenderbot, which supports both decoder-only and
|
||||
# encoder/decoder but the test config only reflects
|
||||
# encoder/decoder arch
|
||||
tiny_config.encoder_no_repeat_ngram_size = 0
|
||||
if ModelClass.__name__.endswith("WithLMHead"):
|
||||
tiny_config.is_decoder = True
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user