[tokenizers] Updates data processors, docstring, examples and model cards to the new API (#5308)

* remove references to old API in docstring - update data processors
* style
* fix tests - better type checking error messages
* better type checking
* include awesome fix by @LysandreJik for #5310
* updated doc and examples
2020-06-26 19:48:14 +02:00
parent fd405e9a93
commit 601d4d699c
73 changed files with 180 additions and 138 deletions
--- a/examples/seq2seq/run_eval.py
+++ b/examples/seq2seq/run_eval.py
@@ -45,9 +45,9 @@ def generate_summaries_or_translations(
    for batch in tqdm(list(chunks(examples, batch_size))):
        if "t5" in model_name:
            batch = [model.config.prefix + text for text in batch]
-        batch = tokenizer.batch_encode_plus(
-            batch, max_length=1024, return_tensors="pt", truncation=True, pad_to_max_length=True
-        ).to(device)
+        batch = tokenizer(batch, max_length=1024, return_tensors="pt", truncation=True, padding="max_length").to(
+            device
+        )
        summaries = model.generate(**batch, **gen_kwargs)
        dec = tokenizer.batch_decode(summaries, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        for hypothesis in dec: