[tokenizers] Updates data processors, docstring, examples and model cards to the new API (#5308)

* remove references to old API in docstring - update data processors

* style

* fix tests - better type checking error messages

* better type checking

* include awesome fix by @LysandreJik for #5310

* updated doc and examples
This commit is contained in:
Thomas Wolf
2020-06-26 19:48:14 +02:00
committed by GitHub
parent fd405e9a93
commit 601d4d699c
73 changed files with 180 additions and 138 deletions

View File

@@ -375,10 +375,11 @@ class T5ModelIntegrationTests(unittest.TestCase):
summarization_config = task_specific_config.get("summarization", {})
model.config.update(summarization_config)
- dct = tok.batch_encode_plus(
+ dct = tok(
[model.config.prefix + x for x in [FRANCE_ARTICLE, SHORTER_ARTICLE, IRAN_ARTICLE, ARTICLE_SUBWAY]],
max_length=512,
- pad_to_max_length=True,
+ padding="max_length",
+ truncation=True,
return_tensors="pt",
return_tensors="pt",
)
self.assertEqual(512, dct["input_ids"].shape[1])