Tokenizers should be framework agnostic (#8599)

* Tokenizers should be framework agnostic
* Run the slow tests
* Not testing
* Fix documentation
* Apply suggestions from code review

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
2020-11-17 14:03:03 -05:00
parent 7f3b41a306
commit 3095ee9dab
28 changed files with 73 additions and 177 deletions
--- a/scripts/fsmt/fsmt-make-super-tiny-model.py
+++ b/scripts/fsmt/fsmt-make-super-tiny-model.py
@@ -58,7 +58,7 @@ tiny_model = FSMTForConditionalGeneration(config)
 print(f"num of params {tiny_model.num_parameters()}")

 # Test
-batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
+batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"], return_tensors="pt")
 outputs = tiny_model(**batch)

 print("test output:", len(outputs.logits[0]))
--- a/scripts/fsmt/fsmt-make-tiny-model.py
+++ b/scripts/fsmt/fsmt-make-tiny-model.py
@@ -29,7 +29,7 @@ tiny_model = FSMTForConditionalGeneration(config)
 print(f"num of params {tiny_model.num_parameters()}")

 # Test
-batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"])
+batch = tokenizer.prepare_seq2seq_batch(["Making tiny model"], return_tensors="pt")
 outputs = tiny_model(**batch)

 print("test output:", len(outputs.logits[0]))