Add "Leveraging Pretrained Checkpoints for Generation" Seq2Seq models. (#6594)

* add conversion script * improve conversion script * make style * add tryout files * fix * update * add causal bert * better names * add tokenizer file as well * finish causal_bert * fix small bugs * improve generate * change naming * renaming * renaming * renaming * remove leftover files * clean files * add fix tokenizer * finalize * correct slow test * update docs * small fixes * fix link * adapt check repo * apply Sam's and Sylvain's recommendations * fix import * implement Lysandre's recommendations * fix logger warn
2020-09-10 16:40:51 +02:00
parent d1691d90e5
commit 7fd1febf38
20 changed files with 1508 additions and 9 deletions
--- a/tests/test_tokenization_t5.py
+++ b/tests/test_tokenization_t5.py
@@ -21,11 +21,12 @@ from transformers import BatchEncoding
 from transformers.file_utils import cached_property
 from transformers.testing_utils import _torch_available
 from transformers.tokenization_t5 import T5Tokenizer
-from transformers.tokenization_xlnet import SPIECE_UNDERLINE

 from .test_tokenization_common import TokenizerTesterMixin


+SPIECE_UNDERLINE = "▁"
+
 SAMPLE_VOCAB = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/test_sentencepiece.model")

 FRAMEWORK = "pt" if _torch_available else "tf"