Fixed all links. Removed TPU. Changed CLI to Converting TF models. Many minor formatting adjustments. Added "TODO Lysandre filled" where necessary.

2019-07-10 14:45:56 -04:00
parent 3f56ad5aff
commit f773faa258
19 changed files with 235 additions and 153 deletions
--- a/pytorch_transformers/tokenization_xlm.py
+++ b/pytorch_transformers/tokenization_xlm.py
@@ -77,10 +77,15 @@ def text_standardize(text):
 class XLMTokenizer(PreTrainedTokenizer):
    """
    BPE tokenizer for XLM, adapted from OpenAI BPE tokenizer. Peculiarities:
+
        - lower case all inputs
-        - uses SpaCy tokenizer and ftfy for pre-BPE tokenization if they are installed, fallback to BERT's BasicTokenizer if not.
-        - argument special_tokens and function set_special_tokens:
-            can be used to add additional symbols (ex: "__classify__") to a vocabulary.
+
+        - uses `SpaCy tokenizer <https://spacy.io/api/tokenizer/>`_ and \
+        `ftfy <https://ftfy.readthedocs.io/en/latest/>`_ for pre-BPE tokenization if they are installed, \
+        falls back to BERT's BasicTokenizer if not.
+
+        - argument ``special_tokens`` and function ``set_special_tokens`` can be used to add additional symbols \
+        (ex: "__classify__") to a vocabulary.
    """
    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP