diff --git a/.circleci/deploy.sh b/.circleci/deploy.sh index 0f5e171c22..8eed414e88 100755 --- a/.circleci/deploy.sh +++ b/.circleci/deploy.sh @@ -56,5 +56,5 @@ deploy_doc "eb0e0ce" v3.4.0 deploy_doc "818878d" v3.5.1 deploy_doc "c781171" v4.0.0 deploy_doc "bfa4ccf" v4.1.1 -deploy_doc "7d9a9d0" # v4.2.1 Latest stable release +deploy_doc "7d9a9d0" # v4.2.2 Latest stable release deploy_doc "4cd22r1" v4.3.0 # Pre-release \ No newline at end of file diff --git a/docs/source/_static/js/custom.js b/docs/source/_static/js/custom.js index 0e9b0a2b70..b5d1265490 100644 --- a/docs/source/_static/js/custom.js +++ b/docs/source/_static/js/custom.js @@ -1,11 +1,11 @@ // These two things need to be updated at each release for the version selector. // Last stable version -const stableVersion = "v4.2.1" +const stableVersion = "v4.2.2" // Dictionary doc folder to label. The last stable version should have an empty key. const versionMapping = { "master": "master", "v4.3.0": "v4.3.0 (pre)", - "": "v4.2.0/v4.2.1 (stable)", + "": "v4.2.0/v4.2.1/v4.2.2 (stable)", "v4.1.1": "v4.1.0/v4.1.1", "v4.0.1": "v4.0.0/v4.0.1", "v3.5.1": "v3.5.0/v3.5.1", diff --git a/docs/source/main_classes/tokenizer.rst b/docs/source/main_classes/tokenizer.rst index a676b6081d..3bd9b3a966 100644 --- a/docs/source/main_classes/tokenizer.rst +++ b/docs/source/main_classes/tokenizer.rst @@ -54,9 +54,9 @@ PreTrainedTokenizer .. autoclass:: transformers.PreTrainedTokenizer :special-members: __call__ - :members: - - .. automethod:: encode + :members: batch_decode, convert_ids_to_tokens, convert_tokens_to_ids, convert_tokens_to_string, decode, encode, + get_added_vocab, get_special_tokens_mask, num_special_tokens_to_add, prepare_for_tokenization, tokenize, + vocab_size PreTrainedTokenizerFast @@ -64,9 +64,9 @@ PreTrainedTokenizerFast .. autoclass:: transformers.PreTrainedTokenizerFast :special-members: __call__ - :members: - - .. 
automethod:: encode + :members: batch_decode, convert_ids_to_tokens, convert_tokens_to_ids, convert_tokens_to_string, decode, encode, + get_added_vocab, get_special_tokens_mask, num_special_tokens_to_add, + set_truncation_and_padding, tokenize, vocab_size BatchEncoding diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 0f2880e2ed..58aa2848ad 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -101,7 +101,7 @@ def _is_start_of_word(text): return bool(_is_control(first_char) | _is_punctuation(first_char) | _is_whitespace(first_char)) -@add_end_docstrings(INIT_TOKENIZER_DOCSTRING, """ .. automethod:: __call__""") +@add_end_docstrings(INIT_TOKENIZER_DOCSTRING) class PreTrainedTokenizer(PreTrainedTokenizerBase): """ Base class for all slow tokenizers. diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 425942668c..85cee8f2c3 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -2056,7 +2056,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin): Whether or not to add the special tokens associated with the corresponding model. kwargs (additional keyword arguments, `optional`): Will be passed to the underlying model specific encode method. See details in - :meth:`~transformers.PreTrainedTokenizer.__call__` + :meth:`~transformers.PreTrainedTokenizerBase.__call__` Returns: :obj:`List[str]`: The list of tokens. diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 4ee82a4552..34cc039908 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -56,12 +56,7 @@ TOKENIZER_CONFIG_FILE = "tokenizer_config.json" ADDED_TOKENS_FILE = "added_tokens.json" -@add_end_docstrings( - INIT_TOKENIZER_DOCSTRING, - """ - .. 
automethod:: __call__ - """, -) +@add_end_docstrings(INIT_TOKENIZER_DOCSTRING) class PreTrainedTokenizerFast(PreTrainedTokenizerBase): """ Base class for all fast tokenizers (wrapping HuggingFace tokenizers library).