From 7bddb45a6f2a6d2edf2bedde3817041a9d169d2b Mon Sep 17 00:00:00 2001 From: LysandreJik Date: Fri, 4 Oct 2019 14:27:38 -0400 Subject: [PATCH] Decode documentation --- transformers/tokenization_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/transformers/tokenization_utils.py b/transformers/tokenization_utils.py index de3f48f4c3..a712703190 100644 --- a/transformers/tokenization_utils.py +++ b/transformers/tokenization_utils.py @@ -912,6 +912,11 @@ class PreTrainedTokenizer(object): Converts a sequence of ids (integer) in a string, using the tokenizer and vocabulary with options to remove special tokens and clean up tokenization spaces. Similar to doing ``self.convert_tokens_to_string(self.convert_ids_to_tokens(token_ids))``. + + Args: + token_ids: list of tokenized input ids. Can be obtained using the `encode` or `encode_plus` methods. + skip_special_tokens: if set to True, will remove special tokens. + clean_up_tokenization_spaces: if set to True, will clean up the tokenization spaces. """ filtered_tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)