From 529a16dec6cc9bfcf8954a1b16546960f2fab6fa Mon Sep 17 00:00:00 2001
From: LysandreJik
Date: Mon, 26 Aug 2019 15:00:43 -0400
Subject: [PATCH] Generic encoding implementation.

---
 pytorch_transformers/tokenization_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pytorch_transformers/tokenization_utils.py b/pytorch_transformers/tokenization_utils.py
index 2fb7f87e9c..3596711bdb 100644
--- a/pytorch_transformers/tokenization_utils.py
+++ b/pytorch_transformers/tokenization_utils.py
@@ -593,10 +593,12 @@ class PreTrainedTokenizer(object):
         return first_sentence_tokens, second_sentence_tokens
 
     def add_special_tokens_single_sentence(self, token_ids):
-        raise NotImplementedError
+        logger.warning("This tokenizer does not make use of special tokens. The sequence has been returned with no modification.")
+        return token_ids
 
     def add_special_tokens_sentences_pair(self, token_ids_0, token_ids_1):
-        raise NotImplementedError
+        logger.warning("This tokenizer does not make use of special tokens. The two sequences have been concatenated.")
+        return token_ids_0 + token_ids_1
 
     def convert_ids_to_tokens(self, ids, skip_special_tokens=False):
         """ Converts a single index or a sequence of indices (integers) in a token "