From 94785906309fb1154f63da16b053ee9416e04c7b Mon Sep 17 00:00:00 2001 From: Denny Date: Fri, 27 Sep 2019 15:18:42 -0300 Subject: [PATCH] Update run_lm_finetuning.py The method previously called here, add_special_tokens_single_sentence, does not exist under that name on the tokenizer; call add_special_tokens_single_sequence instead. --- examples/run_lm_finetuning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/run_lm_finetuning.py b/examples/run_lm_finetuning.py index 8d440ebcc6..6e1a150313 100644 --- a/examples/run_lm_finetuning.py +++ b/examples/run_lm_finetuning.py @@ -75,7 +75,7 @@ class TextDataset(Dataset): tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text)) for i in range(0, len(tokenized_text)-block_size+1, block_size): # Truncate in block of block_size - self.examples.append(tokenizer.add_special_tokens_single_sentence(tokenized_text[i:i+block_size])) + self.examples.append(tokenizer.add_special_tokens_single_sequence(tokenized_text[i:i+block_size])) # Note that we are loosing the last truncated example here for the sake of simplicity (no padding) # If your dataset is small, first you should loook for a bigger one :-) and second you # can change this behavior by adding (model specific) padding.