diff --git a/examples/contrib/run_openai_gpt.py b/examples/contrib/run_openai_gpt.py index 4033940b2d..1c9fba8ee8 100644 --- a/examples/contrib/run_openai_gpt.py +++ b/examples/contrib/run_openai_gpt.py @@ -153,9 +153,11 @@ def main(): # This loading functions also add new tokens and embeddings called `special tokens` # These new embeddings will be fine-tuned on the RocStories dataset special_tokens = ['_start_', '_delimiter_', '_classify_'] - tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_name, special_tokens=special_tokens) - special_tokens_ids = list(tokenizer.convert_tokens_to_ids(token) for token in special_tokens) - model = OpenAIGPTDoubleHeadsModel.from_pretrained(args.model_name, num_special_tokens=len(special_tokens)) + tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_name) + tokenizer.add_tokens(special_tokens) + special_tokens_ids = tokenizer.convert_tokens_to_ids(special_tokens) + model = OpenAIGPTDoubleHeadsModel.from_pretrained(args.model_name) + model.resize_token_embeddings(len(tokenizer)) model.to(device) # Load and encode the datasets