From 83d38c9ff387a7b4ffe40082a3f8c036bf0eddd7 Mon Sep 17 00:00:00 2001 From: Suraj Patil Date: Tue, 30 Mar 2021 20:45:55 +0530 Subject: [PATCH] GPT Neo few fixes (#10968) * fix checkpoint names * auto model * fix doc --- docs/source/model_doc/gpt_neo.rst | 4 ++-- docs/source/pretrained_models.rst | 4 ++-- src/transformers/models/auto/modeling_auto.py | 1 + .../models/gpt_neo/configuration_gpt_neo.py | 10 +++++----- src/transformers/models/gpt_neo/modeling_gpt_neo.py | 4 ++-- src/transformers/pipelines/text_generation.py | 1 + tests/test_modeling_gpt_neo.py | 8 ++++---- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/docs/source/model_doc/gpt_neo.rst b/docs/source/model_doc/gpt_neo.rst index e7a3732913..652c613a34 100644 --- a/docs/source/model_doc/gpt_neo.rst +++ b/docs/source/model_doc/gpt_neo.rst @@ -31,8 +31,8 @@ The :obj:`generate()` method can be used to generate text using GPT Neo model. .. code-block:: >>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer - >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl") - >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl") + >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B") + >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B") >>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \ ... "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \ diff --git a/docs/source/pretrained_models.rst b/docs/source/pretrained_models.rst index f8bcef0586..090e50f5ba 100644 --- a/docs/source/pretrained_models.rst +++ b/docs/source/pretrained_models.rst @@ -139,10 +139,10 @@ For the full list, refer to `https://huggingface.co/models `__ architecture. + configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt-neo-1.3B + `__ architecture. Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information. @@ -81,10 +81,10 @@ class GPTNeoConfig(PretrainedConfig): >>> from transformers import GPTNeoModel, GPTNeoConfig - >>> # Initializing a GPTNeo EleutherAI/gpt_neo_xl style configuration + >>> # Initializing a GPTNeo EleutherAI/gpt-neo-1.3B style configuration >>> configuration = GPTNeoConfig() - >>> # Initializing a model from the EleutherAI/gpt_neo_xl style configuration + >>> # Initializing a model from the EleutherAI/gpt-neo-1.3B style configuration >>> model = GPTNeoModel(configuration) >>> # Accessing the model configuration diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py index 8903e41d25..7abaa9c7aa 100755 --- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py @@ -43,11 +43,11 @@ _CONFIG_FOR_DOC = "GPTNeoConfig" _TOKENIZER_FOR_DOC = "GPT2Tokenizer" GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST = [ - "EleutherAI/gpt_neo_xl", + "EleutherAI/gpt-neo-1.3B", # See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo ] -_CHECKPOINT_FOR_DOC = "EleutherAI/gpt_neo_xl" +_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-neo-1.3B" def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path): diff --git a/src/transformers/pipelines/text_generation.py b/src/transformers/pipelines/text_generation.py index 12c1e3b4a4..1f98d37479 100644 --- a/src/transformers/pipelines/text_generation.py +++ b/src/transformers/pipelines/text_generation.py @@ -35,6 +35,7 @@ class TextGenerationPipeline(Pipeline): "TransfoXLLMHeadModel", "ReformerModelWithLMHead", "GPT2LMHeadModel", + "GPTNeoForCausalLM", "OpenAIGPTLMHeadModel", "CTRLLMHeadModel", "TFXLNetLMHeadModel", diff --git a/tests/test_modeling_gpt_neo.py b/tests/test_modeling_gpt_neo.py index bea0ee7764..023a9d265e 100644 --- a/tests/test_modeling_gpt_neo.py +++ b/tests/test_modeling_gpt_neo.py @@ -432,7 +432,7 @@ class GPTNeoModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase @slow def test_batch_generation(self): - model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl") + model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B") model.to(torch_device) tokenizer = GPT2Tokenizer.from_pretrained("gpt2") @@ -486,7 +486,7 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase): @slow def test_lm_generate_gpt_neo(self): for checkpointing in [True, False]: - model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl", gradient_checkpointing=checkpointing) + model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", gradient_checkpointing=checkpointing) model.to(torch_device) input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device) # The dog # fmt: off @@ -497,8 +497,8 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase): @slow def test_gpt_neo_sample(self): - tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl") - model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl") + tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B") + model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B") model.to(torch_device) torch.manual_seed(0)