From 83d38c9ff387a7b4ffe40082a3f8c036bf0eddd7 Mon Sep 17 00:00:00 2001
From: Suraj Patil <surajp815@gmail.com>
Date: Tue, 30 Mar 2021 20:45:55 +0530
Subject: [PATCH] GPT Neo few fixes (#10968)

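* fix checkpoint names (EleutherAI/gpt_neo_xl -> EleutherAI/gpt-neo-1.3B,
  EleutherAI/gpt_neo_2.7B -> EleutherAI/gpt-neo-2.7B)

* auto model: add (GPTNeoConfig, GPTNeoForCausalLM) to
  MODEL_WITH_LM_HEAD_MAPPING and list GPTNeoForCausalLM among the model
  classes named in TextGenerationPipeline

* fix doc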
---
 docs/source/model_doc/gpt_neo.rst                      |  4 ++--
 docs/source/pretrained_models.rst                      |  4 ++--
 src/transformers/models/auto/modeling_auto.py          |  1 +
 .../models/gpt_neo/configuration_gpt_neo.py            | 10 +++++-----
 src/transformers/models/gpt_neo/modeling_gpt_neo.py    |  4 ++--
 src/transformers/pipelines/text_generation.py          |  1 +
 tests/test_modeling_gpt_neo.py                         |  8 ++++----
 7 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/docs/source/model_doc/gpt_neo.rst b/docs/source/model_doc/gpt_neo.rst
index e7a3732913..652c613a34 100644
--- a/docs/source/model_doc/gpt_neo.rst
+++ b/docs/source/model_doc/gpt_neo.rst
@@ -31,8 +31,8 @@ The :obj:`generate()` method can be used to generate text using GPT Neo model.
 .. code-block::
 
     >>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-    >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
-    >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
+    >>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    >>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
 
     >>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
     ...          "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
diff --git a/docs/source/pretrained_models.rst b/docs/source/pretrained_models.rst
index f8bcef0586..090e50f5ba 100644
--- a/docs/source/pretrained_models.rst
+++ b/docs/source/pretrained_models.rst
@@ -139,10 +139,10 @@ For the full list, refer to `https://huggingface.co/models <https://huggingface.
 |                    | ``gpt2-xl``                                                | | 48-layer, 1600-hidden, 25-heads, 1558M parameters.                                                                                  |
 |                    |                                                            | | OpenAI's XL-sized GPT-2 English model                                                                                               |
 +--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-| GPTNeo             | ``EleutherAI/gpt_neo_xl``                                  | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters.                                                                                   |
+| GPTNeo             | ``EleutherAI/gpt-neo-1.3B``                                | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters.                                                                                   |
 |                    |                                                            | | EleutherAI's GPT-3 like language model.                                                                                             |
 |                    +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
-|                    | ``EleutherAI/gpt_neo_2.7B``                                | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters.                                                                                   |
+|                    | ``EleutherAI/gpt-neo-2.7B``                                | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters.                                                                                   |
 |                    |                                                            | | EleutherAI's GPT-3 like language model.                                                                                             |
 +--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
 | Transformer-XL     | ``transfo-xl-wt103``                                       | | 18-layer, 1024-hidden, 16-heads, 257M parameters.                                                                                   |
diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
index 9317883cbd..2dce0c1a75 100644
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -418,6 +418,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
 MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
     [
         # Model with LM heads mapping
+        (GPTNeoConfig, GPTNeoForCausalLM),
         (BigBirdConfig, BigBirdForMaskedLM),
         (Speech2TextConfig, Speech2TextForConditionalGeneration),
         (Wav2Vec2Config, Wav2Vec2ForMaskedLM),
diff --git a/src/transformers/models/gpt_neo/configuration_gpt_neo.py b/src/transformers/models/gpt_neo/configuration_gpt_neo.py
index 8c2de843b2..37df09ae7d 100644
--- a/src/transformers/models/gpt_neo/configuration_gpt_neo.py
+++ b/src/transformers/models/gpt_neo/configuration_gpt_neo.py
@@ -21,7 +21,7 @@ from ...utils import logging
 logger = logging.get_logger(__name__)
 
 GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "EleutherAI/gpt_neo_xl": "https://huggingface.co/EleutherAI/gpt_neo_xl/resolve/main/config.json",
+    "EleutherAI/gpt-neo-1.3B": "https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json",
     # See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
 }
 
@@ -30,8 +30,8 @@ class GPTNeoConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a :class:`~transformers.GPTNeoModel`. It is used to
     instantiate a GPT Neo model according to the specified arguments, defining the model architecture. Instantiating a
-    configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt_neo_xl
-    <https://huggingface.co/EleutherAI/gpt_neo_xl>`__ architecture.
+    configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt-neo-1.3B
+    <https://huggingface.co/EleutherAI/gpt-neo-1.3B>`__ architecture.
 
     Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
     outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
@@ -81,10 +81,10 @@ class GPTNeoConfig(PretrainedConfig):
 
         >>> from transformers import GPTNeoModel, GPTNeoConfig
 
-        >>> # Initializing a GPTNeo EleutherAI/gpt_neo_xl style configuration
+        >>> # Initializing a GPTNeo EleutherAI/gpt-neo-1.3B style configuration
         >>> configuration = GPTNeoConfig()
 
-        >>> # Initializing a model from the EleutherAI/gpt_neo_xl style configuration
+        >>> # Initializing a model from the EleutherAI/gpt-neo-1.3B style configuration
         >>> model = GPTNeoModel(configuration)
 
         >>> # Accessing the model configuration
diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
index 8903e41d25..7abaa9c7aa 100755
--- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py
+++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py
@@ -43,11 +43,11 @@ _CONFIG_FOR_DOC = "GPTNeoConfig"
 _TOKENIZER_FOR_DOC = "GPT2Tokenizer"
 
 GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "EleutherAI/gpt_neo_xl",
+    "EleutherAI/gpt-neo-1.3B",
     # See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
 ]
 
-_CHECKPOINT_FOR_DOC = "EleutherAI/gpt_neo_xl"
+_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-neo-1.3B"
 
 
 def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
diff --git a/src/transformers/pipelines/text_generation.py b/src/transformers/pipelines/text_generation.py
index 12c1e3b4a4..1f98d37479 100644
--- a/src/transformers/pipelines/text_generation.py
+++ b/src/transformers/pipelines/text_generation.py
@@ -35,6 +35,7 @@ class TextGenerationPipeline(Pipeline):
         "TransfoXLLMHeadModel",
         "ReformerModelWithLMHead",
         "GPT2LMHeadModel",
+        "GPTNeoForCausalLM",
         "OpenAIGPTLMHeadModel",
         "CTRLLMHeadModel",
         "TFXLNetLMHeadModel",
diff --git a/tests/test_modeling_gpt_neo.py b/tests/test_modeling_gpt_neo.py
index bea0ee7764..023a9d265e 100644
--- a/tests/test_modeling_gpt_neo.py
+++ b/tests/test_modeling_gpt_neo.py
@@ -432,7 +432,7 @@ class GPTNeoModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
 
     @slow
     def test_batch_generation(self):
-        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
         model.to(torch_device)
         tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 
@@ -486,7 +486,7 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
     @slow
     def test_lm_generate_gpt_neo(self):
         for checkpointing in [True, False]:
-            model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl", gradient_checkpointing=checkpointing)
+            model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", gradient_checkpointing=checkpointing)
             model.to(torch_device)
             input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device)  # The dog
             # fmt: off
@@ -497,8 +497,8 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
 
     @slow
     def test_gpt_neo_sample(self):
-        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
-        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+        model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
         model.to(torch_device)
 
         torch.manual_seed(0)