GPT Neo few fixes (#10968)
* fix checkpoint names
* auto model
* fix doc
This commit is contained in:
@@ -31,8 +31,8 @@ The :obj:`generate()` method can be used to generate text using GPT Neo model.
|
||||
.. code-block::
|
||||
|
||||
>>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
|
||||
>>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
|
||||
>>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
|
||||
>>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||
>>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||
|
||||
>>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
|
||||
... "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
|
||||
|
||||
@@ -139,10 +139,10 @@ For the full list, refer to `https://huggingface.co/models <https://huggingface.
|
||||
| | ``gpt2-xl`` | | 48-layer, 1600-hidden, 25-heads, 1558M parameters. |
|
||||
| | | | OpenAI's XL-sized GPT-2 English model |
|
||||
+--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| GPTNeo | ``EleutherAI/gpt_neo_xl`` | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters. |
|
||||
| GPTNeo | ``EleutherAI/gpt-neo-1.3B`` | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters. |
|
||||
| | | | EleutherAI's GPT-3 like language model. |
|
||||
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| | ``EleutherAI/gpt_neo_2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters. |
|
||||
| | ``EleutherAI/gpt-neo-2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters. |
|
||||
| | | | EleutherAI's GPT-3 like language model. |
|
||||
+--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| Transformer-XL | ``transfo-xl-wt103`` | | 18-layer, 1024-hidden, 16-heads, 257M parameters. |
|
||||
|
||||
@@ -418,6 +418,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
|
||||
MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
|
||||
[
|
||||
# Model with LM heads mapping
|
||||
(GPTNeoConfig, GPTNeoForCausalLM),
|
||||
(BigBirdConfig, BigBirdForMaskedLM),
|
||||
(Speech2TextConfig, Speech2TextForConditionalGeneration),
|
||||
(Wav2Vec2Config, Wav2Vec2ForMaskedLM),
|
||||
|
||||
@@ -21,7 +21,7 @@ from ...utils import logging
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||
"EleutherAI/gpt_neo_xl": "https://huggingface.co/EleutherAI/gpt_neo_xl/resolve/main/config.json",
|
||||
"EleutherAI/gpt-neo-1.3B": "https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json",
|
||||
# See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
|
||||
}
|
||||
|
||||
@@ -30,8 +30,8 @@ class GPTNeoConfig(PretrainedConfig):
|
||||
r"""
|
||||
This is the configuration class to store the configuration of a :class:`~transformers.GPTNeoModel`. It is used to
|
||||
instantiate a GPT Neo model according to the specified arguments, defining the model architecture. Instantiating a
|
||||
configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt_neo_xl
|
||||
<https://huggingface.co/EleutherAI/gpt_neo_xl>`__ architecture.
|
||||
configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt-neo-1.3B
|
||||
<https://huggingface.co/EleutherAI/gpt-neo-1.3B>`__ architecture.
|
||||
|
||||
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
|
||||
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
|
||||
@@ -81,10 +81,10 @@ class GPTNeoConfig(PretrainedConfig):
|
||||
|
||||
>>> from transformers import GPTNeoModel, GPTNeoConfig
|
||||
|
||||
>>> # Initializing a GPTNeo EleutherAI/gpt_neo_xl style configuration
|
||||
>>> # Initializing a GPTNeo EleutherAI/gpt-neo-1.3B style configuration
|
||||
>>> configuration = GPTNeoConfig()
|
||||
|
||||
>>> # Initializing a model from the EleutherAI/gpt_neo_xl style configuration
|
||||
>>> # Initializing a model from the EleutherAI/gpt-neo-1.3B style configuration
|
||||
>>> model = GPTNeoModel(configuration)
|
||||
|
||||
>>> # Accessing the model configuration
|
||||
|
||||
@@ -43,11 +43,11 @@ _CONFIG_FOR_DOC = "GPTNeoConfig"
|
||||
_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
|
||||
|
||||
GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||
"EleutherAI/gpt_neo_xl",
|
||||
"EleutherAI/gpt-neo-1.3B",
|
||||
# See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
|
||||
]
|
||||
|
||||
_CHECKPOINT_FOR_DOC = "EleutherAI/gpt_neo_xl"
|
||||
_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-neo-1.3B"
|
||||
|
||||
|
||||
def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
|
||||
|
||||
@@ -35,6 +35,7 @@ class TextGenerationPipeline(Pipeline):
|
||||
"TransfoXLLMHeadModel",
|
||||
"ReformerModelWithLMHead",
|
||||
"GPT2LMHeadModel",
|
||||
"GPTNeoForCausalLM",
|
||||
"OpenAIGPTLMHeadModel",
|
||||
"CTRLLMHeadModel",
|
||||
"TFXLNetLMHeadModel",
|
||||
|
||||
@@ -432,7 +432,7 @@ class GPTNeoModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
|
||||
|
||||
@slow
|
||||
def test_batch_generation(self):
|
||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
|
||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||
model.to(torch_device)
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||
|
||||
@@ -486,7 +486,7 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
|
||||
@slow
|
||||
def test_lm_generate_gpt_neo(self):
|
||||
for checkpointing in [True, False]:
|
||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl", gradient_checkpointing=checkpointing)
|
||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", gradient_checkpointing=checkpointing)
|
||||
model.to(torch_device)
|
||||
input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device) # The dog
|
||||
# fmt: off
|
||||
@@ -497,8 +497,8 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
|
||||
|
||||
@slow
|
||||
def test_gpt_neo_sample(self):
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
|
||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||
model.to(torch_device)
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
Reference in New Issue
Block a user