GPT Neo few fixes (#10968)
* fix checkpoint names
* auto model
* fix doc
This commit is contained in:
@@ -31,8 +31,8 @@ The :obj:`generate()` method can be used to generate text using GPT Neo model.
|
|||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
>>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
|
>>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
|
||||||
>>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
|
>>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||||
>>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
|
>>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||||
|
|
||||||
>>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
|
>>> prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
|
||||||
... "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
|
... "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
|
||||||
|
|||||||
@@ -139,10 +139,10 @@ For the full list, refer to `https://huggingface.co/models <https://huggingface.
|
|||||||
| | ``gpt2-xl`` | | 48-layer, 1600-hidden, 25-heads, 1558M parameters. |
|
| | ``gpt2-xl`` | | 48-layer, 1600-hidden, 25-heads, 1558M parameters. |
|
||||||
| | | | OpenAI's XL-sized GPT-2 English model |
|
| | | | OpenAI's XL-sized GPT-2 English model |
|
||||||
+--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
+--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||||
| GPTNeo | ``EleutherAI/gpt_neo_xl`` | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters. |
|
| GPTNeo | ``EleutherAI/gpt-neo-1.3B`` | | 24-layer, 2048-hidden, 16-heads, 1.3B parameters. |
|
||||||
| | | | EleutherAI's GPT-3 like language model. |
|
| | | | EleutherAI's GPT-3 like language model. |
|
||||||
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||||
| | ``EleutherAI/gpt_neo_2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters. |
|
| | ``EleutherAI/gpt-neo-2.7B`` | | 32-layer, 2560-hidden, 20-heads, 2.7B parameters. |
|
||||||
| | | | EleutherAI's GPT-3 like language model. |
|
| | | | EleutherAI's GPT-3 like language model. |
|
||||||
+--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
+--------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|
||||||
| Transformer-XL | ``transfo-xl-wt103`` | | 18-layer, 1024-hidden, 16-heads, 257M parameters. |
|
| Transformer-XL | ``transfo-xl-wt103`` | | 18-layer, 1024-hidden, 16-heads, 257M parameters. |
|
||||||
|
|||||||
@@ -418,6 +418,7 @@ MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
|
|||||||
MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
|
MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
|
||||||
[
|
[
|
||||||
# Model with LM heads mapping
|
# Model with LM heads mapping
|
||||||
|
(GPTNeoConfig, GPTNeoForCausalLM),
|
||||||
(BigBirdConfig, BigBirdForMaskedLM),
|
(BigBirdConfig, BigBirdForMaskedLM),
|
||||||
(Speech2TextConfig, Speech2TextForConditionalGeneration),
|
(Speech2TextConfig, Speech2TextForConditionalGeneration),
|
||||||
(Wav2Vec2Config, Wav2Vec2ForMaskedLM),
|
(Wav2Vec2Config, Wav2Vec2ForMaskedLM),
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from ...utils import logging
|
|||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
||||||
"EleutherAI/gpt_neo_xl": "https://huggingface.co/EleutherAI/gpt_neo_xl/resolve/main/config.json",
|
"EleutherAI/gpt-neo-1.3B": "https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json",
|
||||||
# See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
|
# See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -30,8 +30,8 @@ class GPTNeoConfig(PretrainedConfig):
|
|||||||
r"""
|
r"""
|
||||||
This is the configuration class to store the configuration of a :class:`~transformers.GPTNeoModel`. It is used to
|
This is the configuration class to store the configuration of a :class:`~transformers.GPTNeoModel`. It is used to
|
||||||
instantiate a GPT Neo model according to the specified arguments, defining the model architecture. Instantiating a
|
instantiate a GPT Neo model according to the specified arguments, defining the model architecture. Instantiating a
|
||||||
configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt_neo_xl
|
configuration with the defaults will yield a similar configuration to that of the GPTNeo `gpt-neo-1.3B
|
||||||
<https://huggingface.co/EleutherAI/gpt_neo_xl>`__ architecture.
|
<https://huggingface.co/EleutherAI/gpt-neo-1.3B>`__ architecture.
|
||||||
|
|
||||||
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
|
Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
|
||||||
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
|
outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
|
||||||
@@ -81,10 +81,10 @@ class GPTNeoConfig(PretrainedConfig):
|
|||||||
|
|
||||||
>>> from transformers import GPTNeoModel, GPTNeoConfig
|
>>> from transformers import GPTNeoModel, GPTNeoConfig
|
||||||
|
|
||||||
>>> # Initializing a GPTNeo EleutherAI/gpt_neo_xl style configuration
|
>>> # Initializing a GPTNeo EleutherAI/gpt-neo-1.3B style configuration
|
||||||
>>> configuration = GPTNeoConfig()
|
>>> configuration = GPTNeoConfig()
|
||||||
|
|
||||||
>>> # Initializing a model from the EleutherAI/gpt_neo_xl style configuration
|
>>> # Initializing a model from the EleutherAI/gpt-neo-1.3B style configuration
|
||||||
>>> model = GPTNeoModel(configuration)
|
>>> model = GPTNeoModel(configuration)
|
||||||
|
|
||||||
>>> # Accessing the model configuration
|
>>> # Accessing the model configuration
|
||||||
|
|||||||
@@ -43,11 +43,11 @@ _CONFIG_FOR_DOC = "GPTNeoConfig"
|
|||||||
_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
|
_TOKENIZER_FOR_DOC = "GPT2Tokenizer"
|
||||||
|
|
||||||
GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
GPT_NEO_PRETRAINED_MODEL_ARCHIVE_LIST = [
|
||||||
"EleutherAI/gpt_neo_xl",
|
"EleutherAI/gpt-neo-1.3B",
|
||||||
# See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
|
# See all GPTNeo models at https://huggingface.co/models?filter=gpt_neo
|
||||||
]
|
]
|
||||||
|
|
||||||
_CHECKPOINT_FOR_DOC = "EleutherAI/gpt_neo_xl"
|
_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-neo-1.3B"
|
||||||
|
|
||||||
|
|
||||||
def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
|
def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ class TextGenerationPipeline(Pipeline):
|
|||||||
"TransfoXLLMHeadModel",
|
"TransfoXLLMHeadModel",
|
||||||
"ReformerModelWithLMHead",
|
"ReformerModelWithLMHead",
|
||||||
"GPT2LMHeadModel",
|
"GPT2LMHeadModel",
|
||||||
|
"GPTNeoForCausalLM",
|
||||||
"OpenAIGPTLMHeadModel",
|
"OpenAIGPTLMHeadModel",
|
||||||
"CTRLLMHeadModel",
|
"CTRLLMHeadModel",
|
||||||
"TFXLNetLMHeadModel",
|
"TFXLNetLMHeadModel",
|
||||||
|
|||||||
@@ -432,7 +432,7 @@ class GPTNeoModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_batch_generation(self):
|
def test_batch_generation(self):
|
||||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
|
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
||||||
|
|
||||||
@@ -486,7 +486,7 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
|
|||||||
@slow
|
@slow
|
||||||
def test_lm_generate_gpt_neo(self):
|
def test_lm_generate_gpt_neo(self):
|
||||||
for checkpointing in [True, False]:
|
for checkpointing in [True, False]:
|
||||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl", gradient_checkpointing=checkpointing)
|
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", gradient_checkpointing=checkpointing)
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device) # The dog
|
input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device) # The dog
|
||||||
# fmt: off
|
# fmt: off
|
||||||
@@ -497,8 +497,8 @@ class GPTNeoModelLanguageGenerationTest(unittest.TestCase):
|
|||||||
|
|
||||||
@slow
|
@slow
|
||||||
def test_gpt_neo_sample(self):
|
def test_gpt_neo_sample(self):
|
||||||
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt_neo_xl")
|
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||||
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt_neo_xl")
|
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
||||||
model.to(torch_device)
|
model.to(torch_device)
|
||||||
|
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(0)
|
||||||
|
|||||||
Reference in New Issue
Block a user