[LongT5] Rename checkpoints (#17700)

This commit is contained in:
Patrick von Platen
2022-06-14 14:10:50 +02:00
committed by GitHub
parent 3b29c9fdb7
commit 53496ac510
5 changed files with 26 additions and 26 deletions

View File

@@ -23,10 +23,10 @@ from ...utils import logging
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
LONGT5_PRETRAINED_CONFIG_ARCHIVE_MAP = { LONGT5_PRETRAINED_CONFIG_ARCHIVE_MAP = {
"google/LongT5-Local-Base": "https://huggingface.co/google/LongT5-Local-Base/blob/main/config.json", "google/long-t5-local-base": "https://huggingface.co/google/long-t5-local-base/blob/main/config.json",
"google/LongT5-Local-Large": "https://huggingface.co/google/LongT5-Local-Large/blob/main/config.json", "google/long-t5-local-large": "https://huggingface.co/google/long-t5-local-large/blob/main/config.json",
"google/LongT5-TGlobal-Base": "https://huggingface.co/google/LongT5-TGlobal-Base/blob/main/config.json", "google/long-t5-tglobal-base": "https://huggingface.co/google/long-t5-tglobal-base/blob/main/config.json",
"google/LongT5-TGlobal-Large": "https://huggingface.co/google/LongT5-TGlobal-Large/blob/main/config.json", "google/long-t5-tglobal-large": "https://huggingface.co/google/long-t5-tglobal-large/blob/main/config.json",
} }
@@ -35,7 +35,7 @@ class LongT5Config(PretrainedConfig):
This is the configuration class to store the configuration of a [`LongT5Model`] or a [`FlaxLongT5Model`]. It is This is the configuration class to store the configuration of a [`LongT5Model`] or a [`FlaxLongT5Model`]. It is
used to instantiate a LongT5 model according to the specified arguments, defining the model architecture. used to instantiate a LongT5 model according to the specified arguments, defining the model architecture.
Instantiating a configuration with the defaults will yield a similar configuration to that of the LongT5 Instantiating a configuration with the defaults will yield a similar configuration to that of the LongT5
[google/LongT5-Local-Base](https://huggingface.co/google/LongT5-Local-Base) architecture. [google/long-t5-local-base](https://huggingface.co/google/long-t5-local-base) architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information. documentation from [`PretrainedConfig`] for more information.

View File

@@ -49,7 +49,7 @@ from .configuration_longt5 import LongT5Config
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base" _CHECKPOINT_FOR_DOC = "google/long-t5-local-base"
_CONFIG_FOR_DOC = "LongT5Config" _CONFIG_FOR_DOC = "LongT5Config"
_TOKENIZER_FOR_DOC = "T5Tokenizer" _TOKENIZER_FOR_DOC = "T5Tokenizer"
@@ -1799,7 +1799,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel):
>>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained("t5-base") >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
>>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, return_tensors="np") >>> inputs = tokenizer(text, return_tensors="np")
@@ -1861,7 +1861,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel):
>>> import jax.numpy as jnp >>> import jax.numpy as jnp
>>> tokenizer = T5Tokenizer.from_pretrained("t5-base") >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
>>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")
>>> text = "My friends are cool but they eat too many carbs." >>> text = "My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, return_tensors="np") >>> inputs = tokenizer(text, return_tensors="np")
@@ -2080,7 +2080,7 @@ FLAX_LONGT5_MODEL_DOCSTRING = """
>>> from transformers import T5Tokenizer, FlaxLongT5Model >>> from transformers import T5Tokenizer, FlaxLongT5Model
>>> tokenizer = T5Tokenizer.from_pretrained("t5-base") >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
>>> model = FlaxLongT5Model.from_pretrained("google/LongT5-Local-Base") >>> model = FlaxLongT5Model.from_pretrained("google/long-t5-local-base")
>>> input_ids = tokenizer( >>> input_ids = tokenizer(
... "Studies have been shown that owning a dog is good for you", return_tensors="np" ... "Studies have been shown that owning a dog is good for you", return_tensors="np"
@@ -2233,7 +2233,7 @@ class FlaxLongT5ForConditionalGeneration(FlaxLongT5PreTrainedModel):
>>> import jax.numpy as jnp >>> import jax.numpy as jnp
>>> tokenizer = T5Tokenizer.from_pretrained("t5-base") >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
>>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")
>>> text = "summarize: My friends are cool but they eat too many carbs." >>> text = "summarize: My friends are cool but they eat too many carbs."
>>> inputs = tokenizer(text, return_tensors="np") >>> inputs = tokenizer(text, return_tensors="np")
@@ -2381,7 +2381,7 @@ FLAX_LONGT5_CONDITIONAL_GENERATION_DOCSTRING = """
>>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration
>>> tokenizer = T5Tokenizer.from_pretrained("t5-base") >>> tokenizer = T5Tokenizer.from_pretrained("t5-base")
>>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base")
>>> ARTICLE_TO_SUMMARIZE = "summarize: My friends are cool but they eat too many carbs." >>> ARTICLE_TO_SUMMARIZE = "summarize: My friends are cool but they eat too many carbs."
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors="np") >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors="np")

View File

@@ -49,14 +49,14 @@ logger = logging.get_logger(__name__)
_CONFIG_FOR_DOC = "LongT5Config" _CONFIG_FOR_DOC = "LongT5Config"
_TOKENIZER_FOR_DOC = "T5Tokenizer" _TOKENIZER_FOR_DOC = "T5Tokenizer"
_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base" _CHECKPOINT_FOR_DOC = "google/long-t5-local-base"
# TODO: Update before the merge # TODO: Update before the merge
LONGT5_PRETRAINED_MODEL_ARCHIVE_LIST = [ LONGT5_PRETRAINED_MODEL_ARCHIVE_LIST = [
"google/LongT5-Local-Base", "google/long-t5-local-base",
"google/LongT5-Local-Large", "google/long-t5-local-large",
"google/LongT5-TGlobal-Base", "google/long-t5-tglobal-base",
"google/LongT5-TGlobal-Large", "google/long-t5-tglobal-large",
] ]
@@ -1797,8 +1797,8 @@ class LongT5Model(LongT5PreTrainedModel):
```python ```python
>>> from transformers import T5Tokenizer, LongT5Model >>> from transformers import T5Tokenizer, LongT5Model
>>> tokenizer = T5Tokenizer.from_pretrained("google/LongT5-Local-Base") >>> tokenizer = T5Tokenizer.from_pretrained("google/long-t5-local-base")
>>> model = LongT5Model.from_pretrained("google/LongT5-Local-Base") >>> model = LongT5Model.from_pretrained("google/long-t5-local-base")
>>> # Let's try a very long encoder input. >>> # Let's try a very long encoder input.
>>> input_ids = tokenizer( >>> input_ids = tokenizer(
@@ -2169,8 +2169,8 @@ class LongT5EncoderModel(LongT5PreTrainedModel):
```python ```python
>>> from transformers import AutoTokenizer, LongT5ForConditionalGeneration >>> from transformers import AutoTokenizer, LongT5ForConditionalGeneration
>>> tokenizer = AutoTokenizer.from_pretrained("google/LongT5-Local-Base") >>> tokenizer = AutoTokenizer.from_pretrained("google/long-t5-local-base")
>>> model = LongT5EncoderModel.from_pretrained("google/LongT5-Local-Base") >>> model = LongT5EncoderModel.from_pretrained("google/long-t5-local-base")
>>> input_ids = tokenizer( >>> input_ids = tokenizer(
... 100 * "Studies have been shown that owning a dog is good for you ", return_tensors="pt" ... 100 * "Studies have been shown that owning a dog is good for you ", return_tensors="pt"
... ).input_ids # Batch size 1 ... ).input_ids # Batch size 1

View File

@@ -68,7 +68,7 @@ class LongT5ModelTester:
decoder_start_token_id=0, decoder_start_token_id=0,
scope=None, scope=None,
decoder_layers=None, decoder_layers=None,
large_model_config_path="google/LongT5-Local-Large", large_model_config_path="google/long-t5-local-large",
): ):
self.parent = parent self.parent = parent
@@ -755,7 +755,7 @@ class LongT5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase
class LongT5TGlobalModelTest(LongT5ModelTest): class LongT5TGlobalModelTest(LongT5ModelTest):
def setUp(self): def setUp(self):
self.model_tester = LongT5ModelTester( self.model_tester = LongT5ModelTester(
self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large" self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large"
) )
self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37) self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37)
@@ -912,7 +912,7 @@ class LongT5EncoderOnlyModelTester:
eos_token_id=1, eos_token_id=1,
pad_token_id=0, pad_token_id=0,
scope=None, scope=None,
large_model_config_path="google/LongT5-Local-Large", large_model_config_path="google/long-t5-local-large",
): ):
self.parent = parent self.parent = parent
@@ -1095,7 +1095,7 @@ class LongT5EncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase):
class LongT5EncoderOnlyTGlobalModelTest(LongT5EncoderOnlyModelTest): class LongT5EncoderOnlyTGlobalModelTest(LongT5EncoderOnlyModelTest):
def setUp(self): def setUp(self):
self.model_tester = LongT5EncoderOnlyModelTester( self.model_tester = LongT5EncoderOnlyModelTester(
self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large" self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large"
) )
self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37) self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37)

View File

@@ -213,8 +213,8 @@ PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = {
("blenderbot-small", "facebook/blenderbot_small-90M"), ("blenderbot-small", "facebook/blenderbot_small-90M"),
("blenderbot", "facebook/blenderbot-400M-distill"), ("blenderbot", "facebook/blenderbot-400M-distill"),
("bigbird-pegasus", "google/bigbird-pegasus-large-arxiv"), ("bigbird-pegasus", "google/bigbird-pegasus-large-arxiv"),
("longt5", "google/LongT5-Local-Base"), ("longt5", "google/long-t5-local-base"),
("longt5", "google/LongT5-TGlobal-Base"), ("longt5", "google/long-t5-tglobal-base"),
} }
# TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_MODELS` once TensorFlow has parity with the PyTorch model implementations. # TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_MODELS` once TensorFlow has parity with the PyTorch model implementations.