From 53496ac5107d0b4af2d84e9c71d70b79954a32d9 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 14 Jun 2022 14:10:50 +0200 Subject: [PATCH] [LongT5] Rename checkpoints (#17700) --- .../models/longt5/configuration_longt5.py | 10 +++++----- .../models/longt5/modeling_flax_longt5.py | 12 ++++++------ .../models/longt5/modeling_longt5.py | 18 +++++++++--------- tests/models/longt5/test_modeling_longt5.py | 8 ++++---- tests/onnx/test_onnx_v2.py | 4 ++-- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/transformers/models/longt5/configuration_longt5.py b/src/transformers/models/longt5/configuration_longt5.py index e120055bb4..705fdc4939 100644 --- a/src/transformers/models/longt5/configuration_longt5.py +++ b/src/transformers/models/longt5/configuration_longt5.py @@ -23,10 +23,10 @@ from ...utils import logging logger = logging.get_logger(__name__) LONGT5_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "google/LongT5-Local-Base": "https://huggingface.co/google/LongT5-Local-Base/blob/main/config.json", - "google/LongT5-Local-Large": "https://huggingface.co/google/LongT5-Local-Large/blob/main/config.json", - "google/LongT5-TGlobal-Base": "https://huggingface.co/google/LongT5-TGlobal-Base/blob/main/config.json", - "google/LongT5-TGlobal-Large": "https://huggingface.co/google/LongT5-TGlobal-Large/blob/main/config.json", + "google/long-t5-local-base": "https://huggingface.co/google/long-t5-local-base/blob/main/config.json", + "google/long-t5-local-large": "https://huggingface.co/google/long-t5-local-large/blob/main/config.json", + "google/long-t5-tglobal-base": "https://huggingface.co/google/long-t5-tglobal-base/blob/main/config.json", + "google/long-t5-tglobal-large": "https://huggingface.co/google/long-t5-tglobal-large/blob/main/config.json", } @@ -35,7 +35,7 @@ class LongT5Config(PretrainedConfig): This is the configuration class to store the configuration of a [`LongT5Model`] or a [`FlaxLongT5Model`]. 
It is used to instantiate a LongT5 model according to the specified arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of the LongT5 - [google/LongT5-Local-Base](https://huggingface.co/google/LongT5-Local-Base) architecture. + [google/long-t5-local-base](https://huggingface.co/google/long-t5-local-base) architecture. Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for more information. diff --git a/src/transformers/models/longt5/modeling_flax_longt5.py b/src/transformers/models/longt5/modeling_flax_longt5.py index db9e34f8e5..8ea0b38bb4 100644 --- a/src/transformers/models/longt5/modeling_flax_longt5.py +++ b/src/transformers/models/longt5/modeling_flax_longt5.py @@ -49,7 +49,7 @@ from .configuration_longt5 import LongT5Config logger = logging.get_logger(__name__) -_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base" +_CHECKPOINT_FOR_DOC = "google/long-t5-local-base" _CONFIG_FOR_DOC = "LongT5Config" _TOKENIZER_FOR_DOC = "T5Tokenizer" @@ -1799,7 +1799,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration >>> tokenizer = T5Tokenizer.from_pretrained("t5-base") - >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") + >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base") >>> text = "My friends are cool but they eat too many carbs." 
>>> inputs = tokenizer(text, return_tensors="np") @@ -1861,7 +1861,7 @@ class FlaxLongT5PreTrainedModel(FlaxPreTrainedModel): >>> import jax.numpy as jnp >>> tokenizer = T5Tokenizer.from_pretrained("t5-base") - >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") + >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base") >>> text = "My friends are cool but they eat too many carbs." >>> inputs = tokenizer(text, return_tensors="np") @@ -2080,7 +2080,7 @@ FLAX_LONGT5_MODEL_DOCSTRING = """ >>> from transformers import T5Tokenizer, FlaxLongT5Model >>> tokenizer = T5Tokenizer.from_pretrained("t5-base") - >>> model = FlaxLongT5Model.from_pretrained("google/LongT5-Local-Base") + >>> model = FlaxLongT5Model.from_pretrained("google/long-t5-local-base") >>> input_ids = tokenizer( ... "Studies have been shown that owning a dog is good for you", return_tensors="np" @@ -2233,7 +2233,7 @@ class FlaxLongT5ForConditionalGeneration(FlaxLongT5PreTrainedModel): >>> import jax.numpy as jnp >>> tokenizer = T5Tokenizer.from_pretrained("t5-base") - >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") + >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base") >>> text = "summarize: My friends are cool but they eat too many carbs." >>> inputs = tokenizer(text, return_tensors="np") @@ -2381,7 +2381,7 @@ FLAX_LONGT5_CONDITIONAL_GENERATION_DOCSTRING = """ >>> from transformers import T5Tokenizer, FlaxLongT5ForConditionalGeneration >>> tokenizer = T5Tokenizer.from_pretrained("t5-base") - >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/LongT5-Local-Base") + >>> model = FlaxLongT5ForConditionalGeneration.from_pretrained("google/long-t5-local-base") >>> ARTICLE_TO_SUMMARIZE = "summarize: My friends are cool but they eat too many carbs." 
>>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], return_tensors="np") diff --git a/src/transformers/models/longt5/modeling_longt5.py b/src/transformers/models/longt5/modeling_longt5.py index 292a54c64b..9b7d2c62c4 100644 --- a/src/transformers/models/longt5/modeling_longt5.py +++ b/src/transformers/models/longt5/modeling_longt5.py @@ -49,14 +49,14 @@ logger = logging.get_logger(__name__) _CONFIG_FOR_DOC = "LongT5Config" _TOKENIZER_FOR_DOC = "T5Tokenizer" -_CHECKPOINT_FOR_DOC = "google/LongT5-Local-Base" +_CHECKPOINT_FOR_DOC = "google/long-t5-local-base" # TODO: Update before the merge LONGT5_PRETRAINED_MODEL_ARCHIVE_LIST = [ - "google/LongT5-Local-Base", - "google/LongT5-Local-Large", - "google/LongT5-TGlobal-Base", - "google/LongT5-TGlobal-Large", + "google/long-t5-local-base", + "google/long-t5-local-large", + "google/long-t5-tglobal-base", + "google/long-t5-tglobal-large", ] @@ -1797,8 +1797,8 @@ class LongT5Model(LongT5PreTrainedModel): ```python >>> from transformers import T5Tokenizer, LongT5Model - >>> tokenizer = T5Tokenizer.from_pretrained("google/LongT5-Local-Base") - >>> model = LongT5Model.from_pretrained("google/LongT5-Local-Base") + >>> tokenizer = T5Tokenizer.from_pretrained("google/long-t5-local-base") + >>> model = LongT5Model.from_pretrained("google/long-t5-local-base") >>> # Let's try a very long encoder input. >>> input_ids = tokenizer( @@ -2169,8 +2169,8 @@ class LongT5EncoderModel(LongT5PreTrainedModel): ```python >>> from transformers import AutoTokenizer, LongT5ForConditionalGeneration - >>> tokenizer = AutoTokenizer.from_pretrained("google/LongT5-Local-Base") - >>> model = LongT5EncoderModel.from_pretrained("google/LongT5-Local-Base") + >>> tokenizer = AutoTokenizer.from_pretrained("google/long-t5-local-base") + >>> model = LongT5EncoderModel.from_pretrained("google/long-t5-local-base") >>> input_ids = tokenizer( ... 100 * "Studies have been shown that owning a dog is good for you ", return_tensors="pt" ... 
).input_ids # Batch size 1 diff --git a/tests/models/longt5/test_modeling_longt5.py b/tests/models/longt5/test_modeling_longt5.py index e6716b7fbc..d0bf1ac804 100644 --- a/tests/models/longt5/test_modeling_longt5.py +++ b/tests/models/longt5/test_modeling_longt5.py @@ -68,7 +68,7 @@ class LongT5ModelTester: decoder_start_token_id=0, scope=None, decoder_layers=None, - large_model_config_path="google/LongT5-Local-Large", + large_model_config_path="google/long-t5-local-large", ): self.parent = parent @@ -755,7 +755,7 @@ class LongT5ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase class LongT5TGlobalModelTest(LongT5ModelTest): def setUp(self): self.model_tester = LongT5ModelTester( - self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large" + self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large" ) self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37) @@ -912,7 +912,7 @@ class LongT5EncoderOnlyModelTester: eos_token_id=1, pad_token_id=0, scope=None, - large_model_config_path="google/LongT5-Local-Large", + large_model_config_path="google/long-t5-local-large", ): self.parent = parent @@ -1095,7 +1095,7 @@ class LongT5EncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase): class LongT5EncoderOnlyTGlobalModelTest(LongT5EncoderOnlyModelTest): def setUp(self): self.model_tester = LongT5EncoderOnlyModelTester( - self, encoder_attention_type="transient-global", large_model_config_path="google/LongT5-TGlobal-Large" + self, encoder_attention_type="transient-global", large_model_config_path="google/long-t5-tglobal-large" ) self.config_tester = ConfigTester(self, config_class=LongT5Config, d_model=37) diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py index ee7fba9659..3430bc9fbd 100644 --- a/tests/onnx/test_onnx_v2.py +++ b/tests/onnx/test_onnx_v2.py @@ -213,8 +213,8 @@ PYTORCH_EXPORT_SEQ2SEQ_WITH_PAST_MODELS = { 
("blenderbot-small", "facebook/blenderbot_small-90M"), ("blenderbot", "facebook/blenderbot-400M-distill"), ("bigbird-pegasus", "google/bigbird-pegasus-large-arxiv"), - ("longt5", "google/LongT5-Local-Base"), - ("longt5", "google/LongT5-TGlobal-Base"), + ("longt5", "google/long-t5-local-base"), + ("longt5", "google/long-t5-tglobal-base"), } # TODO(lewtun): Include the same model types in `PYTORCH_EXPORT_MODELS` once TensorFlow has parity with the PyTorch model implementations.