From 79444f370f855e36c97876b04831e0a6c94f007d Mon Sep 17 00:00:00 2001 From: Tom Aarsen <37621491+tomaarsen@users.noreply.github.com> Date: Thu, 20 Jul 2023 13:03:24 +0200 Subject: [PATCH] Deprecate unused OpenLlama architecture (#24922) * Resolve typo in check_repo.py * Specify encoding when opening modeling files * Deprecate the OpenLlama architecture * Add disclaimer pointing to Llama I'm open to different wordings here * Match the capitalisation of LLaMA --- docs/source/en/model_doc/open-llama.md | 15 + src/transformers/__init__.py | 22 +- src/transformers/models/__init__.py | 1 - .../models/auto/configuration_auto.py | 1 + .../{ => deprecated}/open_llama/__init__.py | 2 +- .../open_llama/configuration_open_llama.py | 4 +- .../open_llama/modeling_open_llama.py | 8 +- src/transformers/utils/dummy_pt_objects.py | 56 +-- tests/models/open_llama/__init__.py | 0 .../open_llama/test_modeling_open_llama.py | 370 ------------------ utils/check_config_attributes.py | 2 +- utils/check_repo.py | 2 +- 12 files changed, 64 insertions(+), 419 deletions(-) rename src/transformers/models/{ => deprecated}/open_llama/__init__.py (99%) rename src/transformers/models/{ => deprecated}/open_llama/configuration_open_llama.py (98%) rename src/transformers/models/{ => deprecated}/open_llama/modeling_open_llama.py (99%) delete mode 100644 tests/models/open_llama/__init__.py delete mode 100644 tests/models/open_llama/test_modeling_open_llama.py diff --git a/docs/source/en/model_doc/open-llama.md b/docs/source/en/model_doc/open-llama.md index 23d35b8057..c20ecb7f88 100644 --- a/docs/source/en/model_doc/open-llama.md +++ b/docs/source/en/model_doc/open-llama.md @@ -16,6 +16,21 @@ rendered properly in your Markdown viewer. # Open-Llama + + +This model is in maintenance mode only, so we won't accept any new PRs changing its code. + +If you run into any issues running this model, please reinstall the last version that supported this model: v4.31.0. +You can do so by running the following command: `pip install -U transformers==4.31.0`. + + + + + +This model differs from the [OpenLLaMA models](https://huggingface.co/models?search=openllama) on the Hugging Face Hub, which primarily use the [LLaMA](llama) architecture. + + + ## Overview The Open-Llama model was proposed in [Open-Llama project](https://github.com/s-JoL/Open-Llama) by community developer s-JoL. diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index fdec70f4ac..1ebb680880 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -278,6 +278,7 @@ _import_structure = { "MCTCTProcessor", ], "models.deprecated.mmbt": ["MMBTConfig"], + "models.deprecated.open_llama": ["OPEN_LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP", "OpenLlamaConfig"], "models.deprecated.retribert": [ "RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "RetriBertConfig", @@ -445,7 +446,6 @@ _import_structure = { "NystromformerConfig", ], "models.oneformer": ["ONEFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "OneFormerConfig", "OneFormerProcessor"], - "models.open_llama": ["OPEN_LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP", "OpenLlamaConfig"], "models.openai": ["OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP", "OpenAIGPTConfig", "OpenAIGPTTokenizer"], "models.opt": ["OPTConfig"], "models.owlvit": [ @@ -1536,6 +1536,9 @@ else: ] ) _import_structure["models.deprecated.mmbt"].extend(["MMBTForClassification", "MMBTModel", "ModalEmbeddings"]) + _import_structure["models.deprecated.open_llama"].extend( + ["OpenLlamaForCausalLM", "OpenLlamaForSequenceClassification", "OpenLlamaModel", "OpenLlamaPreTrainedModel"] + ) _import_structure["models.deprecated.retribert"].extend( ["RETRIBERT_PRETRAINED_MODEL_ARCHIVE_LIST", "RetriBertModel", "RetriBertPreTrainedModel"] ) @@ -2300,9 +2303,6 @@ else: "OneFormerPreTrainedModel", ] ) - _import_structure["models.open_llama"].extend( - ["OpenLlamaForCausalLM", "OpenLlamaForSequenceClassification", "OpenLlamaModel", "OpenLlamaPreTrainedModel"] - ) _import_structure["models.openai"].extend( [ "OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST", @@ -4239,6 +4239,7 @@ if TYPE_CHECKING: MCTCTProcessor, ) from .models.deprecated.mmbt import MMBTConfig + from .models.deprecated.open_llama import OPEN_LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenLlamaConfig from .models.deprecated.retribert import ( RETRIBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, RetriBertConfig, @@ -4390,7 +4391,6 @@ if TYPE_CHECKING: from .models.nllb_moe import NLLB_MOE_PRETRAINED_CONFIG_ARCHIVE_MAP, NllbMoeConfig from .models.nystromformer import NYSTROMFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, NystromformerConfig from .models.oneformer import ONEFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, OneFormerConfig, OneFormerProcessor - from .models.open_llama import OPEN_LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenLlamaConfig from .models.openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig, OpenAIGPTTokenizer from .models.opt import OPTConfig from .models.owlvit import ( @@ -5334,6 +5334,12 @@ if TYPE_CHECKING: MCTCTPreTrainedModel, ) from .models.deprecated.mmbt import MMBTForClassification, MMBTModel, ModalEmbeddings + from .models.deprecated.open_llama import ( + OpenLlamaForCausalLM, + OpenLlamaForSequenceClassification, + OpenLlamaModel, + OpenLlamaPreTrainedModel, + ) from .models.deprecated.retribert import ( RETRIBERT_PRETRAINED_MODEL_ARCHIVE_LIST, RetriBertModel, @@ -5954,12 +5960,6 @@ if TYPE_CHECKING: OneFormerModel, OneFormerPreTrainedModel, ) - from .models.open_llama import ( - OpenLlamaForCausalLM, - OpenLlamaForSequenceClassification, - OpenLlamaModel, - OpenLlamaPreTrainedModel, - ) from .models.openai import ( OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST, OpenAIGPTDoubleHeadsModel, diff --git a/src/transformers/models/__init__.py b/src/transformers/models/__init__.py index 853dd115b3..8649286b51 100644 --- a/src/transformers/models/__init__.py +++ b/src/transformers/models/__init__.py @@ -145,7 +145,6 @@ from . import ( nllb_moe, nystromformer, oneformer, - open_llama, openai, opt, owlvit, diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index 2c696b26c8..2eb7bd9745 100755 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -652,6 +652,7 @@ DEPRECATED_MODELS = [ "bort", "mctct", "mmbt", + "open_llama", "retribert", "tapex", "trajectory_transformer", diff --git a/src/transformers/models/open_llama/__init__.py b/src/transformers/models/deprecated/open_llama/__init__.py similarity index 99% rename from src/transformers/models/open_llama/__init__.py rename to src/transformers/models/deprecated/open_llama/__init__.py index 757cba9cf8..446c9f076d 100644 --- a/src/transformers/models/open_llama/__init__.py +++ b/src/transformers/models/deprecated/open_llama/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. from typing import TYPE_CHECKING -from ...utils import ( +from ....utils import ( OptionalDependencyNotAvailable, _LazyModule, is_sentencepiece_available, diff --git a/src/transformers/models/open_llama/configuration_open_llama.py b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py similarity index 98% rename from src/transformers/models/open_llama/configuration_open_llama.py rename to src/transformers/models/deprecated/open_llama/configuration_open_llama.py index c0629b31e8..e4694fe11b 100644 --- a/src/transformers/models/open_llama/configuration_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/configuration_open_llama.py @@ -19,8 +19,8 @@ # limitations under the License. """ Open-Llama model configuration""" -from ...configuration_utils import PretrainedConfig -from ...utils import logging +from ....configuration_utils import PretrainedConfig +from ....utils import logging logger = logging.get_logger(__name__) diff --git a/src/transformers/models/open_llama/modeling_open_llama.py b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py similarity index 99% rename from src/transformers/models/open_llama/modeling_open_llama.py rename to src/transformers/models/deprecated/open_llama/modeling_open_llama.py index bcb4e04c0e..a9948afa80 100644 --- a/src/transformers/models/open_llama/modeling_open_llama.py +++ b/src/transformers/models/deprecated/open_llama/modeling_open_llama.py @@ -26,10 +26,10 @@ import torch.utils.checkpoint from torch import nn from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss -from ...activations import ACT2FN -from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast -from ...modeling_utils import PreTrainedModel -from ...utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings +from ....activations import ACT2FN +from ....modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast +from ....modeling_utils import PreTrainedModel +from ....utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings from .configuration_open_llama import OpenLlamaConfig diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 20631fc066..6ef0dd4344 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -2396,6 +2396,34 @@ class ModalEmbeddings(metaclass=DummyObject): requires_backends(self, ["torch"]) +class OpenLlamaForCausalLM(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class OpenLlamaForSequenceClassification(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class OpenLlamaModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + +class OpenLlamaPreTrainedModel(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + RETRIBERT_PRETRAINED_MODEL_ARCHIVE_LIST = None @@ -5461,34 +5489,6 @@ class OneFormerPreTrainedModel(metaclass=DummyObject): requires_backends(self, ["torch"]) -class OpenLlamaForCausalLM(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class OpenLlamaForSequenceClassification(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class OpenLlamaModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - -class OpenLlamaPreTrainedModel(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST = None diff --git a/tests/models/open_llama/__init__.py b/tests/models/open_llama/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/models/open_llama/test_modeling_open_llama.py b/tests/models/open_llama/test_modeling_open_llama.py deleted file mode 100644 index 687b267b70..0000000000 --- a/tests/models/open_llama/test_modeling_open_llama.py +++ /dev/null @@ -1,370 +0,0 @@ -# coding=utf-8 -# Copyright 2023 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Testing suite for the PyTorch Open-Llama model. """ - - -import unittest - -from parameterized import parameterized - -from transformers import OpenLlamaConfig, is_torch_available, set_seed -from transformers.testing_utils import require_torch, torch_device - -from ...generation.test_utils import GenerationTesterMixin -from ...test_configuration_common import ConfigTester -from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask -from ...test_pipeline_mixin import PipelineTesterMixin - - -if is_torch_available(): - import torch - - from transformers import OpenLlamaForCausalLM, OpenLlamaForSequenceClassification, OpenLlamaModel - - -class OpenLlamaModelTester: - def __init__( - self, - parent, - batch_size=13, - seq_length=7, - is_training=True, - use_input_mask=True, - use_token_type_ids=False, - use_labels=True, - vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, - max_position_embeddings=512, - type_vocab_size=16, - type_sequence_label_size=2, - initializer_range=0.02, - num_labels=3, - num_choices=4, - scope=None, - ): - self.parent = parent - self.batch_size = batch_size - self.seq_length = seq_length - self.is_training = is_training - self.use_input_mask = use_input_mask - self.use_token_type_ids = use_token_type_ids - self.use_labels = use_labels - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.intermediate_size = intermediate_size - self.hidden_act = hidden_act - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.max_position_embeddings = max_position_embeddings - self.type_vocab_size = type_vocab_size - self.type_sequence_label_size = type_sequence_label_size - self.initializer_range = initializer_range - self.num_labels = num_labels - self.num_choices = num_choices - self.scope = scope - - def prepare_config_and_inputs(self): - input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) - - input_mask = None - if self.use_input_mask: - input_mask = random_attention_mask([self.batch_size, self.seq_length]) - - token_type_ids = None - if self.use_token_type_ids: - token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size) - - sequence_labels = None - token_labels = None - choice_labels = None - if self.use_labels: - sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size) - token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels) - choice_labels = ids_tensor([self.batch_size], self.num_choices) - - config = self.get_config() - - return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - - def get_config(self): - return OpenLlamaConfig( - vocab_size=self.vocab_size, - hidden_size=self.hidden_size, - num_hidden_layers=self.num_hidden_layers, - num_attention_heads=self.num_attention_heads, - intermediate_size=self.intermediate_size, - hidden_act=self.hidden_act, - hidden_dropout_prob=self.hidden_dropout_prob, - attention_probs_dropout_prob=self.attention_probs_dropout_prob, - max_position_embeddings=self.max_position_embeddings, - type_vocab_size=self.type_vocab_size, - is_decoder=False, - initializer_range=self.initializer_range, - use_stable_embedding=False, - ) - - def create_and_check_model( - self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels - ): - model = OpenLlamaModel(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask) - result = model(input_ids) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_model_as_decoder( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.add_cross_attention = True - model = OpenLlamaModel(config) - model.to(torch_device) - model.eval() - result = model( - input_ids, - attention_mask=input_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - ) - result = model( - input_ids, - attention_mask=input_mask, - encoder_hidden_states=encoder_hidden_states, - ) - result = model(input_ids, attention_mask=input_mask) - self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) - - def create_and_check_for_causal_lm( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - model = OpenLlamaForCausalLM(config=config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=input_mask, labels=token_labels) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) - - def create_and_check_decoder_model_past_large_inputs( - self, - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - encoder_hidden_states, - encoder_attention_mask, - ): - config.is_decoder = True - config.add_cross_attention = True - model = OpenLlamaForCausalLM(config=config) - model.to(torch_device) - model.eval() - - # first forward pass - outputs = model( - input_ids, - attention_mask=input_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - use_cache=True, - ) - past_key_values = outputs.past_key_values - - # create hypothetical multiple next token and extent to next_input_ids - next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size) - next_mask = ids_tensor((self.batch_size, 3), vocab_size=2) - - # append to next input_ids and - next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) - next_attention_mask = torch.cat([input_mask, next_mask], dim=-1) - - output_from_no_past = model( - next_input_ids, - attention_mask=next_attention_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - output_hidden_states=True, - )["hidden_states"][0] - output_from_past = model( - next_tokens, - attention_mask=next_attention_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - past_key_values=past_key_values, - output_hidden_states=True, - )["hidden_states"][0] - - # select random slice - random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item() - output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach() - output_from_past_slice = output_from_past[:, :, random_slice_idx].detach() - - self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1]) - - # test that outputs are equal for slice - self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3)) - - def prepare_config_and_inputs_for_common(self): - config_and_inputs = self.prepare_config_and_inputs() - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = config_and_inputs - inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask} - return config, inputs_dict - - -@require_torch -class OpenLlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): - all_model_classes = ( - (OpenLlamaModel, OpenLlamaForCausalLM, OpenLlamaForSequenceClassification) if is_torch_available() else () - ) - all_generative_model_classes = (OpenLlamaForCausalLM,) if is_torch_available() else () - pipeline_model_mapping = ( - { - "feature-extraction": OpenLlamaModel, - "text-classification": OpenLlamaForSequenceClassification, - "text-generation": OpenLlamaForCausalLM, - "zero-shot": OpenLlamaForSequenceClassification, - } - if is_torch_available() - else {} - ) - test_headmasking = False - test_pruning = False - - def setUp(self): - self.model_tester = OpenLlamaModelTester(self) - self.config_tester = ConfigTester(self, config_class=OpenLlamaConfig, hidden_size=37) - - def test_config(self): - self.config_tester.run_common_tests() - - def test_model(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_model_various_embeddings(self): - config_and_inputs = self.model_tester.prepare_config_and_inputs() - for type in ["absolute", "relative_key", "relative_key_query"]: - config_and_inputs[0].position_embedding_type = type - self.model_tester.create_and_check_model(*config_and_inputs) - - def test_open_llama_sequence_classification_model(self): - config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.num_labels = 3 - input_ids = input_dict["input_ids"] - attention_mask = input_ids.ne(1).to(torch_device) - sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size) - model = OpenLlamaForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) - self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) - - def test_open_llama_sequence_classification_model_for_single_label(self): - config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.num_labels = 3 - config.problem_type = "single_label_classification" - input_ids = input_dict["input_ids"] - attention_mask = input_ids.ne(1).to(torch_device) - sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size) - model = OpenLlamaForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) - self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) - - def test_open_llama_sequence_classification_model_for_multi_label(self): - config, input_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.num_labels = 3 - config.problem_type = "multi_label_classification" - input_ids = input_dict["input_ids"] - attention_mask = input_ids.ne(1).to(torch_device) - sequence_labels = ids_tensor( - [self.model_tester.batch_size, config.num_labels], self.model_tester.type_sequence_label_size - ).to(torch.float) - model = OpenLlamaForSequenceClassification(config) - model.to(torch_device) - model.eval() - result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) - self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) - - @unittest.skip("Open-Llama buffers include complex numbers, which breaks this test") - def test_save_load_fast_init_from_base(self): - pass - - @parameterized.expand([("linear",), ("dynamic",)]) - def test_model_rope_scaling(self, scaling_type): - config, _ = self.model_tester.prepare_config_and_inputs_for_common() - short_input = ids_tensor([1, 10], config.vocab_size) - long_input = ids_tensor([1, int(config.max_position_embeddings * 1.5)], config.vocab_size) - - set_seed(42) # Fixed seed at init time so the two models get the same random weights - original_model = OpenLlamaModel(config) - original_model.to(torch_device) - original_model.eval() - original_short_output = original_model(short_input).last_hidden_state - original_long_output = original_model(long_input).last_hidden_state - - set_seed(42) # Fixed seed at init time so the two models get the same random weights - config.rope_scaling = {"type": scaling_type, "factor": 10.0} - scaled_model = OpenLlamaModel(config) - scaled_model.to(torch_device) - scaled_model.eval() - scaled_short_output = scaled_model(short_input).last_hidden_state - scaled_long_output = scaled_model(long_input).last_hidden_state - - # Dynamic scaling does not change the RoPE embeddings until it receives an input longer than the original - # maximum sequence length, so the outputs for the short input should match. - if scaling_type == "dynamic": - self.assertTrue(torch.allclose(original_short_output, scaled_short_output, atol=1e-5)) - else: - self.assertFalse(torch.allclose(original_short_output, scaled_short_output, atol=1e-5)) - - # The output should be different for long inputs - self.assertFalse(torch.allclose(original_long_output, scaled_long_output, atol=1e-5)) diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py index ac68337ea2..f96b9f3700 100644 --- a/utils/check_config_attributes.py +++ b/utils/check_config_attributes.py @@ -238,7 +238,7 @@ def check_config_attributes_being_used(config_class): modeling_sources = [] for path in modeling_paths: if os.path.isfile(path): - with open(path) as fp: + with open(path, encoding="utf8") as fp: modeling_sources.append(fp.read()) unused_attributes = [] diff --git a/utils/check_repo.py b/utils/check_repo.py index 66f75863a3..7af69519c6 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -1093,7 +1093,7 @@ def check_deprecated_constant_is_up_to_date(): if len(missing_models) != 0: missing_models = ", ".join(missing_models) message.append( - "The following models are in the deprecated folder, make sur to add them to `DEPRECATED_MODELS` in " + "The following models are in the deprecated folder, make sure to add them to `DEPRECATED_MODELS` in " f"`models/auto/configuration_auto.py`: {missing_models}." )