add Glm (#33823)

* Create modular_glm.py * Update modular_glm.py * Finalize architecture without all attentions * Add all attentions modules * Finalize modular * Update given last version * Last update * Finalize model * Finalize converter * Update convert_glm_weights_to_hf.py * style * style * Create __init__.py * Aff all inits * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Correct the rotary embeddings * Remove apply_residual_connection_post_layernorm (always false) * remove use_rms_norm (always true) * remove past_layer_norm (always true) * Update __init__.py * Update config and license * start adding tests and doc * Add doc + style * Update test_modeling_glm.py * Add dummies * Apply correct modeling * Refactor attention to follow llama * Update __init__.py * Update convert_glm_weights_to_hf.py * Correct bias * remove linear_bias and pdrop (never used) * apply modular * Simplify converter * remove dummies + style * add model_input_names * Add pretraining_tp to config for when eager attention is used * Update modular to remove all pretraining_tp * Update test_modeling_glm.py * Update the __all__ * Update __all__ * Update __init__.py * Update test_modeling_glm.py * add revisions * Add the correct repos and revisions * style * Update __init__.py * update exports * remove import of modular files * style * Apply Llama changes + refine converter * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * Update convert_glm_weights_to_hf.py * style * Use new modular converter * add pretrainedmodel to init * style * Update test_modeling_glm.py * Move config outside modular to please CI about docstrings * Add dummies to please CI * Update glm.md * Update glm.md
2024-10-18 17:41:12 +02:00
parent e95ea479ee
commit 6604764007
19 changed files with 2975 additions and 4 deletions
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -4938,14 +4938,17 @@ class ModelTesterMixin:
        if not hasattr(self, "_torch_compile_test_ckpt"):
            self.skipTest(f"{self.__class__.__name__} doesn't have the attribute `_torch_compile_test_ckpt`.")
        ckpt = self._torch_compile_test_ckpt
+        revision = "main" if not hasattr(self, "_torch_compile_test_revision") else self._torch_compile_test_revision

        os.environ["TOKENIZERS_PARALLELISM"] = "false"

        batch_size = 1
        n_iter = 3

-        tokenizer = AutoTokenizer.from_pretrained(ckpt)
-        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to(torch_device)
+        tokenizer = AutoTokenizer.from_pretrained(ckpt, revision=revision)
+        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, revision=revision).to(
+            torch_device
+        )

        model.generation_config.max_new_tokens = 4

@@ -5013,11 +5016,14 @@ class ModelTesterMixin:
        if not hasattr(self, "_torch_compile_test_ckpt"):
            self.skipTest(f"{self.__class__.__name__} doesn't have the attribute `_torch_compile_test_ckpt`.")
        ckpt = self._torch_compile_test_ckpt
+        revision = "main" if not hasattr(self, "_torch_compile_test_revision") else self._torch_compile_test_revision

        os.environ["TOKENIZERS_PARALLELISM"] = "false"

-        tokenizer = AutoTokenizer.from_pretrained(ckpt)
-        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to(torch_device)
+        tokenizer = AutoTokenizer.from_pretrained(ckpt, revision=revision)
+        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, revision=revision).to(
+            torch_device
+        )

        cache_implementation = "static"
        if model.config.model_type == "gemma2":