* Create modular_glm.py

* Update modular_glm.py

* Finalize architecture without all attentions

* Add all attentions modules

* Finalize modular

* Update given last version

* Last update

* Finalize model

* Finalize converter

* Update convert_glm_weights_to_hf.py

* style

* style

* Create __init__.py

* Aff all inits

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Correct the rotary embeddings

* Remove apply_residual_connection_post_layernorm (always false)

* remove use_rms_norm (always true)

* remove past_layer_norm (always true)

* Update __init__.py

* Update config and license

* start adding tests and doc

* Add doc + style

* Update test_modeling_glm.py

* Add dummies

* Apply correct modeling

* Refactor attention to follow llama

* Update __init__.py

* Update convert_glm_weights_to_hf.py

* Correct bias

* remove linear_bias and pdrop (never used)

* apply modular

* Simplify converter

* remove dummies + style

* add model_input_names

* Add pretraining_tp to config for when eager attention is used

* Update modular to remove all pretraining_tp

* Update test_modeling_glm.py

* Update the __all__

* Update __all__

* Update __init__.py

* Update test_modeling_glm.py

* add revisions

* Add the correct repos and revisions

* style

* Update __init__.py

* update exports

* remove import of modular files

* style

* Apply Llama changes + refine converter

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* Update convert_glm_weights_to_hf.py

* style

* Use new modular converter

* add pretrainedmodel to init

* style

* Update test_modeling_glm.py

* Move config outside modular to please CI about docstrings

* Add dummies to please CI

* Update glm.md

* Update glm.md
This commit is contained in:
Cyril Vallez
2024-10-18 17:41:12 +02:00
committed by GitHub
parent e95ea479ee
commit 6604764007
19 changed files with 2975 additions and 4 deletions

View File

@@ -4938,14 +4938,17 @@ class ModelTesterMixin:
if not hasattr(self, "_torch_compile_test_ckpt"):
self.skipTest(f"{self.__class__.__name__} doesn't have the attribute `_torch_compile_test_ckpt`.")
ckpt = self._torch_compile_test_ckpt
revision = "main" if not hasattr(self, "_torch_compile_test_revision") else self._torch_compile_test_revision
os.environ["TOKENIZERS_PARALLELISM"] = "false"
batch_size = 1
n_iter = 3
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to(torch_device)
tokenizer = AutoTokenizer.from_pretrained(ckpt, revision=revision)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, revision=revision).to(
torch_device
)
model.generation_config.max_new_tokens = 4
@@ -5013,11 +5016,14 @@ class ModelTesterMixin:
if not hasattr(self, "_torch_compile_test_ckpt"):
self.skipTest(f"{self.__class__.__name__} doesn't have the attribute `_torch_compile_test_ckpt`.")
ckpt = self._torch_compile_test_ckpt
revision = "main" if not hasattr(self, "_torch_compile_test_revision") else self._torch_compile_test_revision
os.environ["TOKENIZERS_PARALLELISM"] = "false"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to(torch_device)
tokenizer = AutoTokenizer.from_pretrained(ckpt, revision=revision)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, revision=revision).to(
torch_device
)
cache_implementation = "static"
if model.config.model_type == "gemma2":