Fix model integration ci (#26322)

* fix wav2vec2 * nit * stash * one more file to update * fix byt5 * vocab size is 256, don't change that! * use other revision * test persimmon in smaller size * style * tests * nits * update add tokens from pretrained * test tokenization * nits * potential fnet fix? * more nits * nits * correct test * assert close * update * ouch * fix it * some more nits * FINALLY * use `adept` checkpoints * more adept checkpoints * that was involved!
2023-10-02 13:55:46 +02:00
parent 6824461f2a
commit 63864e057f
9 changed files with 38 additions and 18 deletions
--- a/tests/models/persimmon/test_modeling_persimmon.py
+++ b/tests/models/persimmon/test_modeling_persimmon.py
@@ -386,11 +386,13 @@ class PersimmonIntegrationTest(unittest.TestCase):
    @slow
    def test_model_8b_chat_logits(self):
        input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
-        model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat", device_map="auto")
+        model = PersimmonForCausalLM.from_pretrained(
+            "adept/persimmon-8b-chat", device_map="auto", torch_dtype=torch.float16
+        )
        out = model(torch.tensor([input_ids])).logits

        EXPECTED_MEAN = torch.tensor(
-            [[-11.2879, -11.2628, -11.2498, -11.2534, -11.2676, -11.2638, -11.2501, -11.2431]], dtype=torch.float32
+            [[-11.2879, -11.2628, -11.2498, -11.2534, -11.2676, -11.2638, -11.2501, -11.2431]], dtype=torch.float16
        )
        torch.testing.assert_close(out.cpu().mean(-1), EXPECTED_MEAN, atol=1e-4, rtol=1e-4)
        # fmt: off
@@ -403,9 +405,11 @@ class PersimmonIntegrationTest(unittest.TestCase):
    def test_model_8b_chat_greedy_generation(self):
        EXPECTED_TEXT_COMPLETION = """human: Simply put, the theory of relativity states that?\n\nadept: The theory of relativity states that the laws of physics are the same for all observers, regardless of their relative motion."""
        prompt = "human: Simply put, the theory of relativity states that?\n\nadept:"
-        tokenizer = AutoTokenizer.from_pretrained("ArthurZ/persimmon-8b-chat", use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained("adept/persimmon-8b-chat", use_fast=False)
        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(torch_device)
-        model = PersimmonForCausalLM.from_pretrained("ArthurZ/persimmon-8b-chat").to(torch_device)
+        model = PersimmonForCausalLM.from_pretrained("adept/persimmon-8b-chat", torch_dtype=torch.float16).to(
+            torch_device
+        )

        # greedy generation outputs
        generated_ids = model.generate(input_ids, max_new_tokens=64)