Bart: new cache format (#35314)

* bart compile
* add mbart
* some more models touched by fix-copies
* more
* more models
* even more models
* fix copies
* fix tests
* fix copies
* fix
* biogpt accepts position ids now (breaking?)
* fix failing non-slow tests
* fix some tests
* should not be removed
* small update
* Update src/transformers/models/bart/modeling_bart.py
  Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
* update for last `main`
* fix copies
* clone `update_causal_mask` from llama
* tmp
* fixup
* why? how?
* fix bart tests
* dont skip test
* address comments
* fix tests
* fix
* fixup and delete the file

---------

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
2025-05-16 13:26:54 +02:00
parent 3ab47b6ce3
commit 01ad9f4b49
46 changed files with 3904 additions and 1995 deletions
--- a/tests/models/bart/test_modeling_bart.py
+++ b/tests/models/bart/test_modeling_bart.py
@@ -87,7 +87,7 @@ class BartModelTester:
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
-        max_position_embeddings=20,
+        max_position_embeddings=50,
        eos_token_id=2,
        pad_token_id=1,
        bos_token_id=0,
@@ -1164,8 +1164,7 @@ class BartModelIntegrationTests(unittest.TestCase):
            [FRANCE_ARTICLE, SHORTER_ARTICLE, IRAN_ARTICLE, ARTICLE_SUBWAY],
            max_length=1024,
            padding="max_length",
-            truncation_strategy="only_first",
-            truncation=True,
+            truncation="only_first",
            return_tensors="pt",
        )

@@ -1301,7 +1300,7 @@ class BartStandaloneDecoderModelTester:
        decoder_layers=2,
        encoder_attention_heads=4,
        decoder_attention_heads=4,
-        max_position_embeddings=30,
+        max_position_embeddings=50,
        is_encoder_decoder=False,
        pad_token_id=0,
        bos_token_id=1,
@@ -1365,6 +1364,7 @@ class BartStandaloneDecoderModelTester:
            decoder_start_token_id=self.decoder_start_token_id,
            max_position_embeddings=self.max_position_embeddings,
            is_encoder_decoder=self.is_encoder_decoder,
+            forced_eos_token_id=None,
        )

        return (
@@ -1465,9 +1465,9 @@ class BartStandaloneDecoderModelTester:

        # get two different outputs
        output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
-        output_from_past = model(next_tokens, attention_mask=attn_mask, past_key_values=past_key_values)[
-            "last_hidden_state"
-        ]
+        output_from_past = model(
+            next_tokens, attention_mask=attn_mask, past_key_values=past_key_values, use_cache=True
+        )["last_hidden_state"]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()