Bart: new cache format (#35314)
* bart compile
* add mbart
* some more models touched by fix-copies
* more
* more models
* even more models
* fix copies
* fix tests
* fix copies
* fix
* biogpt accepts position ids now (breaking?)
* fix failing non-slow tests
* fix some tests
* should not be removed
* small update
* Update src/transformers/models/bart/modeling_bart.py

  Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
* update for last `main`
* fix copies
* clone `update_causal_mask` from llama
* tmp
* fixup
* why? how?
* fix bart tests
* dont skip test
* address comments
* fix tests
* fix
* fixup and delete the file

---------

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
This commit is contained in:
committed by
GitHub
parent
3ab47b6ce3
commit
01ad9f4b49
@@ -538,7 +538,7 @@ class MBartStandaloneDecoderModelTester:
|
||||
decoder_layers=2,
|
||||
encoder_attention_heads=4,
|
||||
decoder_attention_heads=4,
|
||||
-max_position_embeddings=30,
+max_position_embeddings=50,
|
||||
is_encoder_decoder=False,
|
||||
pad_token_id=0,
|
||||
bos_token_id=1,
|
||||
@@ -681,9 +681,9 @@ class MBartStandaloneDecoderModelTester:
|
||||
|
||||
# get two different outputs
|
||||
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
|
||||
-output_from_past = model(next_tokens, attention_mask=attn_mask, past_key_values=past_key_values)[
-"last_hidden_state"
-]
+output_from_past = model(
+next_tokens, attention_mask=attn_mask, past_key_values=past_key_values, use_cache=True
+)["last_hidden_state"]
|
||||
|
||||
# select random slice
|
||||
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
|
||||
|
||||
Reference in New Issue
Block a user