Refactor CI: more explicit (#30674)
* don't run custom when not needed? * update test fetcher filtering * fixup and updates * update * update * reduce burden * nit * nit * missing comma * this? * this? * more parallelism * more * nit for real parallelism on tf and torch examples * update * update * update * update * update * update * update * update * update * update * update * update * update to make it more custom * update to make it more custom * update to make it more custom * update to make it more custom * update * update * update * update * update * update * use correct path * fix path to test files and examples * filter-tests * filter? * filter? * filter? * nits * fix naming of the artifacts to be pushed * list vs files * list vs files * fixup * fix list of all tests * fix the install steps * fix the install steps * fix the config * fix the config * only split if needed * only split if needed * extend should fix it * extend should fix it * arg * arg * update * update * run tests * run tests * run tests * more nits * update * update * update * update * update * update * update * simpler way to show the test, reduces the complexity of the generated config * simpler way to show the test, reduces the complexity of the generated config * style * oups * oups * fix import errors * skip some tests for now * update doctestjob * more parallelism * fixup * test only the test in examples * test only the test in examples * nits * from Arthur * fix generated config * update * update * show tests * oups * oups * fix torch job for now * use single upload step * oups * fu**k * fix * nit * update * nit * fix * fixes * [test-all] * add generate marker and generate job * oups * torch job runs not generate tests * let repo utils test all utils * Update * styling * fix repo utils test * more parallel please * don't test * update * bit more verbose sir * more * hub were skipped * split by classname * revert * maybe? 
* Amazing catch Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> * fix * update * update * maybe non capturing * manual convert? * pass artifacts as parameters as otherwise the config is too long * artifact.json * store output * might not be safe? * my token * mmm? * use CI job ID * can't get a proper id? * ups * build num * update * echo url * this? * this! * fix * wget * ish * dang * update * there we go * update * update * pass all * not .txt * update * fetch * fix naming * fix * up * update * update * ?? * update * more updates * update * more * skip * oups * pr documentation tests are currently created differently * update * hmmmm * oups * curl -L * update * ???? * nit * mmmm * ish * ouf * update * ish * update * update * update * nit * nit * up * oups * documentation_test fix * test hub tests everything, just marker * update * fix * test_hub is the only annoying one now * tf threads? * oups * not sure what is happening? * fix? * just use folder for stating hub * I am getting fucking annoyed * fix the test? * update * update * ? * fixes * add comment! * nit --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com> Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -21,6 +21,7 @@ import unittest
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from parameterized import parameterized
|
||||
|
||||
from transformers import is_torch_available, pipeline, set_seed
|
||||
@@ -88,6 +89,7 @@ if is_torch_available():
|
||||
from transformers.generation.utils import _speculative_sampling
|
||||
|
||||
|
||||
@pytest.mark.generate
|
||||
class GenerationTesterMixin:
|
||||
model_tester = None
|
||||
all_generative_model_classes = ()
|
||||
@@ -417,6 +419,7 @@ class GenerationTesterMixin:
|
||||
|
||||
return output_generate
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_greedy_generate(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -429,6 +432,7 @@ class GenerationTesterMixin:
|
||||
else:
|
||||
self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1])
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_greedy_generate_dict_outputs(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -459,6 +463,7 @@ class GenerationTesterMixin:
|
||||
|
||||
self._check_outputs(output_generate, input_ids, model.config)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_greedy_generate_dict_outputs_use_cache(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -488,6 +493,7 @@ class GenerationTesterMixin:
|
||||
self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1])
|
||||
self._check_outputs(output_generate, input_ids, model.config, use_cache=True)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_sample_generate(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -505,6 +511,7 @@ class GenerationTesterMixin:
|
||||
else:
|
||||
self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1])
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_sample_generate_dict_output(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -536,6 +543,7 @@ class GenerationTesterMixin:
|
||||
|
||||
self._check_outputs(output_generate, input_ids, model.config, num_return_sequences=2)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_beam_search_generate(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -555,6 +563,7 @@ class GenerationTesterMixin:
|
||||
else:
|
||||
self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1])
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_beam_search_generate_dict_output(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -588,6 +597,7 @@ class GenerationTesterMixin:
|
||||
output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"]
|
||||
)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_beam_search_generate_dict_outputs_use_cache(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
# enable cache
|
||||
@@ -626,6 +636,7 @@ class GenerationTesterMixin:
|
||||
|
||||
@require_accelerate
|
||||
@require_torch_multi_accelerator
|
||||
@pytest.mark.generate
|
||||
def test_model_parallel_beam_search(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
if "xpu" in torch_device:
|
||||
@@ -648,6 +659,7 @@ class GenerationTesterMixin:
|
||||
num_beams=2,
|
||||
)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_beam_sample_generate(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -684,6 +696,7 @@ class GenerationTesterMixin:
|
||||
|
||||
torch.testing.assert_close(output_generate[:, input_embeds.shape[1] :], output_generate2)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_beam_sample_generate_dict_output(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -719,6 +732,7 @@ class GenerationTesterMixin:
|
||||
output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"]
|
||||
)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_generate_without_input_ids(self):
|
||||
config, _, _ = self._get_input_ids_and_config()
|
||||
|
||||
@@ -739,6 +753,7 @@ class GenerationTesterMixin:
|
||||
)
|
||||
self.assertIsNotNone(output_ids_generate)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_group_beam_search_generate(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -771,6 +786,7 @@ class GenerationTesterMixin:
|
||||
else:
|
||||
self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1])
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_group_beam_search_generate_dict_output(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -806,6 +822,7 @@ class GenerationTesterMixin:
|
||||
|
||||
# TODO: @gante
|
||||
@is_flaky()
|
||||
@pytest.mark.generate
|
||||
def test_constrained_beam_search_generate(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -863,6 +880,7 @@ class GenerationTesterMixin:
|
||||
for generation_output in output_generate:
|
||||
self._check_sequence_inside_sequence(force_tokens, generation_output)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_constrained_beam_search_generate_dict_output(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
config, input_ids, attention_mask = self._get_input_ids_and_config()
|
||||
@@ -907,6 +925,7 @@ class GenerationTesterMixin:
|
||||
output_generate, input_ids, model.config, num_return_sequences=beam_kwargs["num_beams"]
|
||||
)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_contrastive_generate(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
if model_class._is_stateful:
|
||||
@@ -933,6 +952,7 @@ class GenerationTesterMixin:
|
||||
else:
|
||||
self.assertTrue(output_generate.shape[-1] == self.max_new_tokens + input_ids.shape[-1])
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_contrastive_generate_dict_outputs_use_cache(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
if model_class._is_stateful:
|
||||
@@ -968,6 +988,7 @@ class GenerationTesterMixin:
|
||||
self.assertTrue(output_generate.sequences.shape[-1] == self.max_new_tokens + input_ids.shape[-1])
|
||||
self._check_outputs(output_generate, input_ids, model.config, use_cache=True)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_contrastive_generate_low_memory(self):
|
||||
# Check that choosing 'low_memory' does not change the model output
|
||||
for model_class in self.all_generative_model_classes:
|
||||
@@ -1011,6 +1032,7 @@ class GenerationTesterMixin:
|
||||
)
|
||||
self.assertListEqual(low_output.tolist(), high_output.tolist())
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_beam_search_low_memory(self):
|
||||
# Check that choosing 'low_memory' does not change the model output
|
||||
for model_class in self.all_generative_model_classes:
|
||||
@@ -1053,6 +1075,7 @@ class GenerationTesterMixin:
|
||||
)
|
||||
self.assertListEqual(low_output.tolist(), high_output.tolist())
|
||||
|
||||
@pytest.mark.generate
|
||||
@parameterized.expand([("random",), ("same",)])
|
||||
@is_flaky() # Read NOTE (1) below. If there are API issues, all attempts will fail.
|
||||
def test_assisted_decoding_matches_greedy_search(self, assistant_type):
|
||||
@@ -1134,6 +1157,7 @@ class GenerationTesterMixin:
|
||||
self._check_outputs(output, input_ids, model.config, use_cache=True)
|
||||
|
||||
@is_flaky()
|
||||
@pytest.mark.generate
|
||||
def test_prompt_lookup_decoding_matches_greedy_search(self):
|
||||
# This test ensures that the prompt lookup generation does not introduce output changes over greedy search.
|
||||
# This test is mostly a copy of test_assisted_decoding_matches_greedy_search
|
||||
@@ -1196,6 +1220,7 @@ class GenerationTesterMixin:
|
||||
for output in (output_greedy, output_prompt_lookup):
|
||||
self._check_outputs(output, input_ids, model.config, use_cache=True)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_dola_decoding_sample(self):
|
||||
# TODO (joao): investigate skips, try to reduce incompatibilities
|
||||
for model_class in self.all_generative_model_classes:
|
||||
@@ -1240,6 +1265,7 @@ class GenerationTesterMixin:
|
||||
output_dola = model.generate(input_ids, **model_kwargs, **generation_kwargs)
|
||||
self._check_outputs(output_dola, input_ids, model.config, use_cache=hasattr(config, "use_cache"))
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_assisted_decoding_sample(self):
|
||||
# In this test we don't check assisted vs non-assisted output -- seeded assisted decoding with sample will not
|
||||
# match sample for the same seed, as the forward pass does not return the exact same logits (due to matmul with
|
||||
@@ -1299,6 +1325,7 @@ class GenerationTesterMixin:
|
||||
|
||||
self._check_outputs(output_assisted, input_ids, model.config, use_cache=True)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_prompt_lookup_decoding_stops_at_eos(self):
|
||||
# This test ensures that the prompt lookup generation stops at eos token and does not suggest more tokens
|
||||
# (see https://github.com/huggingface/transformers/pull/31301)
|
||||
@@ -1327,6 +1354,7 @@ class GenerationTesterMixin:
|
||||
# PLD shouldn't propose any new tokens based on eos-match
|
||||
self.assertTrue(output_prompt_lookup.shape[-1] == 10)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_generate_with_head_masking(self):
|
||||
"""Test designed for encoder-decoder models to ensure the attention head masking is used."""
|
||||
attention_names = ["encoder_attentions", "decoder_attentions", "cross_attentions"]
|
||||
@@ -1366,6 +1394,7 @@ class GenerationTesterMixin:
|
||||
attn_weights = out[attn_name] if attn_name == attention_names[0] else out[attn_name][-1]
|
||||
self.assertEqual(sum([w.sum().item() for w in attn_weights]), 0.0)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_left_padding_compatibility(self):
|
||||
# NOTE: left-padding results in small numerical differences. This is expected.
|
||||
# See https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535
|
||||
@@ -1434,6 +1463,7 @@ class GenerationTesterMixin:
|
||||
# They should result in very similar logits
|
||||
self.assertTrue(torch.allclose(next_logits_wo_padding, next_logits_with_padding, atol=1e-5))
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_past_key_values_format(self):
|
||||
# Test that the KV cache is formatted correctly. Exceptions need to explicitly overwrite this test. Having a
|
||||
# standard KV cache format is important for a consistent API (and for advanced generation methods).
|
||||
@@ -1505,6 +1535,7 @@ class GenerationTesterMixin:
|
||||
past_kv[i][1].shape, (batch_size, num_attention_heads, seq_length, per_head_embed_dim)
|
||||
)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_generate_from_inputs_embeds_decoder_only(self):
|
||||
# When supported, tests that the decoder model can generate from `inputs_embeds` instead of `input_ids`
|
||||
# if fails, you should probably update the `prepare_inputs_for_generation` function
|
||||
@@ -1555,6 +1586,7 @@ class GenerationTesterMixin:
|
||||
outputs_from_embeds_wo_ids.tolist(),
|
||||
)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_generate_continue_from_past_key_values(self):
|
||||
# Tests that we can continue generating from past key values, returned from a previous `generate` call
|
||||
for model_class in self.all_generative_model_classes:
|
||||
@@ -1638,6 +1670,7 @@ class GenerationTesterMixin:
|
||||
)
|
||||
|
||||
@parameterized.expand([(1, False), (1, True), (4, False)])
|
||||
@pytest.mark.generate
|
||||
def test_new_cache_format(self, num_beams, do_sample):
|
||||
# Tests that generating with the new format is exactly the same as the legacy one (for models that support it).
|
||||
# 👉 tests with and without beam search so that we can test with and without cache reordering.
|
||||
@@ -1702,6 +1735,7 @@ class GenerationTesterMixin:
|
||||
)
|
||||
)
|
||||
|
||||
@pytest.mark.generate
|
||||
def test_generate_with_static_cache(self):
|
||||
"""
|
||||
Tests if StaticCache works if we set attn_implementation=static when generation.
|
||||
@@ -1750,6 +1784,7 @@ class GenerationTesterMixin:
|
||||
self.assertTrue(results.past_key_values.key_cache[0].shape == cache_shape)
|
||||
|
||||
@require_quanto
|
||||
@pytest.mark.generate
|
||||
def test_generate_with_quant_cache(self):
|
||||
for model_class in self.all_generative_model_classes:
|
||||
if not model_class._supports_quantized_cache:
|
||||
@@ -1782,6 +1817,7 @@ class GenerationTesterMixin:
|
||||
with self.assertRaises(ValueError):
|
||||
model.generate(input_ids, attention_mask=attention_mask, **generation_kwargs)
|
||||
|
||||
@pytest.mark.generate
|
||||
@require_torch_gpu
|
||||
@slow
|
||||
@is_flaky() # compilation may result in equivalent (!= same) FP ops, causing the argmax in `generate` to be flaky
|
||||
@@ -2134,6 +2170,7 @@ class UtilsFunctionsTest(unittest.TestCase):
|
||||
self.assertTrue(validated_tokens.tolist()[0] == [1, 4, 8])
|
||||
|
||||
|
||||
@pytest.mark.generate
|
||||
@require_torch
|
||||
class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
|
||||
# setting framework_dependent_parameters needs to be gated, just like its contents' imports
|
||||
|
||||
Reference in New Issue
Block a user