Make test_generate_with_static_cache even less flaky (#34995)

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2024-12-20 16:03:26 +01:00
committed by GitHub
parent 0fc2970363
commit 504c4d3692
5 changed files with 93 additions and 32 deletions

View File

@@ -14,6 +14,7 @@
import collections import collections
import contextlib import contextlib
import copy
import doctest import doctest
import functools import functools
import gc import gc
@@ -1396,6 +1397,53 @@ def assert_screenout(out, what):
assert match_str != -1, f"expecting to find {what} in output: f{out_pr}" assert match_str != -1, f"expecting to find {what} in output: f{out_pr}"
def set_model_tester_for_less_flaky_test(test_case):
    """Shrink the model tester attached to `test_case` down to a single hidden layer.

    `num_hidden_layers` is set to 1 on `test_case.model_tester` when that attribute exists. The same is
    done for the tester's `vision_config` and `text_config` when they define `num_hidden_layers`; each of
    those is deep-copied before being edited, so the object originally stored on the tester is not mutated.

    Sub-configs may be plain dicts (edited by key) or objects (edited by attribute). A sub-config that is
    `None` or that does not define `num_hidden_layers` is left as is.

    Args:
        test_case: Object exposing a `model_tester` attribute.

    Returns:
        None. `test_case.model_tester` is modified in place.
    """
    model_tester = test_case.model_tester

    if hasattr(model_tester, "num_hidden_layers"):
        model_tester.num_hidden_layers = 1

    for config_name in ("vision_config", "text_config"):
        sub_config = getattr(model_tester, config_name, None)
        if sub_config is None:
            # Attribute missing or explicitly unset: nothing to shrink.
            continue

        if isinstance(sub_config, dict):
            if "num_hidden_layers" not in sub_config:
                continue
            sub_config = copy.deepcopy(sub_config)
            sub_config["num_hidden_layers"] = 1
        elif hasattr(sub_config, "num_hidden_layers"):
            # Object-style config: a membership test / item assignment would fail here, use attributes.
            sub_config = copy.deepcopy(sub_config)
            sub_config.num_hidden_layers = 1
        else:
            continue

        # Rebind the edited copy so the tester uses it from now on.
        setattr(model_tester, config_name, sub_config)
def set_config_for_less_flaky_test(config):
    """Force every known normalization-epsilon attribute to 1.0 on `config` and on its sub-configs.

    The attributes are set unconditionally (they are created when absent) on `config` itself and on each
    of `text_config`, `vision_config`, `text_encoder`, `audio_encoder` and `decoder` that is present on
    `config`. A sub-config attribute that exists but is `None` is skipped.

    Args:
        config: Configuration object that accepts attribute assignment.

    Returns:
        None. `config` (and its sub-configs) are modified in place.
    """
    eps_attr_names = (
        "rms_norm_eps",
        "layer_norm_eps",
        "norm_eps",
        "norm_epsilon",
        "layer_norm_epsilon",
        "batch_norm_eps",
    )
    sub_config_names = ("text_config", "vision_config", "text_encoder", "audio_encoder", "decoder")

    # norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance.
    # (We don't need the original epsilon values to check eager/sdpa matches)
    targets = [config]
    for sub_config_name in sub_config_names:
        sub_config = getattr(config, sub_config_name, None)
        # An unset (`None`) sub-config cannot take attributes; skip it rather than fail.
        if sub_config is not None:
            targets.append(sub_config)

    for target in targets:
        for eps_attr in eps_attr_names:
            setattr(target, eps_attr, 1.0)
def set_model_for_less_flaky_test(model):
    """Set the epsilon of every normalization sub-module of `model` to 1.0.

    This complements the config-level override: some models don't set the epsilon of their norm layers
    from their config. Sub-modules are selected by class-name suffix, and whichever of `eps`, `epsilon`
    and `variance_epsilon` they already define is overwritten. Nothing happens when torch is unavailable
    or `model` is not a `torch.nn.Module`.

    Args:
        model: The model whose norm layers should be patched.

    Returns:
        None. Matching sub-modules are modified in place.
    """
    if not (is_torch_available() and isinstance(model, torch.nn.Module)):
        return

    norm_class_suffixes = ("LayerNorm", "GroupNorm", "BatchNorm", "RMSNorm", "BatchNorm2d", "BatchNorm1d")
    eps_attr_names = ("eps", "epsilon", "variance_epsilon")

    for submodule in model.modules():
        # `str.endswith` accepts a tuple, so one call covers every suffix.
        if not type(submodule).__name__.endswith(norm_class_suffixes):
            continue
        for eps_attr in eps_attr_names:
            # Only overwrite attributes the module already has; never create new ones.
            if hasattr(submodule, eps_attr):
                setattr(submodule, eps_attr, 1.0)
class CaptureStd: class CaptureStd:
""" """
Context manager to capture: Context manager to capture:

View File

@@ -37,6 +37,9 @@ from transformers.testing_utils import (
require_torch_multi_accelerator, require_torch_multi_accelerator,
require_torch_multi_gpu, require_torch_multi_gpu,
require_torch_sdpa, require_torch_sdpa,
set_config_for_less_flaky_test,
set_model_for_less_flaky_test,
set_model_tester_for_less_flaky_test,
slow, slow,
torch_device, torch_device,
) )
@@ -1921,11 +1924,13 @@ class GenerationTesterMixin:
Tests that generating with static cache give almost same results as with dynamic cache, and the output cache Tests that generating with static cache give almost same results as with dynamic cache, and the output cache
has the expected shapes has the expected shapes
""" """
set_model_tester_for_less_flaky_test(self)
for model_class in self.all_generative_model_classes: for model_class in self.all_generative_model_classes:
if not model_class._supports_static_cache: if not model_class._supports_static_cache:
self.skipTest(reason="This model does not support the static cache format") self.skipTest(reason="This model does not support the static cache format")
config, inputs_dict = self.prepare_config_and_inputs_for_generate() config, inputs_dict = self.prepare_config_and_inputs_for_generate()
set_config_for_less_flaky_test(config)
main_input = inputs_dict[model_class.main_input_name] main_input = inputs_dict[model_class.main_input_name]
if config.is_encoder_decoder: if config.is_encoder_decoder:
@@ -1938,6 +1943,8 @@ class GenerationTesterMixin:
for dtype in (torch.float32, torch.float16): for dtype in (torch.float32, torch.float16):
model = model_class(config).to(torch_device).to(dtype).eval() model = model_class(config).to(torch_device).to(dtype).eval()
set_model_for_less_flaky_test(model)
generation_kwargs = { generation_kwargs = {
"max_new_tokens": max_new_tokens, "max_new_tokens": max_new_tokens,
"return_dict_in_generate": True, # Required to return `past_key_values` "return_dict_in_generate": True, # Required to return `past_key_values`

View File

@@ -41,6 +41,9 @@ from transformers.testing_utils import (
require_torch_gpu, require_torch_gpu,
require_torch_sdpa, require_torch_sdpa,
require_torchaudio, require_torchaudio,
set_config_for_less_flaky_test,
set_model_for_less_flaky_test,
set_model_tester_for_less_flaky_test,
slow, slow,
torch_device, torch_device,
) )
@@ -516,8 +519,11 @@ class MusicgenMelodyDecoderTest(ModelTesterMixin, GenerationTesterMixin, unittes
def get_mean_reldiff(failcase, x, ref, atol, rtol): def get_mean_reldiff(failcase, x, ref, atol, rtol):
return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}" return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}"
set_model_tester_for_less_flaky_test(self)
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
set_config_for_less_flaky_test(config)
model = model_class(config) model = model_class(config)
is_encoder_decoder = model.config.is_encoder_decoder is_encoder_decoder = model.config.is_encoder_decoder
@@ -534,6 +540,9 @@ class MusicgenMelodyDecoderTest(ModelTesterMixin, GenerationTesterMixin, unittes
) )
model_eager = model_eager.eval().to(torch_device) model_eager = model_eager.eval().to(torch_device)
set_model_for_less_flaky_test(model_eager)
set_model_for_less_flaky_test(model_sdpa)
# We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 8 times the model, # We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 8 times the model,
# but it would be nicer to have an efficient way to use parameterized.expand # but it would be nicer to have an efficient way to use parameterized.expand
fail_cases = [] fail_cases = []
@@ -1528,8 +1537,11 @@ class MusicgenMelodyTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
def get_mean_reldiff(failcase, x, ref, atol, rtol): def get_mean_reldiff(failcase, x, ref, atol, rtol):
return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}" return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}"
set_model_tester_for_less_flaky_test(self)
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
set_config_for_less_flaky_test(config)
model = model_class(config) model = model_class(config)
is_encoder_decoder = model.config.is_encoder_decoder is_encoder_decoder = model.config.is_encoder_decoder
@@ -1546,6 +1558,9 @@ class MusicgenMelodyTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
) )
model_eager = model_eager.eval().to(torch_device) model_eager = model_eager.eval().to(torch_device)
set_model_for_less_flaky_test(model_eager)
set_model_for_less_flaky_test(model_sdpa)
# We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 8 times the model, # We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 8 times the model,
# but it would be nicer to have an efficient way to use parameterized.expand # but it would be nicer to have an efficient way to use parameterized.expand
fail_cases = [] fail_cases = []

View File

@@ -840,7 +840,13 @@ class SeamlessM4Tv2GenerationTest(unittest.TestCase):
def test_speech_generation(self): def test_speech_generation(self):
config, input_speech, input_text = self.prepare_speech_and_text_input() config, input_speech, input_text = self.prepare_speech_and_text_input()
from transformers.testing_utils import set_config_for_less_flaky_test, set_model_for_less_flaky_test
set_config_for_less_flaky_test(config)
model = SeamlessM4Tv2Model(config=config) model = SeamlessM4Tv2Model(config=config)
set_model_for_less_flaky_test(model)
self.update_generation(model) self.update_generation(model)
model.save_pretrained(self.tmpdirname) model.save_pretrained(self.tmpdirname)
model.to(torch_device) model.to(torch_device)
@@ -852,6 +858,11 @@ class SeamlessM4Tv2GenerationTest(unittest.TestCase):
state_dict = model.state_dict() state_dict = model.state_dict()
text_model = SeamlessM4Tv2ForTextToSpeech.from_pretrained(self.tmpdirname) text_model = SeamlessM4Tv2ForTextToSpeech.from_pretrained(self.tmpdirname)
# Even if this component is loaded after `model.save_pretrained` which is after
# `set_model_for_less_flaky_test(model)`, we still need to apply `set_model_for_less_flaky_test` here as the
# `eps` attribute in the model's norm layers is not set from the config.
set_model_for_less_flaky_test(text_model)
self.update_generation(text_model) self.update_generation(text_model)
text_model.to(torch_device) text_model.to(torch_device)
text_model.eval() text_model.eval()
@@ -859,6 +870,11 @@ class SeamlessM4Tv2GenerationTest(unittest.TestCase):
output_text = self.factory_generation_speech_test(model, input_text) output_text = self.factory_generation_speech_test(model, input_text)
speech_model = SeamlessM4Tv2ForSpeechToSpeech.from_pretrained(self.tmpdirname) speech_model = SeamlessM4Tv2ForSpeechToSpeech.from_pretrained(self.tmpdirname)
# Even if this component is loaded after `model.save_pretrained` which is after
# `set_model_for_less_flaky_test(model)`, we still need to apply `set_model_for_less_flaky_test` here as the
# `eps` attribute in the model's norm layers is not set from the config.
set_model_for_less_flaky_test(speech_model)
self.update_generation(speech_model) self.update_generation(speech_model)
speech_model.to(torch_device) speech_model.to(torch_device)
speech_model.eval() speech_model.eval()

View File

@@ -89,6 +89,9 @@ from transformers.testing_utils import (
require_torch_multi_accelerator, require_torch_multi_accelerator,
require_torch_multi_gpu, require_torch_multi_gpu,
require_torch_sdpa, require_torch_sdpa,
set_config_for_less_flaky_test,
set_model_for_less_flaky_test,
set_model_tester_for_less_flaky_test,
slow, slow,
torch_device, torch_device,
) )
@@ -3976,34 +3979,11 @@ class ModelTesterMixin:
def get_mean_reldiff(failcase, x, ref, atol, rtol): def get_mean_reldiff(failcase, x, ref, atol, rtol):
return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}" return f"{failcase}: mean relative difference: {((x - ref).abs() / (ref.abs() + 1e-12)).mean():.3e}, torch atol = {atol}, torch rtol = {rtol}"
if hasattr(self.model_tester, "num_hidden_layers"): set_model_tester_for_less_flaky_test(self)
self.model_tester.num_hidden_layers = 1
if hasattr(self.model_tester, "vision_config") and "num_hidden_layers" in self.model_tester.vision_config:
self.model_tester.vision_config = copy.deepcopy(self.model_tester.vision_config)
self.model_tester.vision_config["num_hidden_layers"] = 1
if hasattr(self.model_tester, "text_config") and "num_hidden_layers" in self.model_tester.text_config:
self.model_tester.text_config = copy.deepcopy(self.model_tester.text_config)
self.model_tester.text_config["num_hidden_layers"] = 1
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
set_config_for_less_flaky_test(config)
config.rms_norm_eps = 1.0
config.layer_norm_eps = 1.0
config.norm_eps = 1.0
config.norm_epsilon = 1.0
config.layer_norm_epsilon = 1.0
# norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance.
# (We don't need the original epsilon values to check eager/sdpa matches)
for attr in ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]:
if hasattr(config, attr):
getattr(config, attr).rms_norm_eps = 1.0
getattr(config, attr).layer_norm_eps = 1.0
getattr(config, attr).norm_eps = 1.0
getattr(config, attr).norm_epsilon = 1.0
getattr(config, attr).layer_norm_epsilon = 1.0
model = model_class(config) model = model_class(config)
# FIXME: we deactivate boolean mask for models using "use_mask_token" in their constructors. # FIXME: we deactivate boolean mask for models using "use_mask_token" in their constructors.
# These models support masking only in the case `use_mask_token=True`. Otherwise they cannot consume an input mask. # These models support masking only in the case `use_mask_token=True`. Otherwise they cannot consume an input mask.
@@ -4029,13 +4009,8 @@ class ModelTesterMixin:
) )
model_eager = model_eager.eval().to(torch_device, dtype=torch_dtype) model_eager = model_eager.eval().to(torch_device, dtype=torch_dtype)
# Another way to make sure norm layers have desired epsilon. (Some models don't set it from its config.) set_model_for_less_flaky_test(model_eager)
for x in model_eager.modules(): set_model_for_less_flaky_test(model_sdpa)
if isinstance(x, (nn.LayerNorm, nn.GroupNorm)):
x.eps = 1.0
for x in model_sdpa.modules():
if isinstance(x, (nn.LayerNorm, nn.GroupNorm)):
x.eps = 1.0
# We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 16 times the model, # We use these for loops instead of parameterized.expand just for the interest of avoiding loading/saving 16 times the model,
# but it would be nicer to have an efficient way to use parameterized.expand # but it would be nicer to have an efficient way to use parameterized.expand