use torch.testing.assert_close instead to get more details about errors in CI (#35659)
* use torch.testing.assert_close instead to get more details about errors in CI * fix * style * test_all * revert for I-BERT * fixes and updates * more image processing fixes * more image processors * fix mamba and co * style * less strict * ok I won't be strict * skip and be done * up
This commit is contained in:
@@ -1771,7 +1771,7 @@ class ModelTesterMixin:
|
||||
model.eval()
|
||||
|
||||
hidden_states_with_chunk = model(**self._prepare_for_class(inputs_dict, model_class))[0]
|
||||
self.assertTrue(torch.allclose(hidden_states_no_chunk, hidden_states_with_chunk, atol=1e-3))
|
||||
torch.testing.assert_close(hidden_states_no_chunk, hidden_states_with_chunk, rtol=1e-3, atol=1e-3)
|
||||
|
||||
def test_resize_position_vector_embeddings(self):
|
||||
if not self.test_resize_position_embeddings:
|
||||
@@ -1898,7 +1898,7 @@ class ModelTesterMixin:
|
||||
else:
|
||||
old_embeddings_mean = torch.mean(model_embed.weight.data[:-10, :], axis=0)
|
||||
new_embeddings_mean = torch.mean(model_embed.weight.data[-10:, :], axis=0)
|
||||
torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, atol=1e-3, rtol=1e-1)
|
||||
torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, rtol=1e-3, atol=1e-3)
|
||||
|
||||
# Check that the model can still do a forward pass successfully (every parameter should be resized)
|
||||
if not is_deepspeed_zero3_enabled():
|
||||
@@ -2006,7 +2006,7 @@ class ModelTesterMixin:
|
||||
else:
|
||||
old_embeddings_mean = torch.mean(model_embed.weight.data[:-10, :], axis=0)
|
||||
new_embeddings_mean = torch.mean(model_embed.weight.data[-10:, :], axis=0)
|
||||
torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, atol=1e-3, rtol=1e-1)
|
||||
torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, rtol=1e-3, atol=1e-3)
|
||||
|
||||
@require_deepspeed
|
||||
@require_torch_accelerator
|
||||
@@ -2081,7 +2081,7 @@ class ModelTesterMixin:
|
||||
else:
|
||||
old_embeddings_mean = torch.mean(output_embeds.weight.data[:-10, :], axis=0)
|
||||
new_embeddings_mean = torch.mean(output_embeds.weight.data[-10:, :], axis=0)
|
||||
torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, atol=1e-3, rtol=1e-1)
|
||||
torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, rtol=1e-3, atol=1e-3)
|
||||
# check if the old bias mean close to added bias mean.
|
||||
if output_embeds.bias is not None:
|
||||
if is_deepspeed_zero3_enabled():
|
||||
@@ -2092,7 +2092,7 @@ class ModelTesterMixin:
|
||||
old_bias_mean = torch.mean(output_embeds.bias.data[:-10], axis=0)
|
||||
new_bias_mean = torch.mean(output_embeds.bias.data[-10:], axis=0)
|
||||
|
||||
torch.testing.assert_close(old_bias_mean, new_bias_mean, atol=1e-5, rtol=1e-2)
|
||||
torch.testing.assert_close(old_bias_mean, new_bias_mean, rtol=1e-5, atol=1e-5)
|
||||
|
||||
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
|
||||
model.resize_token_embeddings(model_vocab_size - 15)
|
||||
@@ -3049,7 +3049,7 @@ class ModelTesterMixin:
|
||||
out_embeds = model(
|
||||
inputs_embeds=inputs_embeds, decoder_inputs_embeds=decoder_inputs_embeds, **inputs
|
||||
)[0]
|
||||
self.assertTrue(torch.allclose(out_embeds, out_ids))
|
||||
torch.testing.assert_close(out_embeds, out_ids)
|
||||
|
||||
@require_non_xpu
|
||||
@require_torch_multi_gpu
|
||||
@@ -3170,10 +3170,10 @@ class ModelTesterMixin:
|
||||
|
||||
for value, parallel_value in zip(output, parallel_output):
|
||||
if isinstance(value, torch.Tensor):
|
||||
self.assertTrue(torch.allclose(value, parallel_value.to("cpu"), atol=1e-7))
|
||||
torch.testing.assert_close(value, parallel_value.to("cpu"), rtol=1e-7, atol=1e-7)
|
||||
elif isinstance(value, (Tuple, List)):
|
||||
for value_, parallel_value_ in zip(value, parallel_value):
|
||||
self.assertTrue(torch.allclose(value_, parallel_value_.to("cpu"), atol=1e-7))
|
||||
torch.testing.assert_close(value_, parallel_value_.to("cpu"), rtol=1e-7, atol=1e-7)
|
||||
|
||||
def check_device_map_is_respected(self, model, device_map):
|
||||
for param_name, param in model.named_parameters():
|
||||
@@ -3229,9 +3229,12 @@ class ModelTesterMixin:
|
||||
new_output = new_model(**inputs_dict_class)
|
||||
|
||||
if isinstance(base_output[0], tuple) and isinstance(new_output[0], tuple):
|
||||
self.assertTrue(torch.allclose(a, b, atol=1e-5) for a, b in zip(base_output[0], new_output[0]))
|
||||
[
|
||||
torch.testing.assert_close(a, b, rtol=1e-5, atol=1e-5)
|
||||
for a, b in zip(base_output[0], new_output[0])
|
||||
]
|
||||
else:
|
||||
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
|
||||
torch.testing.assert_close(base_output[0], new_output[0], rtol=1e-5, atol=1e-5)
|
||||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@@ -3264,9 +3267,12 @@ class ModelTesterMixin:
|
||||
new_output = new_model(**inputs_dict_class)
|
||||
|
||||
if isinstance(base_output[0], tuple) and isinstance(new_output[0], tuple):
|
||||
self.assertTrue(torch.allclose(a, b, atol=1e-5) for a, b in zip(base_output[0], new_output[0]))
|
||||
[
|
||||
torch.testing.assert_close(a, b, rtol=1e-5, atol=1e-5)
|
||||
for a, b in zip(base_output[0], new_output[0])
|
||||
]
|
||||
else:
|
||||
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
|
||||
torch.testing.assert_close(base_output[0], new_output[0], rtol=1e-5, atol=1e-5)
|
||||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@@ -3303,9 +3309,12 @@ class ModelTesterMixin:
|
||||
new_output = new_model(**inputs_dict_class)
|
||||
|
||||
if isinstance(base_output[0], tuple) and isinstance(new_output[0], tuple):
|
||||
self.assertTrue(torch.allclose(a, b, atol=1e-5) for a, b in zip(base_output[0], new_output[0]))
|
||||
[
|
||||
torch.testing.assert_close(a, b, rtol=1e-5, atol=1e-5)
|
||||
for a, b in zip(base_output[0], new_output[0])
|
||||
]
|
||||
else:
|
||||
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
|
||||
torch.testing.assert_close(base_output[0], new_output[0], rtol=1e-5, atol=1e-5)
|
||||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@@ -3341,9 +3350,12 @@ class ModelTesterMixin:
|
||||
new_output = new_model(**inputs_dict_class)
|
||||
|
||||
if isinstance(base_output[0], tuple) and isinstance(new_output[0], tuple):
|
||||
self.assertTrue(torch.allclose(a, b, atol=1e-5) for a, b in zip(base_output[0], new_output[0]))
|
||||
[
|
||||
torch.testing.assert_close(a, b, rtol=1e-5, atol=1e-5)
|
||||
for a, b in zip(base_output[0], new_output[0])
|
||||
]
|
||||
else:
|
||||
self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
|
||||
torch.testing.assert_close(base_output[0], new_output[0], rtol=1e-5, atol=1e-5)
|
||||
|
||||
def test_problem_types(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
@@ -4555,10 +4567,10 @@ class ModelTesterMixin:
|
||||
logits_padded = res_padded.logits[inputs_dict["attention_mask"].bool()]
|
||||
logits_padfree = res_padfree.logits[0]
|
||||
|
||||
torch.testing.assert_close(logits_padded.argmax(-1), logits_padfree.argmax(-1), atol=0, rtol=0)
|
||||
torch.testing.assert_close(logits_padded.argmax(-1), logits_padfree.argmax(-1), rtol=0, atol=0)
|
||||
# acceptable numerical instability
|
||||
tol = torch.finfo(torch.float16).eps
|
||||
torch.testing.assert_close(logits_padded, logits_padfree, atol=tol, rtol=tol)
|
||||
torch.testing.assert_close(logits_padded, logits_padfree, rtol=tol, atol=tol)
|
||||
|
||||
@is_pt_tf_cross_test
|
||||
def test_tf_from_pt_safetensors(self):
|
||||
@@ -4780,7 +4792,7 @@ class ModelTesterMixin:
|
||||
self.assertEqual(tuple(last_token_logits.shape), (batch_size, 1, vocab_size))
|
||||
|
||||
# Assert the last tokens are actually the same (except for the natural fluctuation due to order of FP ops)
|
||||
self.assertTrue(torch.allclose(all_logits[:, -1:, :], last_token_logits, atol=1e-5))
|
||||
torch.testing.assert_close(all_logits[:, -1:, :], last_token_logits, rtol=1e-5, atol=1e-5)
|
||||
|
||||
@require_torch_gpu
|
||||
def test_flex_attention_with_grads(self):
|
||||
|
||||
Reference in New Issue
Block a user