Fix slow tests for important models to be compatible with A10 runners (#29905)

* fix mistral and mixtral

* add pdb

* fix mixtral test

* fix

* fix mistral ?

* add fix gemma

* fix mistral

* fix

* test

* another test

* fix

* fix

* fix mistral tests

* fix them again

* final fixes for mistral

* fix padding right

* fix whisper fa2

* fix

* fix

* fix gemma

* test

* fix llama

* fix

* fix

* fix llama gemma

* add class attribute

* fix CI

* clarify whisper

* compute_capability

* rename names in some comments

* Add   # fmt: skip

* make style

* Update tests/models/mistral/test_modeling_mistral.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* update

* update

---------

Co-authored-by: Younes Belkada <younesbelkada@gmail.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2024-04-09 13:28:54 +02:00
committed by GitHub
parent e9c23fa056
commit 08a194fcd6
6 changed files with 246 additions and 110 deletions

View File

@@ -507,6 +507,16 @@ class MixtralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
@require_torch
class MixtralIntegrationTest(unittest.TestCase):
# This variable is used to determine which CUDA device we are using for our runners (A10 or T4)
# Depending on the hardware we get different logits / generations
cuda_compute_capability_major_version = None
@classmethod
def setUpClass(cls):
# Runs once for the test class: record the first element returned by
# torch.cuda.get_device_capability() so tests can index their expected values by it.
# When torch or CUDA is unavailable the class attribute is left untouched.
if is_torch_available() and torch.cuda.is_available():
# 8 is for A100 / A10 and 7 for T4
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
@slow
@require_torch_gpu
def test_small_model_logits(self):
@@ -518,18 +528,26 @@ class MixtralIntegrationTest(unittest.TestCase):
)
# TODO: might need to tweak it in case the logits do not match on our daily runners
# these logits have been obtained with the original megablocks implementation.
EXPECTED_LOGITS = torch.Tensor(
[[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]
).to(torch_device)
EXPECTED_LOGITS = {
7: torch.Tensor([[0.1670, 0.1620, 0.6094], [-0.8906, -0.1588, -0.6060], [0.1572, 0.1290, 0.7246]]).to(
torch_device
),
8: torch.Tensor([[0.1631, 0.1621, 0.6094], [-0.8906, -0.1621, -0.6094], [0.1572, 0.1270, 0.7227]]).to(
torch_device
),
}
with torch.no_grad():
logits = model(dummy_input).logits
torch.testing.assert_close(logits[0, :3, :3].half(), EXPECTED_LOGITS, atol=1e-3, rtol=1e-3)
torch.testing.assert_close(logits[1, :3, :3].half(), EXPECTED_LOGITS, atol=1e-3, rtol=1e-3)
torch.testing.assert_close(
logits[0, :3, :3], EXPECTED_LOGITS[self.cuda_compute_capability_major_version], atol=1e-3, rtol=1e-3
)
torch.testing.assert_close(
logits[1, :3, :3], EXPECTED_LOGITS[self.cuda_compute_capability_major_version], atol=1e-3, rtol=1e-3
)
@slow
# @require_torch_gpu
@require_torch_gpu
def test_small_model_logits_batched(self):
model_id = "hf-internal-testing/Mixtral-tiny"
dummy_input = torch.LongTensor([[0, 0, 0, 0, 0, 0, 1, 2, 3], [1, 1, 2, 3, 4, 5, 6, 7, 8]]).to(torch_device)
@@ -540,23 +558,48 @@ class MixtralIntegrationTest(unittest.TestCase):
)
# TODO: might need to tweak it in case the logits do not match on our daily runners
EXPECTED_LOGITS_LEFT = torch.Tensor(
[[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]],
)
EXPECTED_LOGITS_LEFT = {
7: torch.Tensor(
[[0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007], [0.1750, 0.0537, 0.7007]],
).to(torch_device),
8: torch.Tensor([[0.1914, 0.0508, 0.7188], [0.1953, 0.0510, 0.7227], [0.1973, 0.0562, 0.7148]]).to(
torch_device
),
}
# logits[0, -3:, -3:].half()
EXPECTED_LOGITS_LEFT_UNPADDED = torch.Tensor(
[[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]],
)
EXPECTED_LOGITS_LEFT_UNPADDED = {
7: torch.Tensor(
[[0.2212, 0.5200, -0.3816], [0.8213, -0.2313, 0.6069], [0.2664, -0.7090, 0.2468]],
).to(torch_device),
8: torch.Tensor([[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]]).to(
torch_device
),
}
# logits[1, -3:, -3:].half()
EXPECTED_LOGITS_RIGHT_UNPADDED = torch.Tensor(
[[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]
)
EXPECTED_LOGITS_RIGHT_UNPADDED = {
7: torch.Tensor([[0.2205, 0.1232, -0.1611], [-0.3484, 0.3030, -1.0312], [0.0742, 0.7930, 0.7969]]).to(
torch_device
),
8: torch.Tensor([[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(
torch_device
),
}
with torch.no_grad():
logits = model(dummy_input, attention_mask=attention_mask).logits
torch.testing.assert_close(logits[0, :3, :3].half(), EXPECTED_LOGITS_LEFT, atol=1e-3, rtol=1e-3)
torch.testing.assert_close(logits[0, -3:, -3:].half(), EXPECTED_LOGITS_LEFT_UNPADDED, atol=1e-3, rtol=1e-3)
torch.testing.assert_close(logits[1, -3:, -3:].half(), EXPECTED_LOGITS_RIGHT_UNPADDED, atol=1e-3, rtol=1e-3)
torch.testing.assert_close(
logits[0, :3, :3], EXPECTED_LOGITS_LEFT[self.cuda_compute_capability_major_version], atol=1e-3, rtol=1e-3
)
torch.testing.assert_close(
logits[0, -3:, -3:],
EXPECTED_LOGITS_LEFT_UNPADDED[self.cuda_compute_capability_major_version],
atol=1e-3,
rtol=1e-3,
)
torch.testing.assert_close(
logits[1, -3:, -3:],
EXPECTED_LOGITS_RIGHT_UNPADDED[self.cuda_compute_capability_major_version],
atol=1e-3,
rtol=1e-3,
)