[Styling] stylify using ruff (#27144)
* try to stylify using ruff * might need to remove these changes? * use ruff format and ruff check * use isinstance instead of type comparison * use # fmt: skip * use # fmt: skip * nits * some styling changes * update ci job * nits isinstance * more files update * nits * more nits * small nits * check and format * revert wrong changes * actually use formatter instead of checker * nits * well docbuilder is overwriting this commit * revert notebook changes * try to nuke docbuilder * style * fix feature extraction test * remove `indent-width = 4` * fixup * more nits * update the ruff version that we use * style * nuke docbuilder styling * leave the print for detected changes * nits * Remove file I/O Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com> * style * nits * revert notebook changes * Add # fmt skip when possible * Add # fmt skip when possible * Fix * More ` # fmt: skip` usage * More ` # fmt: skip` usage * More ` # fmt: skip` usage * Nits * more fixes * fix tapas * Another way to skip * Recommended way * Fix two more files * Remove asynch Remove asynch --------- Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
This commit is contained in:
@@ -1001,9 +1001,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
|
||||
def input_text(self):
|
||||
# corresponds to "C'est un test." with seamlessM4T_medium checkpoint
|
||||
|
||||
# fmt: off
|
||||
input_ids = torch.tensor([[256057, 152, 248116, 354, 159, 7356, 248075, 3]])
|
||||
# fmt: on
|
||||
input_ids = torch.tensor([[256057, 152, 248116, 354, 159, 7356, 248075, 3]]) # fmt: skip
|
||||
|
||||
input_ids = input_ids.to(torch_device)
|
||||
|
||||
@@ -1049,9 +1047,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
# test text - tgt lang: eng
|
||||
|
||||
# fmt: off
|
||||
expected_text_tokens = [3, 256047, 3291, 248116, 248066, 9, 7356, 248075, 3]
|
||||
# fmt: on
|
||||
expected_text_tokens = [3, 256047, 3291, 248116, 248066, 9, 7356, 248075, 3] # fmt: skip
|
||||
|
||||
# fmt: off
|
||||
expected_unit_tokens = [
|
||||
@@ -1062,9 +1058,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
# fmt: off
|
||||
expected_wav_slice = [-3e-05, -0.0004, -0.00037, -0.00013, -6e-05, 0.00012, -0.00016, 0.00025, 7e-05, -3e-05]
|
||||
# fmt: on
|
||||
expected_wav_slice = [-3e-05, -0.0004, -0.00037, -0.00013, -6e-05, 0.00012, -0.00016, 0.00025, 7e-05, -3e-05] # fmt: skip
|
||||
|
||||
set_seed(0)
|
||||
output = model.generate(**self.input_text, num_beams=1, tgt_lang="eng", return_intermediate_token_ids=True)
|
||||
@@ -1081,9 +1075,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
# test text - tgt lang: swh
|
||||
|
||||
# fmt: off
|
||||
expected_text_tokens = [3, 256168, 1665, 188589, 7040, 248075, 3]
|
||||
# fmt: on
|
||||
expected_text_tokens = [3, 256168, 1665, 188589, 7040, 248075, 3] # fmt: skip
|
||||
|
||||
# fmt: off
|
||||
expected_unit_tokens = [
|
||||
@@ -1093,9 +1085,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
# fmt: off
|
||||
expected_wav_slice = [1e-05, -7e-05, -4e-05, -4e-05, -6e-05, -9e-05, -0.0001, -2e-05, -7e-05, -2e-05]
|
||||
# fmt: on
|
||||
expected_wav_slice = [1e-05, -7e-05, -4e-05, -4e-05, -6e-05, -9e-05, -0.0001, -2e-05, -7e-05, -2e-05] # fmt: skip
|
||||
|
||||
set_seed(0)
|
||||
output = model.generate(**self.input_text, num_beams=1, tgt_lang="swh", return_intermediate_token_ids=True)
|
||||
@@ -1111,9 +1101,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
|
||||
|
||||
# test audio - tgt lang: rus
|
||||
|
||||
# fmt: off
|
||||
expected_text_tokens = [3, 256147, 1197, 73565, 3413, 537, 233331, 248075, 3]
|
||||
# fmt: on
|
||||
expected_text_tokens = [3, 256147, 1197, 73565, 3413, 537, 233331, 248075, 3] # fmt: skip
|
||||
|
||||
# fmt: off
|
||||
expected_unit_tokens = [
|
||||
@@ -1124,9 +1112,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
# fmt: off
|
||||
expected_wav_slice = [0.00013, 0.00012, 0.00014, 3e-05, 0.0, -6e-05, -0.00018, -0.00016, -0.00021, -0.00018]
|
||||
# fmt: on
|
||||
expected_wav_slice = [0.00013, 0.00012, 0.00014, 3e-05, 0.0, -6e-05, -0.00018, -0.00016, -0.00021, -0.00018] # fmt: skip
|
||||
|
||||
set_seed(0)
|
||||
output = model.generate(**self.input_audio, num_beams=1, tgt_lang="rus", return_intermediate_token_ids=True)
|
||||
|
||||
@@ -449,9 +449,7 @@ class SeamlessM4TDistilledIntegrationTest(unittest.TestCase):
|
||||
" face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.",
|
||||
]
|
||||
|
||||
# fmt: off
|
||||
expected_src_tokens = [256047, 16297, 134408, 8165, 248066, 14734, 950, 1135, 105721, 3573, 83, 27352, 108, 49486, 3]
|
||||
# fmt: on
|
||||
expected_src_tokens = [256047, 16297, 134408, 8165, 248066, 14734, 950, 1135, 105721, 3573, 83, 27352, 108, 49486, 3] # fmt: skip
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
@@ -483,9 +481,7 @@ class SeamlessM4TDistilledIntegrationTest(unittest.TestCase):
|
||||
# Copied from tests.models.nllb.test_tokenization_nllb.NllbDistilledIntegrationTest.test_enro_tokenizer_decode_ignores_language_codes
|
||||
def test_enro_tokenizer_decode_ignores_language_codes(self):
|
||||
self.assertIn(RO_CODE, self.tokenizer.all_special_ids)
|
||||
# fmt: off
|
||||
generated_ids = [RO_CODE, 4254, 98068, 112923, 39072, 3909, 713, 102767, 26, 17314, 35642, 14683, 33118, 2022, 66987, 2, 256047]
|
||||
# fmt: on
|
||||
generated_ids = [RO_CODE, 4254, 98068, 112923, 39072, 3909, 713, 102767, 26, 17314, 35642, 14683, 33118, 2022, 66987, 2, 256047] # fmt: skip
|
||||
|
||||
result = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
|
||||
expected_romanian = self.tokenizer.decode(generated_ids[1:], skip_special_tokens=True)
|
||||
|
||||
Reference in New Issue
Block a user