[Styling] stylify using ruff (#27144)

* try to stylify using ruff

* might need to remove these changes?

* use ruf format andruff check

* use isinstance instead of type comparision

* use # fmt: skip

* use # fmt: skip

* nits

* soem styling changes

* update ci job

* nits isinstance

* more files update

* nits

* more nits

* small nits

* check and format

* revert wrong changes

* actually use formatter instead of checker

* nits

* well docbuilder is overwriting this commit

* revert notebook changes

* try to nuke docbuilder

* style

* fix feature exrtaction test

* remve `indent-width = 4`

* fixup

* more nits

* update the ruff version that we use

* style

* nuke docbuilder styling

* leve the print for detected changes

* nits

* Remove file I/O

Co-authored-by: charliermarsh
 <charlie.r.marsh@gmail.com>

* style

* nits

* revert notebook changes

* Add # fmt skip when possible

* Add # fmt skip when possible

* Fix

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* NIts

* more fixes

* fix tapas

* Another way to skip

* Recommended way

* Fix two more fiels

* Remove asynch
Remove asynch

---------

Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
This commit is contained in:
Arthur
2023-11-16 17:43:19 +01:00
committed by GitHub
parent acb5b4aff5
commit 651408a077
480 changed files with 867 additions and 1059 deletions

View File

@@ -1001,9 +1001,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
def input_text(self):
# corresponds to "C'est un test." with seamlessM4T_medium checkpoint
# fmt: off
input_ids = torch.tensor([[256057, 152, 248116, 354, 159, 7356, 248075, 3]])
# fmt: on
input_ids = torch.tensor([[256057, 152, 248116, 354, 159, 7356, 248075, 3]]) # fmt: skip
input_ids = input_ids.to(torch_device)
@@ -1049,9 +1047,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
# test text - tgt lang: eng
# fmt: off
expected_text_tokens = [3, 256047, 3291, 248116, 248066, 9, 7356, 248075, 3]
# fmt: on
expected_text_tokens = [3, 256047, 3291, 248116, 248066, 9, 7356, 248075, 3] # fmt: skip
# fmt: off
expected_unit_tokens = [
@@ -1062,9 +1058,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
]
# fmt: on
# fmt: off
expected_wav_slice = [-3e-05, -0.0004, -0.00037, -0.00013, -6e-05, 0.00012, -0.00016, 0.00025, 7e-05, -3e-05]
# fmt: on
expected_wav_slice = [-3e-05, -0.0004, -0.00037, -0.00013, -6e-05, 0.00012, -0.00016, 0.00025, 7e-05, -3e-05] # fmt: skip
set_seed(0)
output = model.generate(**self.input_text, num_beams=1, tgt_lang="eng", return_intermediate_token_ids=True)
@@ -1081,9 +1075,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
# test text - tgt lang: swh
# fmt: off
expected_text_tokens = [3, 256168, 1665, 188589, 7040, 248075, 3]
# fmt: on
expected_text_tokens = [3, 256168, 1665, 188589, 7040, 248075, 3] # fmt: skip
# fmt: off
expected_unit_tokens = [
@@ -1093,9 +1085,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
]
# fmt: on
# fmt: off
expected_wav_slice = [1e-05, -7e-05, -4e-05, -4e-05, -6e-05, -9e-05, -0.0001, -2e-05, -7e-05, -2e-05]
# fmt: on
expected_wav_slice = [1e-05, -7e-05, -4e-05, -4e-05, -6e-05, -9e-05, -0.0001, -2e-05, -7e-05, -2e-05] # fmt: skip
set_seed(0)
output = model.generate(**self.input_text, num_beams=1, tgt_lang="swh", return_intermediate_token_ids=True)
@@ -1111,9 +1101,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
# test audio - tgt lang: rus
# fmt: off
expected_text_tokens = [3, 256147, 1197, 73565, 3413, 537, 233331, 248075, 3]
# fmt: on
expected_text_tokens = [3, 256147, 1197, 73565, 3413, 537, 233331, 248075, 3] # fmt: skip
# fmt: off
expected_unit_tokens = [
@@ -1124,9 +1112,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
]
# fmt: on
# fmt: off
expected_wav_slice = [0.00013, 0.00012, 0.00014, 3e-05, 0.0, -6e-05, -0.00018, -0.00016, -0.00021, -0.00018]
# fmt: on
expected_wav_slice = [0.00013, 0.00012, 0.00014, 3e-05, 0.0, -6e-05, -0.00018, -0.00016, -0.00021, -0.00018] # fmt: skip
set_seed(0)
output = model.generate(**self.input_audio, num_beams=1, tgt_lang="rus", return_intermediate_token_ids=True)

View File

@@ -449,9 +449,7 @@ class SeamlessM4TDistilledIntegrationTest(unittest.TestCase):
" face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.",
]
# fmt: off
expected_src_tokens = [256047, 16297, 134408, 8165, 248066, 14734, 950, 1135, 105721, 3573, 83, 27352, 108, 49486, 3]
# fmt: on
expected_src_tokens = [256047, 16297, 134408, 8165, 248066, 14734, 950, 1135, 105721, 3573, 83, 27352, 108, 49486, 3] # fmt: skip
@classmethod
def setUpClass(cls):
@@ -483,9 +481,7 @@ class SeamlessM4TDistilledIntegrationTest(unittest.TestCase):
# Copied from tests.models.nllb.test_tokenization_nllb.NllbDistilledIntegrationTest.test_enro_tokenizer_decode_ignores_language_codes
def test_enro_tokenizer_decode_ignores_language_codes(self):
self.assertIn(RO_CODE, self.tokenizer.all_special_ids)
# fmt: off
generated_ids = [RO_CODE, 4254, 98068, 112923, 39072, 3909, 713, 102767, 26, 17314, 35642, 14683, 33118, 2022, 66987, 2, 256047]
# fmt: on
generated_ids = [RO_CODE, 4254, 98068, 112923, 39072, 3909, 713, 102767, 26, 17314, 35642, 14683, 33118, 2022, 66987, 2, 256047] # fmt: skip
result = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
expected_romanian = self.tokenizer.decode(generated_ids[1:], skip_special_tokens=True)