[Styling] stylify using ruff (#27144)

* try to stylify using ruff * might need to remove these changes? * use ruff format and ruff check * use isinstance instead of type comparison * use # fmt: skip * use # fmt: skip * nits * some styling changes * update ci job * nits isinstance * more files update * nits * more nits * small nits * check and format * revert wrong changes * actually use formatter instead of checker * nits * well docbuilder is overwriting this commit * revert notebook changes * try to nuke docbuilder * style * fix feature extraction test * remove `indent-width = 4` * fixup * more nits * update the ruff version that we use * style * nuke docbuilder styling * leave the print for detected changes * nits * Remove file I/O Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com> * style * nits * revert notebook changes * Add # fmt skip when possible * Add # fmt skip when possible * Fix * More ` # fmt: skip` usage * More ` # fmt: skip` usage * More ` # fmt: skip` usage * Nits * more fixes * fix tapas * Another way to skip * Recommended way * Fix two more files * Remove asynch Remove asynch --------- Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
2023-11-16 17:43:19 +01:00
parent acb5b4aff5
commit 651408a077
480 changed files with 867 additions and 1059 deletions
--- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
+++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py
@@ -1001,9 +1001,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
    def input_text(self):
        # corresponds to "C'est un test." with seamlessM4T_medium checkpoint

-        # fmt: off
-        input_ids = torch.tensor([[256057, 152, 248116, 354, 159, 7356, 248075, 3]])
-        # fmt: on
+        input_ids = torch.tensor([[256057, 152, 248116, 354, 159, 7356, 248075, 3]])  # fmt: skip

        input_ids = input_ids.to(torch_device)

@@ -1049,9 +1047,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):

        # test text - tgt lang: eng

-        # fmt: off
-        expected_text_tokens = [3, 256047, 3291, 248116, 248066, 9, 7356, 248075, 3]
-        # fmt: on
+        expected_text_tokens = [3, 256047, 3291, 248116, 248066, 9, 7356, 248075, 3]  # fmt: skip

        # fmt: off
        expected_unit_tokens = [
@@ -1062,9 +1058,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
        ]
        # fmt: on

-        # fmt: off
-        expected_wav_slice = [-3e-05, -0.0004, -0.00037, -0.00013, -6e-05, 0.00012, -0.00016, 0.00025, 7e-05, -3e-05]
-        # fmt: on
+        expected_wav_slice = [-3e-05, -0.0004, -0.00037, -0.00013, -6e-05, 0.00012, -0.00016, 0.00025, 7e-05, -3e-05]  # fmt: skip

        set_seed(0)
        output = model.generate(**self.input_text, num_beams=1, tgt_lang="eng", return_intermediate_token_ids=True)
@@ -1081,9 +1075,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):

        # test text - tgt lang: swh

-        # fmt: off
-        expected_text_tokens = [3, 256168, 1665, 188589, 7040, 248075, 3]
-        # fmt: on
+        expected_text_tokens = [3, 256168, 1665, 188589, 7040, 248075, 3]  # fmt: skip

        # fmt: off
        expected_unit_tokens = [
@@ -1093,9 +1085,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
        ]
        # fmt: on

-        # fmt: off
-        expected_wav_slice = [1e-05, -7e-05, -4e-05, -4e-05, -6e-05, -9e-05, -0.0001, -2e-05, -7e-05, -2e-05]
-        # fmt: on
+        expected_wav_slice = [1e-05, -7e-05, -4e-05, -4e-05, -6e-05, -9e-05, -0.0001, -2e-05, -7e-05, -2e-05]  # fmt: skip

        set_seed(0)
        output = model.generate(**self.input_text, num_beams=1, tgt_lang="swh", return_intermediate_token_ids=True)
@@ -1111,9 +1101,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):

        # test audio - tgt lang: rus

-        # fmt: off
-        expected_text_tokens = [3, 256147, 1197, 73565, 3413, 537, 233331, 248075, 3]
-        # fmt: on
+        expected_text_tokens = [3, 256147, 1197, 73565, 3413, 537, 233331, 248075, 3]  # fmt: skip

        # fmt: off
        expected_unit_tokens = [
@@ -1124,9 +1112,7 @@ class SeamlessM4TModelIntegrationTest(unittest.TestCase):
        ]
        # fmt: on

-        # fmt: off
-        expected_wav_slice = [0.00013, 0.00012, 0.00014, 3e-05, 0.0, -6e-05, -0.00018, -0.00016, -0.00021, -0.00018]
-        # fmt: on
+        expected_wav_slice = [0.00013, 0.00012, 0.00014, 3e-05, 0.0, -6e-05, -0.00018, -0.00016, -0.00021, -0.00018]  # fmt: skip

        set_seed(0)
        output = model.generate(**self.input_audio, num_beams=1, tgt_lang="rus", return_intermediate_token_ids=True)
--- a/tests/models/seamless_m4t/test_tokenization_seamless_m4t.py
+++ b/tests/models/seamless_m4t/test_tokenization_seamless_m4t.py
@@ -449,9 +449,7 @@ class SeamlessM4TDistilledIntegrationTest(unittest.TestCase):
        " face decât să înrăutăţească violenţele şi mizeria pentru milioane de oameni.",
    ]

-    # fmt: off
-    expected_src_tokens = [256047, 16297, 134408, 8165, 248066, 14734, 950, 1135, 105721, 3573, 83, 27352, 108, 49486, 3]
-    # fmt: on
+    expected_src_tokens = [256047, 16297, 134408, 8165, 248066, 14734, 950, 1135, 105721, 3573, 83, 27352, 108, 49486, 3]  # fmt: skip

    @classmethod
    def setUpClass(cls):
@@ -483,9 +481,7 @@ class SeamlessM4TDistilledIntegrationTest(unittest.TestCase):
    # Copied from tests.models.nllb.test_tokenization_nllb.NllbDistilledIntegrationTest.test_enro_tokenizer_decode_ignores_language_codes
    def test_enro_tokenizer_decode_ignores_language_codes(self):
        self.assertIn(RO_CODE, self.tokenizer.all_special_ids)
-        # fmt: off
-        generated_ids = [RO_CODE, 4254, 98068, 112923, 39072, 3909, 713, 102767, 26, 17314, 35642, 14683, 33118, 2022, 66987, 2, 256047]
-        # fmt: on
+        generated_ids = [RO_CODE, 4254, 98068, 112923, 39072, 3909, 713, 102767, 26, 17314, 35642, 14683, 33118, 2022, 66987, 2, 256047]  # fmt: skip

        result = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
        expected_romanian = self.tokenizer.decode(generated_ids[1:], skip_special_tokens=True)