[Styling] stylify using ruff (#27144)
* try to stylify using ruff * might need to remove these changes? * use ruff format and ruff check * use isinstance instead of type comparison * use # fmt: skip * use # fmt: skip * nits * some styling changes * update ci job * nits isinstance * more files update * nits * more nits * small nits * check and format * revert wrong changes * actually use formatter instead of checker * nits * well docbuilder is overwriting this commit * revert notebook changes * try to nuke docbuilder * style * fix feature extraction test * remove `indent-width = 4` * fixup * more nits * update the ruff version that we use * style * nuke docbuilder styling * leave the print for detected changes * nits * Remove file I/O Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com> * style * nits * revert notebook changes * Add # fmt skip when possible * Add # fmt skip when possible * Fix * More ` # fmt: skip` usage * More ` # fmt: skip` usage * More ` # fmt: skip` usage * Nits * more fixes * fix tapas * Another way to skip * Recommended way * Fix two more files * Remove asynch Remove asynch --------- Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
This commit is contained in:
@@ -517,9 +517,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
# Merge when the previous sequence is not included in the current sequence
|
||||
# fmt: off
|
||||
next_sequences_3 = [[50364, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50584, 50257]]
|
||||
# fmt: on
|
||||
next_sequences_3 = [[50364, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50584, 50257]] # fmt: skip
|
||||
# {'text': ' His instant panic was followed by a small, sharp blow high on his chest.','timestamp': (0.0, 9.4)}
|
||||
merge = _find_timestamp_sequence(
|
||||
[[previous_sequence, (480_000, 0, 0)], [next_sequences_3, (480_000, 120_000, 0)]],
|
||||
@@ -527,12 +525,10 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
processor.feature_extractor,
|
||||
max_source_positions,
|
||||
)
|
||||
# fmt: off
|
||||
self.assertEqual(
|
||||
merge,
|
||||
[51492, 406, 3163, 1953, 466, 13, 51612, 51612, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51832],
|
||||
)
|
||||
# fmt: on
|
||||
) # fmt: skip
|
||||
self.assertEqual(
|
||||
processor.decode(merge, output_offsets=True),
|
||||
{
|
||||
@@ -550,23 +546,19 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
# last case is when the sequence is not in the first next predicted start and end of timestamp
|
||||
# fmt: off
|
||||
next_sequences_3 = [
|
||||
[50364, 2812, 9836, 14783, 390, 406, 3163, 1953, 466, 13, 50634, 50634, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 50934]
|
||||
]
|
||||
# fmt: on
|
||||
] # fmt: skip
|
||||
merge = _find_timestamp_sequence(
|
||||
[[previous_sequence, (480_000, 0, 0)], [next_sequences_3, (480_000, 167_000, 0)]],
|
||||
processor.tokenizer,
|
||||
processor.feature_extractor,
|
||||
max_source_positions,
|
||||
)
|
||||
# fmt: off
|
||||
self.assertEqual(
|
||||
merge,
|
||||
[51492, 406, 3163, 1953, 466, 13, 51612, 51612, 2812, 9836, 14783, 390, 6263, 538, 257, 1359, 11, 8199, 6327, 1090, 322, 702, 7443, 13, 51912]
|
||||
)
|
||||
# fmt: on
|
||||
) # fmt: skip
|
||||
self.assertEqual(
|
||||
processor.decode(merge, output_offsets=True),
|
||||
{
|
||||
|
||||
@@ -242,9 +242,7 @@ These are just a few of the many attractions that Paris has to offer. With so mu
|
||||
],
|
||||
)
|
||||
inputs = tokenizer._build_conversation_input_ids(conversation)
|
||||
# fmt: off
|
||||
EXPECTED_INPUTS_IDS = [ 1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 29871, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 29902, 626, 2675, 304, 3681, 29892, 825, 881, 306, 1074, 29973, 518, 29914, 25580, 29962, 3681, 29892, 278, 7483, 310, 3444, 29892, 338, 2998, 363, 967, 380, 27389, 11258, 29892, 1616, 19133, 29879, 29892, 15839, 2982, 22848, 29892, 322, 6017, 7716, 25005, 29889, 2266, 526, 777, 310, 278, 2246, 19650, 1953, 304, 1074, 297, 3681, 29901, 13, 13, 29896, 29889, 450, 382, 2593, 295, 23615, 29901, 450, 9849, 293, 382, 2593, 295, 23615, 338, 697, 310, 278, 1556, 5936, 13902, 2982, 22848, 297, 278, 3186, 322, 16688, 2078, 271, 400, 5086, 8386, 310, 278, 4272, 29889, 13, 29906, 29889, 450, 4562, 12675, 6838, 29901, 450, 4562, 12675, 338, 697, 310, 278, 3186, 29915, 29879, 10150, 322, 1556, 13834, 19133, 29879, 29892, 27261, 385, 21210, 573, 4333, 310, 1616, 322, 24238, 29879, 29892, 3704, 278, 2598, 29874, 29420, 29889, 13, 29941, 29889, 24337, 29899, 29928, 420, 315, 21471, 29901, 910, 9560, 274, 21471, 338, 697, 310, 278, 1556, 13834, 2982, 22848, 297, 3681, 322, 338, 2998, 363, 967, 22883, 293, 11258, 322, 380, 27389, 380, 7114, 12917, 5417, 29889, 13, 13, 1349, 968, 526, 925, 263, 2846, 310, 278, 1784, 19650, 1953, 393, 3681, 756, 304, 5957, 29889, 2973, 577, 1568, 304, 
1074, 322, 437, 29892, 372, 29915, 29879, 694, 4997, 393, 3681, 338, 697, 310, 278, 1556, 5972, 6282, 391, 15422, 800, 297, 278, 3186, 29889, 29871, 2, 1, 518, 25580, 29962, 1724, 338, 577, 2107, 1048, 396, 29896, 29973, 518, 29914, 25580, 29962]
|
||||
# fmt: on
|
||||
EXPECTED_INPUTS_IDS = [ 1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 29871, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 29902, 626, 2675, 304, 3681, 29892, 825, 881, 306, 1074, 29973, 518, 29914, 25580, 29962, 3681, 29892, 278, 7483, 310, 3444, 29892, 338, 2998, 363, 967, 380, 27389, 11258, 29892, 1616, 19133, 29879, 29892, 15839, 2982, 22848, 29892, 322, 6017, 7716, 25005, 29889, 2266, 526, 777, 310, 278, 2246, 19650, 1953, 304, 1074, 297, 3681, 29901, 13, 13, 29896, 29889, 450, 382, 2593, 295, 23615, 29901, 450, 9849, 293, 382, 2593, 295, 23615, 338, 697, 310, 278, 1556, 5936, 13902, 2982, 22848, 297, 278, 3186, 322, 16688, 2078, 271, 400, 5086, 8386, 310, 278, 4272, 29889, 13, 29906, 29889, 450, 4562, 12675, 6838, 29901, 450, 4562, 12675, 338, 697, 310, 278, 3186, 29915, 29879, 10150, 322, 1556, 13834, 19133, 29879, 29892, 27261, 385, 21210, 573, 4333, 310, 1616, 322, 24238, 29879, 29892, 3704, 278, 2598, 29874, 29420, 29889, 13, 29941, 29889, 24337, 29899, 29928, 420, 315, 21471, 29901, 910, 9560, 274, 21471, 338, 697, 310, 278, 1556, 13834, 2982, 22848, 297, 3681, 322, 338, 2998, 363, 967, 22883, 293, 11258, 322, 380, 27389, 380, 7114, 12917, 5417, 29889, 13, 13, 1349, 968, 526, 925, 263, 2846, 310, 278, 1784, 19650, 1953, 393, 3681, 756, 304, 5957, 29889, 2973, 577, 1568, 304, 
1074, 322, 437, 29892, 372, 29915, 29879, 694, 4997, 393, 3681, 338, 697, 310, 278, 1556, 5972, 6282, 391, 15422, 800, 297, 278, 3186, 29889, 29871, 2, 1, 518, 25580, 29962, 1724, 338, 577, 2107, 1048, 396, 29896, 29973, 518, 29914, 25580, 29962] # fmt: skip
|
||||
self.assertEqual(inputs, EXPECTED_INPUTS_IDS)
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
|
||||
|
||||
@@ -198,17 +198,14 @@ class QAPipelineTests(unittest.TestCase):
|
||||
"question-answering",
|
||||
model="KoichiYasuoka/deberta-base-japanese-aozora-ud-head",
|
||||
)
|
||||
output = question_answerer(question="国語", context="全学年にわたって小学校の国語の教科書に挿し絵が用いられている")
|
||||
output = question_answerer(question="国語", context="全学年にわたって小学校の国語の教科書に挿し絵が用いられている") # fmt: skip
|
||||
|
||||
# Wrong answer, the whole text is identified as one "word" since the tokenizer does not include
|
||||
# a pretokenizer
|
||||
self.assertEqual(
|
||||
nested_simplify(output),
|
||||
{"score": 1.0, "start": 0, "end": 30, "answer": "全学年にわたって小学校の国語の教科書に挿し絵が用いられている"},
|
||||
)
|
||||
self.assertEqual(nested_simplify(output),{"score": 1.0, "start": 0, "end": 30, "answer": "全学年にわたって小学校の国語の教科書に挿し絵が用いられている"}) # fmt: skip
|
||||
|
||||
# Disable word alignment
|
||||
output = question_answerer(question="国語", context="全学年にわたって小学校の国語の教科書に挿し絵が用いられている", align_to_words=False)
|
||||
output = question_answerer(question="国語", context="全学年にわたって小学校の国語の教科書に挿し絵が用いられている", align_to_words=False) # fmt: skip
|
||||
self.assertEqual(
|
||||
nested_simplify(output),
|
||||
{"score": 1.0, "start": 15, "end": 18, "answer": "教科書"},
|
||||
|
||||
@@ -486,8 +486,7 @@ class TokenClassificationPipelineTests(unittest.TestCase):
|
||||
token_classifier.model.config.id2label = {0: "O", 1: "MISC", 2: "PER", 3: "ORG", 4: "LOC"}
|
||||
example = [
|
||||
{
|
||||
# fmt : off
|
||||
"scores": np.array([0, 0, 0, 0, 0.9968166351318359]),
|
||||
"scores": np.array([0, 0, 0, 0, 0.9968166351318359]), # fmt : skip
|
||||
"index": 1,
|
||||
"is_subword": False,
|
||||
"word": "En",
|
||||
@@ -495,8 +494,7 @@ class TokenClassificationPipelineTests(unittest.TestCase):
|
||||
"end": 2,
|
||||
},
|
||||
{
|
||||
# fmt : off
|
||||
"scores": np.array([0, 0, 0, 0, 0.9957635998725891]),
|
||||
"scores": np.array([0, 0, 0, 0, 0.9957635998725891]), # fmt : skip
|
||||
"index": 2,
|
||||
"is_subword": True,
|
||||
"word": "##zo",
|
||||
@@ -504,9 +502,7 @@ class TokenClassificationPipelineTests(unittest.TestCase):
|
||||
"end": 4,
|
||||
},
|
||||
{
|
||||
# fmt: off
|
||||
"scores": np.array([0, 0, 0, 0.9986497163772583, 0]),
|
||||
# fmt: on
|
||||
"scores": np.array([0, 0, 0, 0.9986497163772583, 0]), # fmt : skip
|
||||
"index": 7,
|
||||
"word": "UN",
|
||||
"is_subword": False,
|
||||
@@ -542,8 +538,7 @@ class TokenClassificationPipelineTests(unittest.TestCase):
|
||||
)
|
||||
example = [
|
||||
{
|
||||
# fmt : off
|
||||
"scores": np.array([0, 0, 0, 0, 0.9968166351318359, 0, 0, 0]),
|
||||
"scores": np.array([0, 0, 0, 0, 0.9968166351318359, 0, 0, 0]), # fmt : skip
|
||||
"index": 1,
|
||||
"is_subword": False,
|
||||
"word": "En",
|
||||
@@ -551,8 +546,7 @@ class TokenClassificationPipelineTests(unittest.TestCase):
|
||||
"end": 2,
|
||||
},
|
||||
{
|
||||
# fmt : off
|
||||
"scores": np.array([0, 0, 0, 0, 0.9957635998725891, 0, 0, 0]),
|
||||
"scores": np.array([0, 0, 0, 0, 0.9957635998725891, 0, 0, 0]), # fmt : skip
|
||||
"index": 2,
|
||||
"is_subword": True,
|
||||
"word": "##zo",
|
||||
@@ -560,9 +554,7 @@ class TokenClassificationPipelineTests(unittest.TestCase):
|
||||
"end": 4,
|
||||
},
|
||||
{
|
||||
# fmt: off
|
||||
"scores": np.array([0, 0, 0, 0, 0, 0.9986497163772583, 0, 0, ]),
|
||||
# fmt: on
|
||||
"scores": np.array([0, 0, 0, 0, 0, 0.9986497163772583, 0, 0]), # fmt : skip
|
||||
"index": 7,
|
||||
"word": "UN",
|
||||
"is_subword": False,
|
||||
|
||||
Reference in New Issue
Block a user