Avoid invalid escape sequences, use raw strings (#22936)

* Avoid invalid escape sequences, use raw strings

* Integrate PR feedback
This commit is contained in:
Lingepumpe
2023-04-25 15:17:56 +02:00
committed by GitHub
parent 81c1910c86
commit 5427250351
24 changed files with 61 additions and 61 deletions

View File

@@ -365,7 +365,7 @@ def main():
target_sr = processor.feature_extractor.sampling_rate if data_args.target_feature_extractor_sampling_rate else None
vocabulary_chars_str = "".join(t for t in processor.tokenizer.get_vocab().keys() if len(t) == 1)
vocabulary_text_cleaner = re.compile( # remove characters not in vocabulary
-f"[^\s{re.escape(vocabulary_chars_str)}]", # allow space in addition to chars in vocabulary
+rf"[^\s{re.escape(vocabulary_chars_str)}]", # allow space in addition to chars in vocabulary
flags=re.IGNORECASE if processor.tokenizer.do_lower_case else 0,
)
text_updates = []