fix: (issue #32124) Exception raised when running transformers/examples/flax/language-modeling/t5_tokenizer_model.py. (#32157)

fix: Exception raised when running `transformers/examples/flax/language-modeling/t5_tokenizer_model.py`.
This commit is contained in:
Shaopeng Fu
2024-08-03 19:24:11 +03:00
committed by GitHub
parent c1aa0edb48
commit 7c31d05b59

View File

@@ -47,14 +47,14 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
tokenizer.pre_tokenizer = pre_tokenizers.Sequence( tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
[ [
pre_tokenizers.Metaspace( pre_tokenizers.Metaspace(
replacement=replacement, add_prefix_space="always" if add_prefix_space else "never" replacement=replacement, prepend_scheme="always" if add_prefix_space else "never"
), ),
pre_tokenizers.Digits(individual_digits=True), pre_tokenizers.Digits(individual_digits=True),
pre_tokenizers.Punctuation(), pre_tokenizers.Punctuation(),
] ]
) )
tokenizer.decoder = decoders.Metaspace( tokenizer.decoder = decoders.Metaspace(
replacement=replacement, add_prefix_space="always" if add_prefix_space else "never" replacement=replacement, prepend_scheme="always" if add_prefix_space else "never"
) )
tokenizer.post_processor = TemplateProcessing( tokenizer.post_processor = TemplateProcessing(