From 53dc39d82144a3540584c8f62922d1afadf25d97 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 20 Oct 2021 13:01:42 +0200 Subject: [PATCH] up (#14079) --- .../speech-recognition/run_speech_recognition_ctc.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index e2c2d90957..ca5841e32f 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -410,10 +410,15 @@ def main(): # load config config = AutoConfig.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir) + # tokenizer is defined by `tokenizer_class` if present in config else by `model_type` + config_for_tokenizer = config if config.tokenizer_class is not None else None + tokenizer_type = config.model_type if config.tokenizer_class is None else None + # load feature_extractor, tokenizer and create processor tokenizer = AutoTokenizer.from_pretrained( training_args.output_dir, - tokenizer_type=config.model_type, + config=config_for_tokenizer, + tokenizer_type=tokenizer_type, unk_token="[UNK]", pad_token="[PAD]", word_delimiter_token="|",