Fix some doctests after PR 15775 (#20036)

* Add skip_special_tokens=True in some doctests

* For T5

* Fix for speech_to_text.mdx

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2022-11-03 14:18:45 +01:00
committed by GitHub
parent a639ea9e8a
commit 9ccea7acb1
4 changed files with 6 additions and 6 deletions

View File

@@ -57,7 +57,7 @@ be installed as follows: `apt install libsndfile1-dev`
>>> inputs = processor(ds[0]["audio"]["array"], sampling_rate=ds[0]["audio"]["sampling_rate"], return_tensors="pt") >>> inputs = processor(ds[0]["audio"]["array"], sampling_rate=ds[0]["audio"]["sampling_rate"], return_tensors="pt")
>>> generated_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"]) >>> generated_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"])
>>> transcription = processor.batch_decode(generated_ids) >>> transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
>>> transcription >>> transcription
['mister quilter is the apostle of the middle classes and we are glad to welcome his gospel'] ['mister quilter is the apostle of the middle classes and we are glad to welcome his gospel']
``` ```
@@ -87,9 +87,9 @@ be installed as follows: `apt install libsndfile1-dev`
... forced_bos_token_id=processor.tokenizer.lang_code_to_id["fr"], ... forced_bos_token_id=processor.tokenizer.lang_code_to_id["fr"],
... ) ... )
>>> translation = processor.batch_decode(generated_ids) >>> translation = processor.batch_decode(generated_ids, skip_special_tokens=True)
>>> translation >>> translation
["<lang:fr> (Vidéo) Si M. Kilder est l'apossible des classes moyennes, et nous sommes heureux d'être accueillis dans son évangile."] ["(Vidéo) Si M. Kilder est l'apossible des classes moyennes, et nous sommes heureux d'être accueillis dans son évangile."]
``` ```
See the [model hub](https://huggingface.co/models?filter=speech_to_text) to look for Speech2Text checkpoints. See the [model hub](https://huggingface.co/models?filter=speech_to_text) to look for Speech2Text checkpoints.

View File

@@ -1334,7 +1334,7 @@ class Speech2TextForConditionalGeneration(Speech2TextPreTrainedModel):
>>> generated_ids = model.generate(inputs=input_features) >>> generated_ids = model.generate(inputs=input_features)
>>> transcription = processor.batch_decode(generated_ids)[0] >>> transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
>>> transcription >>> transcription
'mister quilter is the apostle of the middle classes and we are glad to welcome his gospel' 'mister quilter is the apostle of the middle classes and we are glad to welcome his gospel'
```""" ```"""

View File

@@ -201,7 +201,7 @@ PT_QUESTION_ANSWERING_SAMPLE = r"""
>>> answer_end_index = outputs.end_logits.argmax() >>> answer_end_index = outputs.end_logits.argmax()
>>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1] >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
>>> tokenizer.decode(predict_answer_tokens) >>> tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
{expected_output} {expected_output}
``` ```