[Kyutai-STT] correct model type + model id (#39035)

* correct model type + model id

* udpate doc

* init fix

* style !!!
This commit is contained in:
eustlb
2025-06-25 18:09:00 +02:00
committed by GitHub
parent dad0e87c79
commit 551e48f182
15 changed files with 29 additions and 23 deletions

View File

@@ -847,7 +847,7 @@
title: GraniteSpeech
- local: model_doc/hubert
title: Hubert
- local: model_doc/stt
- local: model_doc/kyutai_speech_to_text
title: Kyutai Speech-To-Text
- local: model_doc/mctct
title: MCTCT

View File

@@ -36,10 +36,10 @@ from transformers import KyutaiSpeechToTextProcessor, KyutaiSpeechToTextForCondi
# 1. load the model and the processor
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "kyutai/stt-2.6b-en"
model_id = "kyutai/stt-2.6b-en-trfs"
processor = KyutaiSpeechToTextProcessor.from_pretrained(model_id)
model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=torch_device)
model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=torch_device, torch_dtype="auto")
# 2. load audio samples
ds = load_dataset(
@@ -69,10 +69,10 @@ from transformers import KyutaiSpeechToTextProcessor, KyutaiSpeechToTextForCondi
# 1. load the model and the processor
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "kyutai/stt-2.6b-en"
model_id = "kyutai/stt-2.6b-en-trfs"
processor = KyutaiSpeechToTextProcessor.from_pretrained(model_id)
model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=torch_device)
model = KyutaiSpeechToTextForConditionalGeneration.from_pretrained(model_id, device_map=torch_device, torch_dtype="auto")
# 2. load audio samples
ds = load_dataset(