[serve] Add speech to text (/v1/audio/transcriptions) (#39434)

* Scaffolding

* Explicit content

* Naïve Responses API streaming implementation

* Cleanup

* Scaffolding

* Explicit content

* Naïve Responses API streaming implementation

* Cleanup

* use openai

* validate request, including detecting unused fields

* dict indexing

* dict var access

* tmp commit (tests failing)

* add slow

* use oai output type in completions

* (little rebase errors)

* working spec?

* guard type hint

* type hints. fix state (CB can now load different models)

* type hints; fn names; error type

* add docstrings

* responses + kv cache

* metadata support; fix kv cache; error event

* add output_index and content_index

* docstrings

* add test_build_response_event

* docs/comments

* gate test requirements; terminate cb manager on model switch

* nasty type hints

* more type hints

* disable validation by default; enable force models

* todo

* experiment: base model from typed dict

* audio working

* fix bad rebase

* load audio with librosa

* implement timed models

* almost working

* make fixup

* fix tests

* transcription request type

* tokenizer -> processor

* add example in docs

---------

Co-authored-by: Lysandre <hi@lysand.re>
This commit is contained in:
Joao Gante
2025-07-17 15:29:57 +01:00
committed by GitHub
parent 8b3de61a65
commit bf6c997685
3 changed files with 355 additions and 104 deletions

View File

@@ -63,14 +63,13 @@ class ServeCLITest(unittest.TestCase):
"""
dummy = ServeCommand.__new__(ServeCommand)
dummy.args = type("Args", (), {})()
dummy.loaded_model = "dummy_model@main"
# The keys for these fields must be present in every chunk
MANDATORY_FIELDS = ["data", "id", "choices", "created", "model", "object", "system_fingerprint"]
# Case 1: most fields are provided
chunk = ServeCommand.build_chat_completion_chunk(
dummy, request_id="req0", content="hello", finish_reason="stop", role="user"
dummy, request_id="req0", content="hello", finish_reason="stop", role="user", model="dummy_model@main"
)
for field in MANDATORY_FIELDS:
self.assertIn(field, chunk)
@@ -79,13 +78,13 @@ class ServeCLITest(unittest.TestCase):
)
# Case 2: only the role is provided -- other fields in 'choices' are omitted
chunk = dummy.build_chat_completion_chunk(request_id="req0", role="user")
chunk = dummy.build_chat_completion_chunk(request_id="req0", role="user", model="dummy_model@main")
for field in MANDATORY_FIELDS:
self.assertIn(field, chunk)
self.assertIn('"choices":[{"delta":{"role":"user"},"index":0}]', chunk)
# Case 3: only the content is provided -- other fields in 'choices' are omitted
chunk = dummy.build_chat_completion_chunk(request_id="req0", content="hello")
chunk = dummy.build_chat_completion_chunk(request_id="req0", content="hello", model="dummy_model@main")
for field in MANDATORY_FIELDS:
self.assertIn(field, chunk)
self.assertIn('"choices":[{"delta":{"content":"hello"},"index":0}]', chunk)
@@ -96,7 +95,7 @@ class ServeCLITest(unittest.TestCase):
function=ChoiceDeltaToolCallFunction(name="foo_bar", arguments='{"foo1": "bar1", "foo2": "bar2"}'),
type="function",
)
chunk = dummy.build_chat_completion_chunk(request_id="req0", tool_calls=[tool_call])
chunk = dummy.build_chat_completion_chunk(request_id="req0", tool_calls=[tool_call], model="dummy_model@main")
for field in MANDATORY_FIELDS:
self.assertIn(field, chunk)
expected_choices_content = (