[serve] Add speech to text (/v1/audio/transcriptions) (#39434)

* Scaffolding
* Explicit content
* Naïve Responses API streaming implementation
* Cleanup
* Scaffolding
* Explicit content
* Naïve Responses API streaming implementation
* Cleanup
* use openai
* validate request, including detecting unused fields
* dict indexing
* dict var access
* tmp commit (tests failing)
* add slow
* use oai output type in completions
* (little rebase errors)
* working spec?
* guard type hint
* type hints. fix state (CB can now load different models)
* type hints; fn names; error type
* add docstrings
* responses + kv cache
* metadata support; fix kv cache; error event
* add output_index and content_index
* docstrings
* add test_build_response_event
* docs/comments
* gate test requirements; terminate cb manager on model switch
* nasty type hints
* more type hints
* disable validation by default; enable force models
* todo
* experiment: base model from typed dict
* audio working
* fix bad rebase
* load audio with librosa
* implement timed models
* almost working
* make fixup
* fix tests
* transcription request type
* tokenizer -> processor
* add example in docs

---------

Co-authored-by: Lysandre <hi@lysand.re>
2025-07-17 15:29:57 +01:00
parent 8b3de61a65
commit bf6c997685
3 changed files with 355 additions and 104 deletions
--- a/tests/commands/test_serving.py
+++ b/tests/commands/test_serving.py
@@ -63,14 +63,13 @@ class ServeCLITest(unittest.TestCase):
        """
        dummy = ServeCommand.__new__(ServeCommand)
        dummy.args = type("Args", (), {})()
-        dummy.loaded_model = "dummy_model@main"

        # The keys for these fields must be present in every chunk
        MANDATORY_FIELDS = ["data", "id", "choices", "created", "model", "object", "system_fingerprint"]

        # Case 1: most fields are provided
        chunk = ServeCommand.build_chat_completion_chunk(
-            dummy, request_id="req0", content="hello", finish_reason="stop", role="user"
+            dummy, request_id="req0", content="hello", finish_reason="stop", role="user", model="dummy_model@main"
        )
        for field in MANDATORY_FIELDS:
            self.assertIn(field, chunk)
@@ -79,13 +78,13 @@ class ServeCLITest(unittest.TestCase):
        )

        # Case 2: only the role is provided -- other fields in 'choices' are omitted
-        chunk = dummy.build_chat_completion_chunk(request_id="req0", role="user")
+        chunk = dummy.build_chat_completion_chunk(request_id="req0", role="user", model="dummy_model@main")
        for field in MANDATORY_FIELDS:
            self.assertIn(field, chunk)
        self.assertIn('"choices":[{"delta":{"role":"user"},"index":0}]', chunk)

        # Case 3: only the content is provided -- other fields in 'choices' are omitted
-        chunk = dummy.build_chat_completion_chunk(request_id="req0", content="hello")
+        chunk = dummy.build_chat_completion_chunk(request_id="req0", content="hello", model="dummy_model@main")
        for field in MANDATORY_FIELDS:
            self.assertIn(field, chunk)
        self.assertIn('"choices":[{"delta":{"content":"hello"},"index":0}]', chunk)
@@ -96,7 +95,7 @@ class ServeCLITest(unittest.TestCase):
            function=ChoiceDeltaToolCallFunction(name="foo_bar", arguments='{"foo1": "bar1", "foo2": "bar2"}'),
            type="function",
        )
-        chunk = dummy.build_chat_completion_chunk(request_id="req0", tool_calls=[tool_call])
+        chunk = dummy.build_chat_completion_chunk(request_id="req0", tool_calls=[tool_call], model="dummy_model@main")
        for field in MANDATORY_FIELDS:
            self.assertIn(field, chunk)
        expected_choices_content = (