if output is tuple like facebook/hf-seamless-m4t-medium, waveform is … (#29722)

* if output is tuple like facebook/hf-seamless-m4t-medium, waveform is the first element

Signed-off-by: Wang, Yi <yi.a.wang@intel.com>

* add test and fix batch issue

Signed-off-by: Wang, Yi <yi.a.wang@intel.com>

* add dict output support for seamless_m4t

Signed-off-by: Wang, Yi <yi.a.wang@intel.com>

---------

Signed-off-by: Wang, Yi <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi
2024-04-05 15:26:44 +08:00
committed by GitHub
parent 8b52fa6b42
commit 79d62b2da2
4 changed files with 29 additions and 3 deletions

View File

@@ -66,6 +66,27 @@ class TextToAudioPipelineTests(unittest.TestCase):
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
@slow
@require_torch
def test_medium_seamless_m4t_pt(self):
speech_generator = pipeline(task="text-to-audio", model="facebook/hf-seamless-m4t-medium", framework="pt")
for forward_params in [{"tgt_lang": "eng"}, {"return_intermediate_token_ids": True, "tgt_lang": "eng"}]:
outputs = speech_generator("This is a test", forward_params=forward_params)
self.assertEqual({"audio": ANY(np.ndarray), "sampling_rate": 16000}, outputs)
# test two examples side-by-side
outputs = speech_generator(["This is a test", "This is a second test"], forward_params=forward_params)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
# test batching
outputs = speech_generator(
["This is a test", "This is a second test"], forward_params=forward_params, batch_size=2
)
audio = [output["audio"] for output in outputs]
self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio)
@slow
@require_torch
def test_small_bark_pt(self):