Add SpeechEncoderDecoder & Speech2Text2 (#13186)

* fix_torch_device_generate_test * remove @ * up * correct some bugs * correct model * finish speech2text extension * up * up * up * up * Update utils/custom_init_isort.py * up * up * update with tokenizer * correct old tok * correct old tok * fix bug * up * up * add more tests * up * fix docs * up * fix some more tests * add better config * correct some more things " * fix tests * improve docs * Apply suggestions from code review * Apply suggestions from code review * final fixes * finalize * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * apply suggestions Lysandre and Sylvain * apply nicos suggestions * upload everything * finish Co-authored-by: Patrick von Platen <patrick@huggingface.co> Co-authored-by: your_github_username <your_github_email> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
2021-09-01 13:33:31 +02:00
parent 9396b40433
commit 0b8c84e110
30 changed files with 3649 additions and 73 deletions
--- a/tests/test_pipelines_automatic_speech_recognition.py
+++ b/tests/test_pipelines_automatic_speech_recognition.py
@@ -14,6 +14,8 @@

 import unittest

+import pytest
+
 from transformers import AutoFeatureExtractor, AutoTokenizer, Speech2TextForConditionalGeneration, Wav2Vec2ForCTC
 from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
 from transformers.testing_utils import is_pipeline_test, require_datasets, require_torch, require_torchaudio, slow
@@ -44,6 +46,16 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
        output = speech_recognizer(waveform)
        self.assertEqual(output, {"text": "C'est ce que j'ai fait à ce moment-là."})

+    @require_torch
+    def test_torch_small_no_tokenizer_files(self):
+        # test that model without tokenizer file cannot be loaded
+        with pytest.raises(ValueError):
+            pipeline(
+                task="automatic-speech-recognition",
+                model="hf-internal-testing/tiny-random-wav2vec2",
+                framework="pt",
+            )
+
    @require_datasets
    @require_torch
    @slow
@@ -67,6 +79,24 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
        output = speech_recognizer(filename)
        self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})

+    @require_datasets
+    @require_torch
+    @slow
+    def test_torch_speech_encoder_decoder(self):
+        speech_recognizer = pipeline(
+            task="automatic-speech-recognition",
+            model="facebook/s2t-wav2vec2-large-en-de",
+            feature_extractor="facebook/s2t-wav2vec2-large-en-de",
+            framework="pt",
+        )
+
+        from datasets import load_dataset
+
+        ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
+        filename = ds[0]["file"]
+        output = speech_recognizer(filename)
+        self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
+
    @slow
    @require_torch
    @require_datasets