Add SpeechEncoderDecoder & Speech2Text2 (#13186)

* fix_torch_device_generate_test * remove @ * up * correct some bugs * correct model * finish speech2text extension * up * up * up * up * Update utils/custom_init_isort.py * up * up * update with tokenizer * correct old tok * correct old tok * fix bug * up * up * add more tests * up * fix docs * up * fix some more tests * add better config * correct some more things * fix tests * improve docs * Apply suggestions from code review * Apply suggestions from code review * final fixes * finalize * Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * apply suggestions Lysandre and Sylvain * apply nicos suggestions * upload everything * finish Co-authored-by: Patrick von Platen <patrick@huggingface.co> Co-authored-by: your_github_username <your_github_email> Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
2021-09-01 13:33:31 +02:00
parent 9396b40433
commit 0b8c84e110
30 changed files with 3649 additions and 73 deletions
--- a/tests/test_pipelines_audio_classification.py
+++ b/tests/test_pipelines_audio_classification.py
@@ -76,6 +76,8 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
    @require_torch
    def test_small_model_pt(self):
        model = "anton-l/wav2vec2-random-tiny-classifier"
+
+        # hack: dummy tokenizer is required to prevent pipeline from failing
        tokenizer = PreTrainedTokenizer()
        audio_classifier = pipeline("audio-classification", model=model, tokenizer=tokenizer)

@@ -98,6 +100,8 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
        import datasets

        model = "superb/wav2vec2-base-superb-ks"
+
+        # hack: dummy tokenizer is required to prevent pipeline from failing
        tokenizer = PreTrainedTokenizer()
        audio_classifier = pipeline("audio-classification", model=model, tokenizer=tokenizer)
        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")