[Speech] Move all examples to new audio feature (#14045)

* up * up * up * finish
2021-10-18 12:52:40 +02:00
parent 4334095c32
commit bdf31d6e0a
11 changed files with 43 additions and 108 deletions
--- a/tests/test_modeling_tf_hubert.py
+++ b/tests/test_modeling_tf_hubert.py
@@ -479,21 +479,13 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
    def _load_datasamples(self, num_samples):
        from datasets import load_dataset

-        import soundfile as sf
-
-        ids = [f"1272-141231-000{i}" for i in range(num_samples)]
-
-        # map files to raw
-        def map_to_array(batch):
-            speech, _ = sf.read(batch["file"])
-            batch["speech"] = speech
-            return batch
-
        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        # librispeech audio is decoded automatically when the "audio" column is read
+        speech_samples = ds.sort("id").filter(
+            lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
+        )[:num_samples]["audio"]

-        ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
-
-        return ds["speech"][:num_samples]
+        return [x["array"] for x in speech_samples]

    def test_inference_ctc_normal(self):
        model = TFHubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")