[Speech] Move all examples to new audio feature (#14045)
* up * up * up * finish
This commit is contained in:
committed by
GitHub
parent
4334095c32
commit
bdf31d6e0a
@@ -356,21 +356,13 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
ids = [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
)[:num_samples]["audio"]
|
||||
|
||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def test_inference_ctc_robust_batched(self):
|
||||
model = FlaxWav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self", from_pt=True)
|
||||
|
||||
@@ -613,21 +613,11 @@ class HubertModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
ids = [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
|
||||
|
||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def _load_superb(self, task, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
@@ -407,21 +407,13 @@ class SEWModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
)[:num_samples]["audio"]
|
||||
|
||||
ids = [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
||||
|
||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def test_inference_pretrained_batched(self):
|
||||
model = SEWModel.from_pretrained("asapp/sew-tiny-100k").to(torch_device)
|
||||
|
||||
@@ -428,21 +428,13 @@ class SEWDModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
)[:num_samples]["audio"]
|
||||
|
||||
ids = [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
||||
|
||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def test_inference_pretrained_batched(self):
|
||||
model = SEWDModel.from_pretrained("asapp/sew-d-tiny-100k").to(torch_device)
|
||||
|
||||
@@ -715,18 +715,11 @@ class Speech2TextModelIntegrationTests(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
ds = ds.sort("id").select(range(num_samples)).map(map_to_array)
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def test_generation_librispeech(self):
|
||||
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
|
||||
|
||||
@@ -479,21 +479,13 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
ids = [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
)[:num_samples]["audio"]
|
||||
|
||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def test_inference_ctc_normal(self):
|
||||
model = TFHubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
|
||||
|
||||
@@ -479,21 +479,13 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
ids = [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
)[:num_samples]["audio"]
|
||||
|
||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def test_inference_ctc_normal(self):
|
||||
model = TFWav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
|
||||
|
||||
@@ -900,21 +900,13 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
ids = [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
|
||||
# map files to raw
|
||||
def map_to_array(batch):
|
||||
speech, _ = sf.read(batch["file"])
|
||||
batch["speech"] = speech
|
||||
return batch
|
||||
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
)[:num_samples]["audio"]
|
||||
|
||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||
|
||||
return ds["speech"][:num_samples]
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def _load_superb(self, task, num_samples):
|
||||
from datasets import load_dataset
|
||||
|
||||
Reference in New Issue
Block a user