[SequenceFeatureExtractor] Rewrite padding logic from pure Python to NumPy (#13650)

* Test np padding

* Pass feature extraction tests

* Update type hints

* Fix flaky integration tests

* Try a more stable waveform

* Add to_numpy jax support

* int32 attention masks

* Refactor normalization tests
This commit is contained in:
Anton Lozhkov
2021-09-21 17:10:13 +03:00
committed by GitHub
parent 8d533e6ad6
commit 1417978cd4
8 changed files with 133 additions and 146 deletions

View File

@@ -42,9 +42,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
tokenizer="facebook/s2t-small-mustc-en-fr-st",
framework="pt",
)
- waveform = np.zeros((34000,))
+ waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
output = speech_recognizer(waveform)
- self.assertEqual(output, {"text": "C'est ce que j'ai fait à ce moment-là."})
+ self.assertEqual(output, {"text": "(Applaudissements)"})
@require_torch
def test_torch_small_no_tokenizer_files(self):
@@ -68,14 +68,14 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
tokenizer="facebook/wav2vec2-base-960h",
framework="pt",
)
- waveform = np.zeros((34000,))
+ waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
output = speech_recognizer(waveform)
self.assertEqual(output, {"text": ""})
from datasets import load_dataset
- ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
- filename = ds[0]["file"]
+ ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
+ filename = ds[40]["file"]
output = speech_recognizer(filename)
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
@@ -92,8 +92,8 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
from datasets import load_dataset
- ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
- filename = ds[0]["file"]
+ ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
+ filename = ds[40]["file"]
output = speech_recognizer(filename)
self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
@@ -110,16 +110,16 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
asr = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
- waveform = np.zeros((34000,))
+ waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
output = asr(waveform)
self.assertEqual(output, {"text": ""})
- ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
- filename = ds[0]["file"]
+ ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
+ filename = ds[40]["file"]
output = asr(filename)
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
- filename = ds[0]["file"]
+ filename = ds[40]["file"]
with open(filename, "rb") as f:
data = f.read()
output = asr(data)
@@ -139,17 +139,17 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
asr = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
- waveform = np.zeros((34000,))
+ waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
output = asr(waveform)
- self.assertEqual(output, {"text": "E questo è il motivo per cui non ci siamo mai incontrati."})
+ self.assertEqual(output, {"text": "(Applausi)"})
- ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
- filename = ds[0]["file"]
+ ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
+ filename = ds[40]["file"]
output = asr(filename)
self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
- filename = ds[0]["file"]
+ filename = ds[40]["file"]
with open(filename, "rb") as f:
data = f.read()
output = asr(data)