[SequenceFeatureExtractor] Rewrite padding logic from pure python to numpy (#13650)
* Test np padding
* Pass feature extraction tests
* Update type hints
* Fix flaky integration tests
* Try a more stable waveform
* Add to_numpy jax support
* int32 attention masks
* Refactor normalization tests
This commit is contained in:
@@ -42,9 +42,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
tokenizer="facebook/s2t-small-mustc-en-fr-st",
|
||||
framework="pt",
|
||||
)
|
||||
-waveform = np.zeros((34000,))
|
||||
+waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
|
||||
output = speech_recognizer(waveform)
|
||||
-self.assertEqual(output, {"text": "C'est ce que j'ai fait à ce moment-là."})
|
||||
+self.assertEqual(output, {"text": "(Applaudissements)"})
|
||||
|
||||
@require_torch
|
||||
def test_torch_small_no_tokenizer_files(self):
|
||||
@@ -68,14 +68,14 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
tokenizer="facebook/wav2vec2-base-960h",
|
||||
framework="pt",
|
||||
)
|
||||
-waveform = np.zeros((34000,))
|
||||
+waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
|
||||
output = speech_recognizer(waveform)
|
||||
self.assertEqual(output, {"text": ""})
|
||||
|
||||
from datasets import load_dataset
|
||||
|
||||
-ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
||||
-filename = ds[0]["file"]
|
||||
+ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||
+filename = ds[40]["file"]
|
||||
output = speech_recognizer(filename)
|
||||
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
|
||||
|
||||
@@ -92,8 +92,8 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
|
||||
from datasets import load_dataset
|
||||
|
||||
-ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
||||
-filename = ds[0]["file"]
|
||||
+ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||
+filename = ds[40]["file"]
|
||||
output = speech_recognizer(filename)
|
||||
self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
|
||||
|
||||
@@ -110,16 +110,16 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
|
||||
asr = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
|
||||
|
||||
-waveform = np.zeros((34000,))
|
||||
+waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
|
||||
output = asr(waveform)
|
||||
self.assertEqual(output, {"text": ""})
|
||||
|
||||
-ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
||||
-filename = ds[0]["file"]
|
||||
+ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||
+filename = ds[40]["file"]
|
||||
output = asr(filename)
|
||||
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
|
||||
|
||||
-filename = ds[0]["file"]
|
||||
+filename = ds[40]["file"]
|
||||
with open(filename, "rb") as f:
|
||||
data = f.read()
|
||||
output = asr(data)
|
||||
@@ -139,17 +139,17 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||
|
||||
asr = AutomaticSpeechRecognitionPipeline(model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
|
||||
|
||||
-waveform = np.zeros((34000,))
|
||||
+waveform = np.tile(np.arange(1000, dtype=np.float32), 34)
|
||||
|
||||
output = asr(waveform)
|
||||
-self.assertEqual(output, {"text": "E questo è il motivo per cui non ci siamo mai incontrati."})
|
||||
+self.assertEqual(output, {"text": "(Applausi)"})
|
||||
|
||||
-ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
||||
-filename = ds[0]["file"]
|
||||
+ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||
+filename = ds[40]["file"]
|
||||
output = asr(filename)
|
||||
self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
|
||||
|
||||
-filename = ds[0]["file"]
|
||||
+filename = ds[40]["file"]
|
||||
with open(filename, "rb") as f:
|
||||
data = f.read()
|
||||
output = asr(data)
|
||||
|
||||
Reference in New Issue
Block a user