Fix some tests using "common_voice" (#27147)

* Use mozilla-foundation/common_voice_11_0

* Update expected values

* Update expected values

* For test_word_time_stamp_integration

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2023-10-30 15:27:15 +01:00
committed by GitHub
parent 691fd8fdde
commit 576994963f
2 changed files with 17 additions and 17 deletions

View File

@@ -434,7 +434,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
def test_word_time_stamp_integration(self):
import torch
- ds = load_dataset("common_voice", "en", split="train", streaming=True)
+ ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
ds_iter = iter(ds)
sample = next(ds_iter)
@@ -442,7 +442,6 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")
model = Wav2Vec2ForCTC.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")
- # compare to filename `common_voice_en_100038.mp3` of dataset viewer on https://huggingface.co/datasets/common_voice/viewer/en/train
input_values = processor(sample["audio"]["array"], return_tensors="pt").input_values
with torch.no_grad():
@@ -461,6 +460,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
]
EXPECTED_TEXT = "WHY DOES MILISANDRA LOOK LIKE SHE WANTS TO CONSUME JOHN SNOW ON THE RIVER AT THE WALL"
+ EXPECTED_TEXT = "THE TRACK APPEARS ON THE COMPILATION ALBUM CRAFT FORKS"
# output words
self.assertEqual(" ".join(self.get_from_offsets(word_time_stamps, "word")), EXPECTED_TEXT)
@@ -471,8 +471,8 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
end_times = torch.tensor(self.get_from_offsets(word_time_stamps, "end_time"))
# fmt: off
- expected_start_tensor = torch.tensor([1.4199, 1.6599, 2.2599, 3.0, 3.24, 3.5999, 3.7999, 4.0999, 4.26, 4.94, 5.28, 5.6599, 5.78, 5.94, 6.32, 6.5399, 6.6599])
- expected_end_tensor = torch.tensor([1.5399, 1.8999, 2.9, 3.16, 3.5399, 3.72, 4.0199, 4.1799, 4.76, 5.1599, 5.5599, 5.6999, 5.86, 6.1999, 6.38, 6.6199, 6.94])
+ expected_start_tensor = torch.tensor([0.6800, 0.8800, 1.1800, 1.8600, 1.9600, 2.1000, 3.0000, 3.5600, 3.9800])
+ expected_end_tensor = torch.tensor([0.7800, 1.1000, 1.6600, 1.9200, 2.0400, 2.8000, 3.3000, 3.8800, 4.2800])
# fmt: on
self.assertTrue(torch.allclose(start_times, expected_start_tensor, atol=0.01))