From d9e693e1d01bfee99f55186ec5698b235e275a0c Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 15 Mar 2021 16:50:05 +0300 Subject: [PATCH] make wav2vec2 test deterministic (#10714) --- tests/test_modeling_wav2vec2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_modeling_wav2vec2.py b/tests/test_modeling_wav2vec2.py index 75b8795f78..ef269fd65b 100644 --- a/tests/test_modeling_wav2vec2.py +++ b/tests/test_modeling_wav2vec2.py @@ -515,6 +515,8 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase): import soundfile as sf + ids = [f"1272-141231-000{i}" for i in range(num_samples)] + # map files to raw def map_to_array(batch): speech, _ = sf.read(batch["file"]) @@ -522,7 +524,8 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase): return batch ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation") - ds = ds.select(range(num_samples)).map(map_to_array) + + ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array) return ds["speech"][:num_samples]