Pass datasets trust_remote_code (#31406)
* Pass datasets trust_remote_code
* Pass trust_remote_code in more tests
* Add trust_remote_dataset_code arg to some tests
* Revert "Temporarily pin datasets upper version to fix CI"
  This reverts commit b7672826ca.
* Pass trust_remote_code in librispeech_asr_dummy docstrings
* Revert "Pin datasets<2.20.0 for examples"
  This reverts commit 833fc17a3e.
* Pass trust_remote_code to all examples
* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects
* Pass trust_remote_code to tests
* Pass trust_remote_code to docstrings
* Fix flax examples tests requirements
* Pass trust_remote_dataset_code arg to tests
* Replace trust_remote_dataset_code with trust_remote_code in one example
* Fix duplicate trust_remote_code
* Replace args.trust_remote_dataset_code with args.trust_remote_code
* Replace trust_remote_dataset_code with trust_remote_code in parser
* Replace trust_remote_dataset_code with trust_remote_code in dataclasses
* Replace trust_remote_dataset_code with trust_remote_code arg
This commit is contained in:
committed by
GitHub
parent
485fd81471
commit
a14b055b65
@@ -72,7 +72,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
|
||||
try:
|
||||
_ = in_queue.get(timeout=timeout)
|
||||
|
||||
ds = load_dataset("common_voice", "es", split="test", streaming=True)
|
||||
ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
|
||||
sample = next(iter(ds))
|
||||
|
||||
resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
|
||||
@@ -489,7 +489,9 @@ class FlaxWav2Vec2UtilsTest(unittest.TestCase):
|
||||
@slow
|
||||
class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
def _load_datasamples(self, num_samples):
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
ds = load_dataset(
|
||||
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
|
||||
)
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
@@ -585,7 +587,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
@require_pyctcdecode
|
||||
@require_librosa
|
||||
def test_wav2vec2_with_lm(self):
|
||||
ds = load_dataset("common_voice", "es", split="test", streaming=True)
|
||||
ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
|
||||
sample = next(iter(ds))
|
||||
|
||||
resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
|
||||
@@ -604,7 +606,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
@require_pyctcdecode
|
||||
@require_librosa
|
||||
def test_wav2vec2_with_lm_pool(self):
|
||||
ds = load_dataset("common_voice", "es", split="test", streaming=True)
|
||||
ds = load_dataset("legacy-datasets/common_voice", "es", split="test", streaming=True, trust_remote_code=True)
|
||||
sample = next(iter(ds))
|
||||
|
||||
resampled_audio = librosa.resample(sample["audio"]["array"], 48_000, 16_000)
|
||||
|
||||
@@ -716,7 +716,9 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
gc.collect()
|
||||
|
||||
def _load_datasamples(self, num_samples):
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
ds = load_dataset(
|
||||
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
|
||||
)
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
@@ -725,7 +727,7 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def _load_superb(self, task, num_samples):
|
||||
ds = load_dataset("anton-l/superb_dummy", task, split="test")
|
||||
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
|
||||
|
||||
return ds[:num_samples]
|
||||
|
||||
|
||||
@@ -101,7 +101,9 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
|
||||
try:
|
||||
_ = in_queue.get(timeout=timeout)
|
||||
|
||||
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
|
||||
ds = load_dataset(
|
||||
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
|
||||
)
|
||||
sample = next(iter(ds))
|
||||
|
||||
resampled_audio = torchaudio.functional.resample(
|
||||
@@ -1468,7 +1470,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
backend_empty_cache(torch_device)
|
||||
|
||||
def _load_datasamples(self, num_samples):
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
ds = load_dataset(
|
||||
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
|
||||
)
|
||||
# automatic decoding with librispeech
|
||||
speech_samples = ds.sort("id").filter(
|
||||
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
|
||||
@@ -1477,7 +1481,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
return [x["array"] for x in speech_samples]
|
||||
|
||||
def _load_superb(self, task, num_samples):
|
||||
ds = load_dataset("anton-l/superb_dummy", task, split="test")
|
||||
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
|
||||
|
||||
return ds[:num_samples]
|
||||
|
||||
@@ -1843,7 +1847,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
@require_pyctcdecode
|
||||
@require_torchaudio
|
||||
def test_wav2vec2_with_lm(self):
|
||||
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
|
||||
ds = load_dataset(
|
||||
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
|
||||
)
|
||||
sample = next(iter(ds))
|
||||
|
||||
resampled_audio = torchaudio.functional.resample(
|
||||
@@ -1867,7 +1873,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
@require_pyctcdecode
|
||||
@require_torchaudio
|
||||
def test_wav2vec2_with_lm_pool(self):
|
||||
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
|
||||
ds = load_dataset(
|
||||
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
|
||||
)
|
||||
sample = next(iter(ds))
|
||||
|
||||
resampled_audio = torchaudio.functional.resample(
|
||||
@@ -1965,7 +1973,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||
LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}
|
||||
|
||||
def run_model(lang):
|
||||
ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
|
||||
ds = load_dataset(
|
||||
"mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
|
||||
)
|
||||
sample = next(iter(ds))
|
||||
|
||||
wav2vec2_lang = LANG_MAP[lang]
|
||||
|
||||
Reference in New Issue
Block a user