Pass datasets trust_remote_code (#31406)

* Pass datasets trust_remote_code

* Pass trust_remote_code in more tests

* Add trust_remote_dataset_code arg to some tests

* Revert "Temporarily pin datasets upper version to fix CI"

This reverts commit b7672826ca.

* Pass trust_remote_code in librispeech_asr_dummy docstrings

* Revert "Pin datasets<2.20.0 for examples"

This reverts commit 833fc17a3e.

* Pass trust_remote_code to all examples

* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects

* Pass trust_remote_code to tests

* Pass trust_remote_code to docstrings

* Fix flax examples tests requirements

* Pass trust_remote_dataset_code arg to tests

* Replace trust_remote_dataset_code with trust_remote_code in one example

* Fix duplicate trust_remote_code

* Replace args.trust_remote_dataset_code with args.trust_remote_code

* Replace trust_remote_dataset_code with trust_remote_code in parser

* Replace trust_remote_dataset_code with trust_remote_code in dataclasses

* Replace trust_remote_dataset_code with trust_remote_code arg

This commit is contained in:
Albert Villanova del Moral
2024-06-17 18:29:13 +02:00
committed by GitHub
parent 485fd81471
commit a14b055b65
168 changed files with 804 additions and 410 deletions

View File

@@ -138,7 +138,9 @@ class EnCodecFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
def _load_datasamples(self, num_samples):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
# automatic decoding with librispeech
audio_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]

View File

@@ -462,7 +462,9 @@ class EncodecIntegrationTest(unittest.TestCase):
"1.5": [371955],
"24.0": [6659962],
}
librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
librispeech_dummy = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
model_id = "facebook/encodec_24khz"
model = EncodecModel.from_pretrained(model_id).to(torch_device)
@@ -516,7 +518,9 @@ class EncodecIntegrationTest(unittest.TestCase):
"3.0": [144259, 146765, 156435, 176871, 161971],
"24.0": [1568553, 1294948, 1306190, 1464747, 1663150],
}
librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
librispeech_dummy = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
model_id = "facebook/encodec_48khz"
model = EncodecModel.from_pretrained(model_id).to(torch_device)
@@ -578,7 +582,9 @@ class EncodecIntegrationTest(unittest.TestCase):
[85561, 81870, 76953, 48967, 79315, 85442, 81479, 107241],
],
}
librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
librispeech_dummy = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
model_id = "facebook/encodec_48khz"
model = EncodecModel.from_pretrained(model_id).to(torch_device)