Pass datasets trust_remote_code (#31406)

* Pass datasets trust_remote_code

* Pass trust_remote_code in more tests

* Add trust_remote_dataset_code arg to some tests

* Revert "Temporarily pin datasets upper version to fix CI"

This reverts commit b7672826ca.

* Pass trust_remote_code in librispeech_asr_dummy docstrings

* Revert "Pin datasets<2.20.0 for examples"

This reverts commit 833fc17a3e.

* Pass trust_remote_code to all examples

* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects

* Pass trust_remote_code to tests

* Pass trust_remote_code to docstrings

* Fix flax examples tests requirements

* Pass trust_remote_dataset_code arg to tests

* Replace trust_remote_dataset_code with trust_remote_code in one example

* Fix duplicate trust_remote_code

* Replace args.trust_remote_dataset_code with args.trust_remote_code

* Replace trust_remote_dataset_code with trust_remote_code in parser

* Replace trust_remote_dataset_code with trust_remote_code in dataclasses

* Replace trust_remote_dataset_code with trust_remote_code arg
This commit is contained in:
Albert Villanova del Moral
2024-06-17 18:29:13 +02:00
committed by GitHub
parent 485fd81471
commit a14b055b65
168 changed files with 804 additions and 410 deletions

View File

@@ -209,7 +209,9 @@ class ClvpFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.Tes
self.assertTrue(pt_processed.input_features.dtype == torch.float32)
def _load_datasamples(self, num_samples):
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
ds = ds.cast_column("audio", Audio(sampling_rate=22050))
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]

View File

@@ -371,7 +371,9 @@ class ClvpModelForConditionalGenerationTester:
def prepare_config_and_inputs(self):
_, input_ids, attention_mask = self.clvp_encoder_tester.prepare_config_and_inputs()
ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = datasets.load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
_, audio, sr = ds.sort("id").select(range(1))[:1]["audio"][0].values()
@@ -553,7 +555,9 @@ class ClvpModelForConditionalGenerationTest(ModelTesterMixin, unittest.TestCase)
class ClvpIntegrationTest(unittest.TestCase):
def setUp(self):
self.text = "This is an example text."
ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = datasets.load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
_, self.speech_samples, self.sr = ds.sort("id").select(range(1))[:1]["audio"][0].values()