Pass datasets trust_remote_code (#31406)

* Pass datasets trust_remote_code * Pass trust_remote_code in more tests * Add trust_remote_dataset_code arg to some tests * Revert "Temporarily pin datasets upper version to fix CI" This reverts commit b7672826ca. * Pass trust_remote_code in librispeech_asr_dummy docstrings * Revert "Pin datasets<2.20.0 for examples" This reverts commit 833fc17a3e. * Pass trust_remote_code to all examples * Revert "Add trust_remote_dataset_code arg to some tests" to research_projects * Pass trust_remote_code to tests * Pass trust_remote_code to docstrings * Fix flax examples tests requirements * Pass trust_remote_dataset_code arg to tests * Replace trust_remote_dataset_code with trust_remote_code in one example * Fix duplicate trust_remote_code * Replace args.trust_remote_dataset_code with args.trust_remote_code * Replace trust_remote_dataset_code with trust_remote_code in parser * Replace trust_remote_dataset_code with trust_remote_code in dataclasses * Replace trust_remote_dataset_code with trust_remote_code arg
2024-06-17 18:29:13 +02:00
parent 485fd81471
commit a14b055b65
168 changed files with 804 additions and 410 deletions
--- a/tests/models/encodec/test_feature_extraction_encodec.py
+++ b/tests/models/encodec/test_feature_extraction_encodec.py
@@ -138,7 +138,9 @@ class EnCodecFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
    def _load_datasamples(self, num_samples):
        from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        ds = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
        # automatic decoding with librispeech
        audio_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]

--- a/tests/models/encodec/test_modeling_encodec.py
+++ b/tests/models/encodec/test_modeling_encodec.py
@@ -462,7 +462,9 @@ class EncodecIntegrationTest(unittest.TestCase):
            "1.5": [371955],
            "24.0": [6659962],
        }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
        model_id = "facebook/encodec_24khz"

        model = EncodecModel.from_pretrained(model_id).to(torch_device)
@@ -516,7 +518,9 @@ class EncodecIntegrationTest(unittest.TestCase):
            "3.0": [144259, 146765, 156435, 176871, 161971],
            "24.0": [1568553, 1294948, 1306190, 1464747, 1663150],
        }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
        model_id = "facebook/encodec_48khz"

        model = EncodecModel.from_pretrained(model_id).to(torch_device)
@@ -578,7 +582,9 @@ class EncodecIntegrationTest(unittest.TestCase):
                [85561, 81870, 76953, 48967, 79315, 85442, 81479, 107241],
            ],
        }
-        librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+        librispeech_dummy = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+        )
        model_id = "facebook/encodec_48khz"

        model = EncodecModel.from_pretrained(model_id).to(torch_device)