Remove script datasets in tests (#38940)

* remove trust_remote_code
* again
* Revert "Skip some tests for now (#38931)"

  This reverts commit 31d30b7224.
* again
* style
* again
* again
* style
* fix integration test
* fix tests
* style
* fix
* fix
* fix the last ones
* style
* last one
* fix last
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-06-25 16:31:20 +02:00
parent 3c322c9cdf
commit 858f9b71a8
51 changed files with 154 additions and 293 deletions
--- a/tests/models/layoutlmv2/test_processor_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_processor_layoutlmv2.py
@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_pytesseract_available():
-    from PIL import Image
-
    from transformers import LayoutLMv2ImageProcessor


@@ -156,11 +154,11 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        from datasets import load_dataset

        # set up
-        datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+        datasets = load_dataset("nielsr/funsd")
        processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")

        def preprocess_data(examples):
-            images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+            images = [image.convert("RGB") for image in examples["image"]]
            words = examples["words"]
            boxes = examples["bboxes"]
            word_labels = examples["ner_tags"]
@@ -192,12 +190,8 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
        # we verify our implementation on 2 document images from the DocVQA dataset
        from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

    @cached_property
    def get_tokenizers(self):