Remove script datasets in tests (#38940)

* remove trust_remote_code
* again
* Revert "Skip some tests for now (#38931)"

  This reverts commit 31d30b7224.
* again
* style
* again
* again
* style
* fix integration test
* fix tests
* style
* fix
* fix
* fix the last ones
* style
* last one
* fix last
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-06-25 16:31:20 +02:00
parent 3c322c9cdf
commit 858f9b71a8
51 changed files with 154 additions and 293 deletions
--- a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
@@ -22,8 +22,6 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im


 if is_pytesseract_available():
-    from PIL import Image
-
    from transformers import LayoutLMv3ImageProcessor

    if is_torchvision_available():
@@ -103,17 +101,16 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
            image_processor = image_processing_class.from_dict(self.image_processor_dict, size=42)
            self.assertEqual(image_processor.size, {"height": 42, "width": 42})

-    @unittest.skip("temporary to avoid failing on circleci")
    def test_LayoutLMv3_integration_test(self):
        from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")

        # with apply_OCR = True
        for image_processing_class in self.image_processor_list:
            image_processor = image_processing_class()

-            image = Image.open(ds[0]["file"]).convert("RGB")
+            image = ds[0]["image"].convert("RGB")

            encoding = image_processor(image, return_tensors="pt")

--- a/tests/models/layoutlmv3/test_processor_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_processor_layoutlmv3.py
@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin


 if is_pytesseract_available():
-    from PIL import Image
-
    from transformers import LayoutLMv3ImageProcessor


@@ -172,12 +170,8 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
        # we verify our implementation on 2 document images from the DocVQA dataset
        from datasets import load_dataset

-        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-        image_1 = Image.open(ds[0]["file"]).convert("RGB")
-        image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-        return image_1, image_2
+        ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+        return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")

    @cached_property
    def get_tokenizers(self):