Remove script datasets in tests (#38940)

* remove trust_remote_code

* again

* Revert "Skip some tests for now (#38931)"

This reverts commit 31d30b7224.

* again

* style

* again

* again

* style

* fix integration test

* fix tests

* style

* fix

* fix

* fix the last ones

* style

* last one

* fix last

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Quentin Lhoest
2025-06-25 16:31:20 +02:00
committed by GitHub
parent 3c322c9cdf
commit 858f9b71a8
51 changed files with 154 additions and 293 deletions

View File

@@ -22,8 +22,6 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im
if is_pytesseract_available():
from PIL import Image
from transformers import LayoutLMv3ImageProcessor
if is_torchvision_available():
@@ -103,17 +101,16 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
image_processor = image_processing_class.from_dict(self.image_processor_dict, size=42)
self.assertEqual(image_processor.size, {"height": 42, "width": 42})
-@unittest.skip("temporary to avoid failing on circleci")
def test_LayoutLMv3_integration_test(self):
from datasets import load_dataset
-ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
+ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
# with apply_OCR = True
for image_processing_class in self.image_processor_list:
image_processor = image_processing_class()
-image = Image.open(ds[0]["file"]).convert("RGB")
+image = ds[0]["image"].convert("RGB")
encoding = image_processor(image, return_tensors="pt")

View File

@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin
if is_pytesseract_available():
from PIL import Image
from transformers import LayoutLMv3ImageProcessor
@@ -172,12 +170,8 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset
-ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-image_1 = Image.open(ds[0]["file"]).convert("RGB")
-image_2 = Image.open(ds[1]["file"]).convert("RGB")
-return image_1, image_2
+ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")
@cached_property
def get_tokenizers(self):