Remove script datasets in tests (#38940)

* remove trust_remote_code

* again

* Revert "Skip some tests for now (#38931)"

This reverts commit 31d30b7224.

* again

* style

* again

* again

* style

* fix integration test

* fix tests

* style

* fix

* fix

* fix the last ones

* style

* last one

* fix last

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Quentin Lhoest
2025-06-25 16:31:20 +02:00
committed by GitHub
parent 3c322c9cdf
commit 858f9b71a8
51 changed files with 154 additions and 293 deletions

View File

@@ -41,8 +41,6 @@ if is_torch_available():
if is_pytesseract_available():
from PIL import Image
from transformers import LayoutLMv3ImageProcessor
@@ -184,11 +182,11 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset
# set up
- datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+ datasets = load_dataset("nielsr/funsd")
processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)
def preprocess_data(examples):
- images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+ images = [image.convert("RGB") for image in examples["image"]]
words = examples["words"]
boxes = examples["bboxes"]
word_labels = examples["ner_tags"]
@@ -222,12 +220,8 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset
- ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
- image_1 = Image.open(ds[0]["file"]).convert("RGB")
- image_2 = Image.open(ds[1]["file"]).convert("RGB")
- return image_1, image_2
+ ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+ return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")
@cached_property
def get_tokenizers(self):