Remove script datasets in tests (#38940)
* remove trust_remote_code
* again
* Revert "Skip some tests for now (#38931)"
This reverts commit 31d30b7224.
* again
* style
* again
* again
* style
* fix integration test
* fix tests
* style
* fix
* fix
* fix the last ones
* style
* last one
* fix last
* fix
---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -123,13 +123,13 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
|
||||
def test_layoutlmv2_integration_test(self):
|
||||
from datasets import load_dataset
|
||||
|
||||
-ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
+ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
||||
|
||||
for image_processing_class in self.image_processor_list:
|
||||
# with apply_OCR = True
|
||||
image_processing = image_processing_class()
|
||||
|
||||
-image = Image.open(ds[0]["file"]).convert("RGB")
+image = ds[0]["image"]
|
||||
|
||||
encoding = image_processing(image, return_tensors="pt")
|
||||
|
||||
|
||||
@@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin
|
||||
|
||||
|
||||
if is_pytesseract_available():
|
||||
-from PIL import Image
-
from transformers import LayoutLMv2ImageProcessor
|
||||
|
||||
|
||||
@@ -156,11 +154,11 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
from datasets import load_dataset
|
||||
|
||||
# set up
|
||||
-datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
+datasets = load_dataset("nielsr/funsd")
|
||||
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
|
||||
|
||||
def preprocess_data(examples):
|
||||
-images = [Image.open(path).convert("RGB") for path in examples["image_path"]]
+images = [image.convert("RGB") for image in examples["image"]]
|
||||
words = examples["words"]
|
||||
boxes = examples["bboxes"]
|
||||
word_labels = examples["ner_tags"]
|
||||
@@ -192,12 +190,8 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
|
||||
# we verify our implementation on 2 document images from the DocVQA dataset
|
||||
from datasets import load_dataset
|
||||
|
||||
-ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
-
-image_1 = Image.open(ds[0]["file"]).convert("RGB")
-image_2 = Image.open(ds[1]["file"]).convert("RGB")
-
-return image_1, image_2
+ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
+return ds[0]["image"].convert("RGB"), ds[1]["image"].convert("RGB")
|
||||
|
||||
@cached_property
|
||||
def get_tokenizers(self):
|
||||
|
||||
Reference in New Issue
Block a user