[CI] lazy loading external datasets (#37218)

This commit is contained in:
Joao Gante
2025-04-03 09:57:45 +01:00
committed by GitHub
parent a0803a9555
commit 2099287a59
6 changed files with 63 additions and 37 deletions

View File

@@ -26,7 +26,6 @@ from transformers import (
)
from transformers.pipelines import ImageClassificationPipeline, pipeline
from transformers.testing_utils import (
_run_pipeline_tests,
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
nested_simplify,
@@ -59,13 +58,17 @@ else:
class ImageClassificationPipelineTests(unittest.TestCase):
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
_dataset = None
if _run_pipeline_tests:
# we use revision="refs/pr/1" until the PR is merged
# https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
_dataset = datasets.load_dataset(
"hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1"
)
@classmethod
def _load_dataset(cls):
    """Fetch the image fixtures dataset on first use and cache it on the class.

    The result is stored in ``cls._dataset``; once that attribute is no longer ``None``
    every later call returns immediately, so the dataset is not requested again.
    """
    if cls._dataset is not None:
        # Already cached by an earlier call -- nothing to do.
        return
    # Pinned to revision="refs/pr/1" until that PR is merged:
    # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
    cls._dataset = datasets.load_dataset(
        "hf-internal-testing/fixtures_image_utils",
        split="test",
        revision="refs/pr/1",
    )
def get_test_pipeline(
self,
@@ -92,6 +95,7 @@ class ImageClassificationPipelineTests(unittest.TestCase):
return image_classifier, examples
def run_pipeline_test(self, image_classifier, examples):
self._load_dataset()
outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
self.assertEqual(