🚨 🚨 Setup -> setupclass conversion (#37282)

* More limited setup -> setupclass conversion * make fixup * Trigger tests * Fixup UDOP * Missed a spot * tearDown -> tearDownClass where appropriate * Couple more class fixes * Fixups for UDOP and VisionTextDualEncoder * Ignore errors when removing the tmpdir, in case it already got cleaned up somewhere * CLIP fixes * More correct classmethods * Wav2Vec2Bert fixes * More methods become static * More class methods * More class methods * Revert changes for integration tests / modeling files * Use a different tempdir for tests that actually write to it * Remove addClassCleanup and just use teardownclass * Remove changes in modeling files * Cleanup get_processor_dict() for got_ocr2 * Fix regression on Wav2Vec2BERT test that was masked by this before * Rework tests that modify the tmpdir * make fix-copies * revert clvp modeling test changes * Fix CLIP processor test * make fix-copies
2025-04-08 17:15:37 +01:00
parent c15a7adb28
commit 4d0de5f73a
49 changed files with 740 additions and 574 deletions
--- a/tests/models/gemma3/test_processing_gemma3.py
+++ b/tests/models/gemma3/test_processing_gemma3.py
@@ -34,8 +34,9 @@ SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")
 class Gemma3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    processor_class = Gemma3Processor

-    def setUp(self):
-        self.tmpdirname = tempfile.mkdtemp()
+    @classmethod
+    def setUpClass(cls):
+        cls.tmpdirname = tempfile.mkdtemp()
        gemma3_image_processor_kwargs = {
            "do_pan_and_scan": True,
            "pan_and_scan_min_crop_size": 256,
@@ -52,15 +53,17 @@ class Gemma3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
            "eoi_token": "<end_of_image>",
        }
        tokenizer = GemmaTokenizer(SAMPLE_VOCAB, keep_accents=True, extra_special_tokens=extra_special_tokens)
-        processor_kwargs = self.prepare_processor_dict()
+        processor_kwargs = cls.prepare_processor_dict()
        processor = Gemma3Processor(image_processor=image_processor, tokenizer=tokenizer, **processor_kwargs)
-        processor.save_pretrained(self.tmpdirname)
+        processor.save_pretrained(cls.tmpdirname)

-    def tearDown(self):
-        shutil.rmtree(self.tmpdirname)
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

    # TODO: raushan or arthur: add the real chat template
-    def prepare_processor_dict(self):
+    @staticmethod
+    def prepare_processor_dict():
        return {
            "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n    {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n    {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n    {%- set first_user_prefix = \"\" -%}\n    {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n        {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n    {%- endif -%}\n    {%- if (message['role'] == 'assistant') -%}\n        {%- set role = \"model\" -%}\n    {%- else -%}\n        {%- set role = message['role'] -%}\n    {%- endif -%}\n    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n    {%- if message['content'] is string -%}\n        {{ message['content'] | trim }}\n    {%- elif message['content'] is iterable -%}\n        {%- for item in message['content'] -%}\n            {%- if item['type'] == 'image' -%}\n                {{ '<start_of_image>' }}\n            {%- elif item['type'] == 'text' -%}\n                {{ item['text'] | trim }}\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {{ raise_exception(\"Invalid content type\") }}\n    {%- endif -%}\n    {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",            "image_seq_length": 3,
        }  # fmt: skip