Chat template: save and load correctly for processors (#33462)

* fix * add tests * fix tests * Update tests/models/llava/test_processor_llava.py Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * fix * fix tests * update tests --------- Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2024-09-18 13:00:44 +02:00
parent 52e22cbf67
commit db72894b48
4 changed files with 106 additions and 10 deletions
--- a/tests/models/llava_onevision/test_processing_llava_onevision.py
+++ b/tests/models/llava_onevision/test_processing_llava_onevision.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 import shutil
 import tempfile
 import unittest
@@ -40,9 +41,10 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        image_processor = LlavaOnevisionImageProcessor()
        video_processor = LlavaOnevisionVideoProcessor()
        tokenizer = Qwen2TokenizerFast.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+        processor_kwargs = self.prepare_processor_dict()

        processor = LlavaOnevisionProcessor(
-            video_processor=video_processor, image_processor=image_processor, tokenizer=tokenizer
+            video_processor=video_processor, image_processor=image_processor, tokenizer=tokenizer, **processor_kwargs
        )
        processor.save_pretrained(self.tmpdirname)

@@ -52,9 +54,32 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def get_image_processor(self, **kwargs):
        return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor

-    def get_Video_processor(self, **kwargs):
+    def get_video_processor(self, **kwargs):
        return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).video_processor

+    def prepare_processor_dict(self):
+        return {"chat_template": "dummy_template"}
+
+    @unittest.skip(
+        "Skip because the model has no processor kwargs except for chat template and"
+        "chat template is saved as a separate file. Stop skipping this test when the processor"
+        "has new kwargs saved in config file."
+    )
+    def test_processor_to_json_string(self):
+        pass
+
+    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
+    def test_chat_template_is_saved(self):
+        processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
+        processor_dict_loaded = json.loads(processor_loaded.to_json_string())
+        # chat templates aren't serialized to json in processors
+        self.assertFalse("chat_template" in processor_dict_loaded.keys())
+
+        # they have to be saved as separate file and loaded back from that file
+        # so we check if the same template is loaded
+        processor_dict = self.prepare_processor_dict()
+        self.assertTrue(processor_loaded.chat_template == processor_dict.get("chat_template", None))
+
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)