From db9a7e9d3dbd1b595f004597a0502cce0a96135a Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Fri, 19 Jan 2024 09:59:14 +0000 Subject: [PATCH] Don't save `processor_config.json` if a processor has no extra attribute (#28584) * not save if empty * fix * fix * fix * fix * fix --------- Co-authored-by: ydshieh --- src/transformers/processing_utils.py | 9 +++++-- tests/models/auto/test_processor_auto.py | 32 +++++++++++++++--------- tests/test_processing_common.py | 9 ++++--- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 01c824f92c..f727f308ac 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -234,8 +234,11 @@ class ProcessorMixin(PushToHubMixin): # If we save using the predefined names, we can load using `from_pretrained` output_processor_file = os.path.join(save_directory, PROCESSOR_NAME) - self.to_json_file(output_processor_file) - logger.info(f"processor saved in {output_processor_file}") + # For now, let's not save to `processor_config.json` if the processor doesn't have extra attributes and + # `auto_map` is not specified. + if set(self.to_dict().keys()) != {"processor_class"}: + self.to_json_file(output_processor_file) + logger.info(f"processor saved in {output_processor_file}") if push_to_hub: self._upload_modified_files( @@ -246,6 +249,8 @@ class ProcessorMixin(PushToHubMixin): token=kwargs.get("token"), ) + if set(self.to_dict().keys()) == {"processor_class"}: + return [] return [output_processor_file] @classmethod diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py index c22013234f..6cab1cbe81 100644 --- a/tests/models/auto/test_processor_auto.py +++ b/tests/models/auto/test_processor_auto.py @@ -101,6 +101,12 @@ class AutoFeatureExtractorTest(unittest.TestCase): # save in new folder processor.save_pretrained(tmpdirname) + if not os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)): + # create one manually in order to perform this test's objective + config_dict = {"processor_class": "Wav2Vec2Processor"} + with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as fp: + json.dump(config_dict, fp) + # drop `processor_class` in tokenizer config with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f: config_dict = json.load(f) @@ -123,13 +129,14 @@ class AutoFeatureExtractorTest(unittest.TestCase): # save in new folder processor.save_pretrained(tmpdirname) - # drop `processor_class` in processor - with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f: - config_dict = json.load(f) - config_dict.pop("processor_class") + if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)): + # drop `processor_class` in processor + with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f: + config_dict = json.load(f) + config_dict.pop("processor_class") - with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f: - f.write(json.dumps(config_dict)) + with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f: + f.write(json.dumps(config_dict)) # drop `processor_class` in tokenizer with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f: @@ -153,13 +160,14 @@ class AutoFeatureExtractorTest(unittest.TestCase): # save in new folder processor.save_pretrained(tmpdirname) - # drop `processor_class` in processor - with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f: - config_dict = json.load(f) - config_dict.pop("processor_class") + if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)): + # drop `processor_class` in processor + with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f: + config_dict = json.load(f) + config_dict.pop("processor_class") - with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f: - f.write(json.dumps(config_dict)) + with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f: + f.write(json.dumps(config_dict)) # drop `processor_class` in feature extractor with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "r") as f: diff --git a/tests/test_processing_common.py b/tests/test_processing_common.py index 1ab215e34c..402e6a7351 100644 --- a/tests/test_processing_common.py +++ b/tests/test_processing_common.py @@ -75,11 +75,12 @@ class ProcessorTesterMixin: processor_first = self.get_processor() with tempfile.TemporaryDirectory() as tmpdirname: - saved_file = processor_first.save_pretrained(tmpdirname)[0] - check_json_file_has_correct_format(saved_file) - processor_second = self.processor_class.from_pretrained(tmpdirname) + saved_files = processor_first.save_pretrained(tmpdirname) + if len(saved_files) > 0: + check_json_file_has_correct_format(saved_files[0]) + processor_second = self.processor_class.from_pretrained(tmpdirname) - self.assertEqual(processor_second.to_dict(), processor_first.to_dict()) + self.assertEqual(processor_second.to_dict(), processor_first.to_dict()) class MyProcessor(ProcessorMixin):