Don't save processor_config.json if a processor has no extra attribute (#28584)
* not save if empty
* fix
* fix
* fix
* fix
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -234,8 +234,11 @@ class ProcessorMixin(PushToHubMixin):
|
|||||||
# If we save using the predefined names, we can load using `from_pretrained`
|
# If we save using the predefined names, we can load using `from_pretrained`
|
||||||
output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
|
output_processor_file = os.path.join(save_directory, PROCESSOR_NAME)
|
||||||
|
|
||||||
self.to_json_file(output_processor_file)
|
# For now, let's not save to `processor_config.json` if the processor doesn't have extra attributes and
|
||||||
logger.info(f"processor saved in {output_processor_file}")
|
# `auto_map` is not specified.
|
||||||
|
if set(self.to_dict().keys()) != {"processor_class"}:
|
||||||
|
self.to_json_file(output_processor_file)
|
||||||
|
logger.info(f"processor saved in {output_processor_file}")
|
||||||
|
|
||||||
if push_to_hub:
|
if push_to_hub:
|
||||||
self._upload_modified_files(
|
self._upload_modified_files(
|
||||||
@@ -246,6 +249,8 @@ class ProcessorMixin(PushToHubMixin):
|
|||||||
token=kwargs.get("token"),
|
token=kwargs.get("token"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if set(self.to_dict().keys()) == {"processor_class"}:
|
||||||
|
return []
|
||||||
return [output_processor_file]
|
return [output_processor_file]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -101,6 +101,12 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
|||||||
# save in new folder
|
# save in new folder
|
||||||
processor.save_pretrained(tmpdirname)
|
processor.save_pretrained(tmpdirname)
|
||||||
|
|
||||||
|
if not os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
|
||||||
|
# create one manually in order to perform this test's objective
|
||||||
|
config_dict = {"processor_class": "Wav2Vec2Processor"}
|
||||||
|
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as fp:
|
||||||
|
json.dump(config_dict, fp)
|
||||||
|
|
||||||
# drop `processor_class` in tokenizer config
|
# drop `processor_class` in tokenizer config
|
||||||
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
|
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
|
||||||
config_dict = json.load(f)
|
config_dict = json.load(f)
|
||||||
@@ -123,13 +129,14 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
|||||||
# save in new folder
|
# save in new folder
|
||||||
processor.save_pretrained(tmpdirname)
|
processor.save_pretrained(tmpdirname)
|
||||||
|
|
||||||
# drop `processor_class` in processor
|
if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
|
||||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
# drop `processor_class` in processor
|
||||||
config_dict = json.load(f)
|
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
||||||
config_dict.pop("processor_class")
|
config_dict = json.load(f)
|
||||||
|
config_dict.pop("processor_class")
|
||||||
|
|
||||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
||||||
f.write(json.dumps(config_dict))
|
f.write(json.dumps(config_dict))
|
||||||
|
|
||||||
# drop `processor_class` in tokenizer
|
# drop `processor_class` in tokenizer
|
||||||
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
|
with open(os.path.join(tmpdirname, TOKENIZER_CONFIG_FILE), "r") as f:
|
||||||
@@ -153,13 +160,14 @@ class AutoFeatureExtractorTest(unittest.TestCase):
|
|||||||
# save in new folder
|
# save in new folder
|
||||||
processor.save_pretrained(tmpdirname)
|
processor.save_pretrained(tmpdirname)
|
||||||
|
|
||||||
# drop `processor_class` in processor
|
if os.path.isfile(os.path.join(tmpdirname, PROCESSOR_NAME)):
|
||||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
# drop `processor_class` in processor
|
||||||
config_dict = json.load(f)
|
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "r") as f:
|
||||||
config_dict.pop("processor_class")
|
config_dict = json.load(f)
|
||||||
|
config_dict.pop("processor_class")
|
||||||
|
|
||||||
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
with open(os.path.join(tmpdirname, PROCESSOR_NAME), "w") as f:
|
||||||
f.write(json.dumps(config_dict))
|
f.write(json.dumps(config_dict))
|
||||||
|
|
||||||
# drop `processor_class` in feature extractor
|
# drop `processor_class` in feature extractor
|
||||||
with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "r") as f:
|
with open(os.path.join(tmpdirname, FEATURE_EXTRACTOR_NAME), "r") as f:
|
||||||
|
|||||||
@@ -75,11 +75,12 @@ class ProcessorTesterMixin:
|
|||||||
processor_first = self.get_processor()
|
processor_first = self.get_processor()
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
saved_file = processor_first.save_pretrained(tmpdirname)[0]
|
saved_files = processor_first.save_pretrained(tmpdirname)
|
||||||
check_json_file_has_correct_format(saved_file)
|
if len(saved_files) > 0:
|
||||||
processor_second = self.processor_class.from_pretrained(tmpdirname)
|
check_json_file_has_correct_format(saved_files[0])
|
||||||
|
processor_second = self.processor_class.from_pretrained(tmpdirname)
|
||||||
|
|
||||||
self.assertEqual(processor_second.to_dict(), processor_first.to_dict())
|
self.assertEqual(processor_second.to_dict(), processor_first.to_dict())
|
||||||
|
|
||||||
|
|
||||||
class MyProcessor(ProcessorMixin):
|
class MyProcessor(ProcessorMixin):
|
||||||
|
|||||||
Reference in New Issue
Block a user