save_pretrained: mkdir(exist_ok=True) (#5258)
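* All save_pretrained methods now create the save directory themselves (os.makedirs with exist_ok=True) instead of requiring it to already exist, so callers no longer need their own os.path.exists / os.makedirs checks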
This commit is contained in:
@@ -226,8 +226,6 @@ def train(args, train_dataset, model, tokenizer):
|
|||||||
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
|
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
|
||||||
# Save model checkpoint
|
# Save model checkpoint
|
||||||
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
|
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
|
||||||
if not os.path.exists(output_dir):
|
|
||||||
os.makedirs(output_dir)
|
|
||||||
model_to_save = (
|
model_to_save = (
|
||||||
model.module if hasattr(model, "module") else model
|
model.module if hasattr(model, "module") else model
|
||||||
) # Take care of distributed/parallel training
|
) # Take care of distributed/parallel training
|
||||||
@@ -649,10 +647,6 @@ def main():
|
|||||||
|
|
||||||
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -521,10 +521,6 @@ def main():
|
|||||||
|
|
||||||
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -383,8 +383,6 @@ def train(args, train_dataset, model, tokenizer):
|
|||||||
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
|
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
|
||||||
# Save model checkpoint
|
# Save model checkpoint
|
||||||
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
|
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
|
||||||
if not os.path.exists(output_dir):
|
|
||||||
os.makedirs(output_dir)
|
|
||||||
model_to_save = (
|
model_to_save = (
|
||||||
model.module if hasattr(model, "module") else model
|
model.module if hasattr(model, "module") else model
|
||||||
) # Take care of distributed/parallel training
|
) # Take care of distributed/parallel training
|
||||||
@@ -651,10 +649,6 @@ def main():
|
|||||||
|
|
||||||
# Save the trained model and the tokenizer
|
# Save the trained model and the tokenizer
|
||||||
if args.local_rank == -1 or torch.distributed.get_rank() == 0:
|
if args.local_rank == -1 or torch.distributed.get_rank() == 0:
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -809,10 +809,6 @@ def main():
|
|||||||
|
|
||||||
# Save the trained model and the tokenizer
|
# Save the trained model and the tokenizer
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -875,10 +875,6 @@ def main():
|
|||||||
|
|
||||||
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -1059,10 +1059,6 @@ def main():
|
|||||||
|
|
||||||
# Save the trained model and the tokenizer
|
# Save the trained model and the tokenizer
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -240,8 +240,6 @@ def train(args, train_dataset, model, tokenizer):
|
|||||||
# Save model checkpoint
|
# Save model checkpoint
|
||||||
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
|
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
|
||||||
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
|
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
|
||||||
if not os.path.exists(output_dir):
|
|
||||||
os.makedirs(output_dir)
|
|
||||||
# Take care of distributed/parallel training
|
# Take care of distributed/parallel training
|
||||||
model_to_save = model.module if hasattr(model, "module") else model
|
model_to_save = model.module if hasattr(model, "module") else model
|
||||||
model_to_save.save_pretrained(output_dir)
|
model_to_save.save_pretrained(output_dir)
|
||||||
@@ -768,10 +766,6 @@ def main():
|
|||||||
|
|
||||||
# Save the trained model and the tokenizer
|
# Save the trained model and the tokenizer
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -92,8 +92,6 @@ class BartSummarizationDistiller(SummarizationModule):
|
|||||||
student = BartForConditionalGeneration(student_cfg)
|
student = BartForConditionalGeneration(student_cfg)
|
||||||
student, _ = init_student(student, teacher)
|
student, _ = init_student(student, teacher)
|
||||||
save_dir = self.output_dir.joinpath("student")
|
save_dir = self.output_dir.joinpath("student")
|
||||||
save_dir.mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams, student, teacher)
|
self.copy_to_student(d_layers_to_copy, e_layers_to_copy, hparams, student, teacher)
|
||||||
student.save_pretrained(save_dir)
|
student.save_pretrained(save_dir)
|
||||||
hparams.model_name_or_path = str(save_dir)
|
hparams.model_name_or_path = str(save_dir)
|
||||||
|
|||||||
@@ -573,10 +573,6 @@ def main():
|
|||||||
|
|
||||||
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
@@ -132,10 +132,9 @@ class PretrainedConfig(object):
|
|||||||
save_directory (:obj:`string`):
|
save_directory (:obj:`string`):
|
||||||
Directory where the configuration JSON file will be saved.
|
Directory where the configuration JSON file will be saved.
|
||||||
"""
|
"""
|
||||||
assert os.path.isdir(
|
if os.path.isfile(save_directory):
|
||||||
save_directory
|
raise AssertionError("Provided path ({}) should be a directory, not a file".format(save_directory))
|
||||||
), "Saving path should be a directory where the model and configuration can be saved"
|
os.makedirs(save_directory, exist_ok=True)
|
||||||
|
|
||||||
# If we save using the predefined names, we can load using `from_pretrained`
|
# If we save using the predefined names, we can load using `from_pretrained`
|
||||||
output_config_file = os.path.join(save_directory, CONFIG_NAME)
|
output_config_file = os.path.join(save_directory, CONFIG_NAME)
|
||||||
|
|
||||||
|
|||||||
@@ -240,7 +240,6 @@ def convert_all_pt_checkpoints_to_tf(
|
|||||||
remove_cached_files=False,
|
remove_cached_files=False,
|
||||||
only_convert_finetuned_models=False,
|
only_convert_finetuned_models=False,
|
||||||
):
|
):
|
||||||
assert os.path.isdir(args.tf_dump_path), "--tf_dump_path should be a directory"
|
|
||||||
|
|
||||||
if args_model_type is None:
|
if args_model_type is None:
|
||||||
model_types = list(MODEL_CLASSES.keys())
|
model_types = list(MODEL_CLASSES.keys())
|
||||||
|
|||||||
@@ -315,9 +315,10 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
|
|||||||
""" Save a model and its configuration file to a directory, so that it
|
""" Save a model and its configuration file to a directory, so that it
|
||||||
can be re-loaded using the :func:`~transformers.PreTrainedModel.from_pretrained` class method.
|
can be re-loaded using the :func:`~transformers.PreTrainedModel.from_pretrained` class method.
|
||||||
"""
|
"""
|
||||||
assert os.path.isdir(
|
if os.path.isfile(save_directory):
|
||||||
save_directory
|
logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
|
||||||
), "Saving path should be a directory where the model and configuration can be saved"
|
return
|
||||||
|
os.makedirs(save_directory, exist_ok=True)
|
||||||
|
|
||||||
# Save configuration file
|
# Save configuration file
|
||||||
self.config.save_pretrained(save_directory)
|
self.config.save_pretrained(save_directory)
|
||||||
|
|||||||
@@ -477,9 +477,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
|
|||||||
Arguments:
|
Arguments:
|
||||||
save_directory: directory to which to save.
|
save_directory: directory to which to save.
|
||||||
"""
|
"""
|
||||||
assert os.path.isdir(
|
if os.path.isfile(save_directory):
|
||||||
save_directory
|
logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
|
||||||
), "Saving path should be a directory where the model and configuration can be saved"
|
return
|
||||||
|
os.makedirs(save_directory, exist_ok=True)
|
||||||
|
|
||||||
# Only save the model itself if we are using distributed training
|
# Only save the model itself if we are using distributed training
|
||||||
model_to_save = self.module if hasattr(self, "module") else self
|
model_to_save = self.module if hasattr(self, "module") else self
|
||||||
|
|||||||
@@ -405,9 +405,10 @@ class Pipeline(_ScikitCompat):
|
|||||||
"""
|
"""
|
||||||
Save the pipeline's model and tokenizer to the specified save_directory
|
Save the pipeline's model and tokenizer to the specified save_directory
|
||||||
"""
|
"""
|
||||||
if not os.path.isdir(save_directory):
|
if os.path.isfile(save_directory):
|
||||||
logger.error("Provided path ({}) should be a directory".format(save_directory))
|
logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
|
||||||
return
|
return
|
||||||
|
os.makedirs(save_directory, exist_ok=True)
|
||||||
|
|
||||||
self.model.save_pretrained(save_directory)
|
self.model.save_pretrained(save_directory)
|
||||||
self.tokenizer.save_pretrained(save_directory)
|
self.tokenizer.save_pretrained(save_directory)
|
||||||
|
|||||||
@@ -1343,9 +1343,10 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
|
|||||||
This method make sure the full tokenizer can then be re-loaded using the
|
This method make sure the full tokenizer can then be re-loaded using the
|
||||||
:func:`~transformers.PreTrainedTokenizer.from_pretrained` class method.
|
:func:`~transformers.PreTrainedTokenizer.from_pretrained` class method.
|
||||||
"""
|
"""
|
||||||
if not os.path.isdir(save_directory):
|
if os.path.isfile(save_directory):
|
||||||
logger.error("Saving directory ({}) should be a directory".format(save_directory))
|
logger.error("Provided path ({}) should be a directory, not a file".format(save_directory))
|
||||||
return
|
return
|
||||||
|
os.makedirs(save_directory, exist_ok=True)
|
||||||
|
|
||||||
special_tokens_map_file = os.path.join(save_directory, SPECIAL_TOKENS_MAP_FILE)
|
special_tokens_map_file = os.path.join(save_directory, SPECIAL_TOKENS_MAP_FILE)
|
||||||
added_tokens_file = os.path.join(save_directory, ADDED_TOKENS_FILE)
|
added_tokens_file = os.path.join(save_directory, ADDED_TOKENS_FILE)
|
||||||
|
|||||||
@@ -653,10 +653,6 @@ def main():
|
|||||||
|
|
||||||
# Save the trained model and the tokenizer
|
# Save the trained model and the tokenizer
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Create output directory if needed
|
|
||||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
|
||||||
os.makedirs(args.output_dir)
|
|
||||||
|
|
||||||
logger.info("Saving model checkpoint to %s", args.output_dir)
|
logger.info("Saving model checkpoint to %s", args.output_dir)
|
||||||
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
# They can then be reloaded using `from_pretrained()`
|
# They can then be reloaded using `from_pretrained()`
|
||||||
|
|||||||
Reference in New Issue
Block a user