diff --git a/examples/pytorch/README.md b/examples/pytorch/README.md index b5a770dd2e..1eaa61e0f0 100644 --- a/examples/pytorch/README.md +++ b/examples/pytorch/README.md @@ -74,6 +74,17 @@ line, 🤗 Trainer supports resuming from a checkpoint via `trainer.train(resume 2. If `resume_from_checkpoint` is a path to a specific checkpoint it will use that saved checkpoint folder to resume the training from. +### Upload the trained/fine-tuned model to the Hub + +All the example scripts support automatic upload of your final model to the [Model Hub](https://huggingface.co/models) by adding a `--push_to_hub` argument. It will then create a repository named after your username, followed by a slash and the name of the folder you are using as `output_dir`. For instance, `"sgugger/test-mrpc"` if your username is `sgugger` and you are working in the folder `~/tmp/test-mrpc`. + +To specify a given repository name, use the `--hub_model_id` argument. You will need to specify the whole repository name (including your username), for instance `--hub_model_id sgugger/finetuned-bert-mrpc`. To upload to an organization you are a member of, just use the name of that organization instead of your username: `--hub_model_id huggingface/finetuned-bert-mrpc`. + +A few notes on this integration: + +- you will need to be logged in to the Hugging Face website locally for it to work. The easiest way to achieve this is to run `huggingface-cli login` and then type your username and password when prompted. You can also pass along your authentication token with the `--hub_token` argument. +- the `output_dir` you pick will either need to be a new folder or a local clone of the remote repository you are using. 
+ ## Distributed training and mixed precision All the PyTorch scripts mentioned above work out of the box with distributed training and mixed precision, thanks to diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index fd69abe4a4..5ef7324d74 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -27,6 +27,7 @@ import logging import math import os import random +from pathlib import Path import datasets import torch @@ -36,6 +37,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator, DistributedType +from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, MODEL_MAPPING, @@ -48,6 +50,7 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version @@ -176,7 +179,11 @@ def parse_args(): parser.add_argument( "--no_keep_linebreaks", action="store_true", help="Do not keep line breaks when using TXT files." ) - + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -190,8 +197,8 @@ def parse_args(): extension = args.validation_file.split(".")[-1] assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, json or txt file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -223,6 +230,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -480,10 +499,22 @@ def main(): logger.info(f"epoch {epoch}: perplexity: {perplexity}") + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__": diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index 2c5346a851..14a140d666 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -27,6 +27,7 @@ import logging 
import math import os import random +from pathlib import Path import datasets import torch @@ -36,6 +37,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator, DistributedType +from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, MODEL_MAPPING, @@ -48,6 +50,7 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version @@ -185,7 +188,11 @@ def parse_args(): parser.add_argument( "--mlm_probability", type=float, default=0.15, help="Ratio of tokens to mask for masked language modeling loss" ) - + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -199,8 +206,8 @@ def parse_args(): extension = args.validation_file.split(".")[-1] assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, json or txt file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -232,6 +239,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -518,10 +537,22 @@ def main(): logger.info(f"epoch {epoch}: perplexity: {perplexity}") + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__": diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index c893f165b4..afb8ef25f5 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -24,6 +24,7 @@ import math import 
os import random from dataclasses import dataclass +from pathlib import Path from typing import Optional, Union import datasets @@ -34,6 +35,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, MODEL_MAPPING, @@ -47,7 +49,7 @@ from transformers import ( get_scheduler, set_seed, ) -from transformers.file_utils import PaddingStrategy +from transformers.file_utils import PaddingStrategy, get_full_repo_name logger = logging.getLogger(__name__) @@ -169,9 +171,15 @@ def parse_args(): action="store_true", help="Activate debug mode and run training only with a subset of data.", ) + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -260,6 +268,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -478,10 +498,22 @@ def main(): eval_metric = metric.compute() accelerator.print(f"epoch {epoch}: {eval_metric}") + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__": diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 2d11de9ea2..d4da225cac 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ 
b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -23,6 +23,7 @@ import logging import math import os import random +from pathlib import Path import datasets import numpy as np @@ -33,6 +34,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from huggingface_hub import Repository from transformers import ( AdamW, DataCollatorWithPadding, @@ -45,6 +47,7 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.file_utils import get_full_repo_name from transformers.utils import check_min_version from transformers.utils.versions import require_version from utils_qa import postprocess_qa_predictions_with_beam_search @@ -203,7 +206,11 @@ def parse_args(): default=None, help="For debugging purposes or quicker training, truncate the number of prediction examples to this", ) - + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -225,8 +232,8 @@ def parse_args(): extension = args.test_file.split(".")[-1] assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -258,6 +265,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -703,8 +722,15 @@ def main(): if completed_steps >= args.max_train_steps: break - # intialize all lists to collect the batches + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + # intialize all lists to collect the batches all_start_top_log_probs = [] all_start_top_index = [] all_end_top_log_probs = [] @@ -821,6 +847,10 @@ def main(): accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__": diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index c885783fa8..d430b06f28 
100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -23,6 +23,7 @@ import logging import math import os import random +from pathlib import Path import datasets import numpy as np @@ -33,6 +34,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, MODEL_MAPPING, @@ -47,6 +49,7 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.file_utils import get_full_repo_name from transformers.utils import check_min_version from transformers.utils.versions import require_version from utils_qa import postprocess_qa_predictions @@ -232,7 +235,11 @@ def parse_args(): help="Model type to use if training from scratch.", choices=MODEL_TYPES, ) - + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -254,8 +261,8 @@ def parse_args(): extension = args.test_file.split(".")[-1] assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -287,6 +294,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -708,6 +727,14 @@ def main(): if completed_steps >= args.max_train_steps: break + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + # Evaluation logger.info("***** Running Evaluation *****") logger.info(f" Num examples = {len(eval_dataset)}") @@ -782,6 +809,10 @@ def main(): accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__": diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index 9bf2e7e057..08a6292521 100644 --- 
a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -23,6 +23,7 @@ import logging import math import os import random +from pathlib import Path import datasets import nltk @@ -35,6 +36,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator from filelock import FileLock +from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, MODEL_MAPPING, @@ -47,7 +49,7 @@ from transformers import ( get_scheduler, set_seed, ) -from transformers.file_utils import is_offline_mode +from transformers.file_utils import get_full_repo_name, is_offline_mode from transformers.utils.versions import require_version @@ -255,7 +257,11 @@ def parse_args(): help="Model type to use if training from scratch.", choices=MODEL_TYPES, ) - + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -269,8 +275,8 @@ def parse_args(): extension = args.validation_file.split(".")[-1] assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -313,6 +319,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -576,10 +594,22 @@ def main(): logger.info(result) + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__": diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 4af109a4bb..fe9943b266 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -18,6 +18,7 @@ import logging import math import os 
import random +from pathlib import Path import datasets from datasets import load_dataset, load_metric @@ -26,6 +27,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from huggingface_hub import Repository from transformers import ( AdamW, AutoConfig, @@ -38,6 +40,7 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version @@ -142,6 +145,11 @@ def parse_args(): ) parser.add_argument("--output_dir", type=str, default=None, help="Where to store the final model.") parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.") + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -155,8 +163,8 @@ def parse_args(): extension = args.validation_file.split(".")[-1] assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -188,6 +196,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). @@ -426,10 +446,22 @@ def main(): eval_metric = metric.compute() logger.info(f"epoch {epoch}: {eval_metric}") + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if args.task_name == "mnli": # Final evaluation on mismatched validation set diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index 691d1ebbe0..b24f65ee98 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -23,6 +23,7 @@ import logging 
import math import os import random +from pathlib import Path import datasets import torch @@ -32,6 +33,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, MODEL_MAPPING, @@ -45,6 +47,7 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version @@ -195,6 +198,11 @@ def parse_args(): action="store_true", help="Activate debug mode and run training only with a subset of data.", ) + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -208,8 +216,8 @@ def parse_args(): extension = args.validation_file.split(".")[-1] assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
return args @@ -241,6 +249,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -552,10 +572,22 @@ def main(): eval_metric = compute_metrics() accelerator.print(f"epoch {epoch}:", eval_metric) + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__": diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 78b4ae2078..d18b5cae8a 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ 
b/examples/pytorch/translation/run_translation_no_trainer.py @@ -23,6 +23,7 @@ import logging import math import os import random +from pathlib import Path import datasets import numpy as np @@ -33,6 +34,7 @@ from tqdm.auto import tqdm import transformers from accelerate import Accelerator +from huggingface_hub import Repository from transformers import ( CONFIG_MAPPING, MODEL_MAPPING, @@ -48,6 +50,7 @@ from transformers import ( get_scheduler, set_seed, ) +from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version @@ -235,7 +238,11 @@ def parse_args(): help="Model type to use if training from scratch.", choices=MODEL_TYPES, ) - + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument( + "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`." + ) + parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") args = parser.parse_args() # Sanity checks @@ -250,8 +257,9 @@ def parse_args(): extension = args.validation_file.split(".")[-1] assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed." 
+ return args @@ -284,6 +292,18 @@ def main(): if args.seed is not None: set_seed(args.seed) + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + accelerator.wait_for_everyone() + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # (the dataset will be downloaded automatically from the datasets Hub). @@ -553,10 +573,22 @@ def main(): eval_metric = metric.compute() logger.info({"bleu": eval_metric["score"]}) + if args.push_to_hub and epoch < args.num_train_epochs - 1: + accelerator.wait_for_everyone() + unwrapped_model = accelerator.unwrap_model(model) + unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False) + if args.output_dir is not None: accelerator.wait_for_everyone() unwrapped_model = accelerator.unwrap_model(model) unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) + if accelerator.is_main_process: + tokenizer.save_pretrained(args.output_dir) + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training") if __name__ == "__main__":