diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index ef9c515da4..f434752e74 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -53,7 +53,7 @@ from transformers import ( HfArgumentParser, is_tensorboard_available, ) -from transformers.utils import get_full_repo_name, is_offline_mode, send_example_telemetry +from transformers.utils import is_offline_mode, send_example_telemetry logger = logging.getLogger(__name__) @@ -424,14 +424,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py index e863f97e0d..c1df13d08f 100644 --- a/examples/flax/language-modeling/run_bart_dlm_flax.py +++ b/examples/flax/language-modeling/run_bart_dlm_flax.py @@ -59,7 +59,7 @@ from 
transformers import ( set_seed, ) from transformers.models.bart.modeling_flax_bart import shift_tokens_right -from transformers.utils import get_full_repo_name, send_example_telemetry +from transformers.utils import send_example_telemetry MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys()) @@ -496,14 +496,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index 656bfa6807..c028c3e94b 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -58,7 +58,7 @@ from transformers import ( set_seed, ) from transformers.testing_utils import CaptureLogger -from transformers.utils import get_full_repo_name, send_example_telemetry +from transformers.utils import send_example_telemetry logger = logging.getLogger(__name__) @@ -372,14 +372,14 @@ def main(): # Handle the repository 
creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index ae289b8470..0d12de0694 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -59,7 +59,7 @@ from transformers import ( is_tensorboard_available, set_seed, ) -from transformers.utils import get_full_repo_name, send_example_telemetry +from transformers.utils import send_example_telemetry MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys()) @@ -410,14 +410,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = 
Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index f3cec97b2e..89e0b9e987 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -59,7 +59,7 @@ from transformers import ( set_seed, ) from transformers.models.t5.modeling_flax_t5 import shift_tokens_right -from transformers.utils import get_full_repo_name, send_example_telemetry +from transformers.utils import send_example_telemetry MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys()) @@ -537,14 +537,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = 
create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 48ed6173c7..29e1de744c 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -55,7 +55,7 @@ from transformers import ( PreTrainedTokenizerFast, is_tensorboard_available, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry logger = logging.getLogger(__name__) @@ -462,14 +462,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # region Load Data # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) diff --git 
a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index 2d7e0acbf5..fc7be6edbf 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -56,7 +56,7 @@ from transformers import ( HfArgumentParser, is_tensorboard_available, ) -from transformers.utils import get_full_repo_name, is_offline_mode, send_example_telemetry +from transformers.utils import is_offline_mode, send_example_telemetry logger = logging.getLogger(__name__) @@ -452,14 +452,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index 1e6c6c1b0f..8985c3a2f7 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -49,7 +49,7 @@ from transformers import ( TrainingArguments, 
is_tensorboard_available, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry logger = logging.getLogger(__name__) @@ -342,14 +342,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve or infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). 
diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index 6f773e2f8c..8d7f263f5a 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -49,7 +49,7 @@ from transformers import ( HfArgumentParser, is_tensorboard_available, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -398,14 +398,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve or infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/ diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py index 6a88f0f8d6..49e147e310 100644 --- a/examples/flax/vision/run_image_classification.py +++ b/examples/flax/vision/run_image_classification.py @@ -54,7 +54,7 @@ from 
transformers import ( is_tensorboard_available, set_seed, ) -from transformers.utils import get_full_repo_name, send_example_telemetry +from transformers.utils import send_example_telemetry logger = logging.getLogger(__name__) @@ -293,14 +293,14 @@ def main(): # Handle the repository creation if training_args.push_to_hub: - if training_args.hub_model_id is None: - repo_name = get_full_repo_name( - Path(training_args.output_dir).absolute().name, token=training_args.hub_token - ) - else: - repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Retrieve of infer repo_name + repo_name = training_args.hub_model_id + if repo_name is None: + repo_name = Path(training_args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id + # Clone repo locally + repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) # Initialize datasets and pre-processing transforms # We use torchvision here for faster pre-processing diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index 27b4bee144..c416273132 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -42,7 +42,7 @@ from tqdm.auto import tqdm import transformers from transformers import AutoConfig, AutoImageProcessor, AutoModelForImageClassification, SchedulerType, get_scheduler -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -236,12 
+236,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/image-pretraining/run_mim_no_trainer.py b/examples/pytorch/image-pretraining/run_mim_no_trainer.py index 5f83fd2da8..f8c2cdbd3c 100644 --- a/examples/pytorch/image-pretraining/run_mim_no_trainer.py +++ b/examples/pytorch/image-pretraining/run_mim_no_trainer.py @@ -25,7 +25,7 @@ import torch from accelerate import Accelerator, DistributedType from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository +from huggingface_hub import Repository, create_repo from torch.utils.data import DataLoader from torchvision.transforms import Compose, Lambda, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToTensor from tqdm.auto import tqdm @@ -41,7 +41,7 @@ from transformers import ( SchedulerType, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -406,11 +406,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if 
args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - repo = Repository(args.output_dir, clone_from=repo_name) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index dc619acb9a..58954bd853 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -52,7 +52,7 @@ from transformers import ( default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -286,12 +286,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + 
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index b57a7d331c..af1e8d5209 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -52,7 +52,7 @@ from transformers import ( SchedulerType, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -295,12 +295,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index 7f5969934e..26ae2d4944 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -52,7 +52,7 @@ from transformers import ( 
default_data_collator, get_scheduler, ) -from transformers.utils import PaddingStrategy, check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import PaddingStrategy, check_min_version, send_example_telemetry # Will error if the minimal version of Transformers is not installed. Remove at your own risks. @@ -313,12 +313,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 79aaa22cdb..6d00455059 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -51,7 +51,7 @@ from transformers import ( default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -328,12 +328,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if 
args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index 7f13c250e7..2a7cdc7273 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -52,7 +52,7 @@ from transformers import ( default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -366,12 +366,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, 
exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index bcad655727..af9880901c 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -45,7 +45,7 @@ from transformers import ( default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -350,12 +350,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py index 
603202e696..6bde6d2b7d 100755 --- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py +++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py @@ -43,7 +43,7 @@ from transformers import ( set_seed, ) from transformers.models.wav2vec2.modeling_wav2vec2 import _compute_mask_indices, _sample_negative_indices -from transformers.utils import get_full_repo_name, send_example_telemetry +from transformers.utils import send_example_telemetry logger = get_logger(__name__) @@ -418,12 +418,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub and not args.preprocessing_only: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index bfefbee250..543b6bfaa2 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -51,7 +51,7 @@ from transformers import ( SchedulerType, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, is_offline_mode, send_example_telemetry +from transformers.utils import check_min_version, 
is_offline_mode, send_example_telemetry from transformers.utils.versions import require_version @@ -360,12 +360,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve of infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index a21a698ec9..e137f87638 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -43,7 +43,7 @@ from transformers import ( default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -240,12 +240,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, 
token=args.hub_token) + # Retrieve or infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index 8ad0b6d7e4..dc57b514e7 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -51,7 +51,7 @@ from transformers import ( default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -295,12 +295,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve or infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git
a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 70f831620d..77efa14f4f 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -52,7 +52,7 @@ from transformers import ( default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -340,12 +340,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) + # Retrieve or infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: diff --git a/examples/research_projects/luke/run_luke_ner_no_trainer.py b/examples/research_projects/luke/run_luke_ner_no_trainer.py index 6b59643cf7..f12a7d76d8 100644 --- a/examples/research_projects/luke/run_luke_ner_no_trainer.py +++ b/examples/research_projects/luke/run_luke_ner_no_trainer.py @@ -29,7 +29,7 @@ import datasets import torch from accelerate import Accelerator, DistributedDataParallelKwargs from datasets import ClassLabel, load_dataset, load_metric -from huggingface_hub import Repository +from
huggingface_hub import Repository, create_repo from luke_utils import DataCollatorForLukeTokenClassification, is_punctuation, padding_tensor from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -45,7 +45,6 @@ from transformers import ( get_scheduler, set_seed, ) -from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version @@ -258,11 +257,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - repo = Repository(args.output_dir, clone_from=repo_name) + # Retrieve or infer repo_name + repo_name = args.hub_model_id + if repo_name is None: + repo_name = Path(args.output_dir).absolute().name + # Create repo and retrieve repo_id + repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + # Clone repo locally + repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 921974634d..d710296fc0 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -17,6 +17,8 @@ File utilities: utilities related to download and cache models This module should not be update anymore and is only left for backward compatibility. """ +from huggingface_hub import get_full_repo_name # for backward compatibility + from .
import __version__ # Backward compatibility imports, to make sure all those objects can be found in file_utils @@ -71,7 +73,6 @@ from .utils import ( define_sagemaker_information, get_cached_models, get_file_from_repo, - get_full_repo_name, get_torch_version, has_file, http_user_agent, diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py index 15b4edf07d..3bb4e859b1 100644 --- a/src/transformers/keras_callbacks.py +++ b/src/transformers/keras_callbacks.py @@ -12,7 +12,6 @@ from tensorflow.keras.callbacks import Callback from . import IntervalStrategy, PreTrainedTokenizerBase from .modelcard import TrainingSummary -from .utils import get_full_repo_name logger = logging.getLogger(__name__) @@ -334,14 +333,13 @@ class PushToHubCallback(Callback): raise ValueError("Please supply a positive integer argument for save_steps when save_strategy == 'steps'!") self.save_steps = save_steps output_dir = Path(output_dir) + + # Create repo and retrieve repo_id if hub_model_id is None: hub_model_id = output_dir.absolute().name - if "/" not in hub_model_id: - hub_model_id = get_full_repo_name(hub_model_id, token=hub_token) + self.hub_model_id = create_repo(repo_id=hub_model_id, exist_ok=True, token=hub_token).repo_id self.output_dir = output_dir - self.hub_model_id = hub_model_id - create_repo(self.hub_model_id, exist_ok=True) self.repo = Repository(str(self.output_dir), clone_from=self.hub_model_id, token=hub_token) self.tokenizer = tokenizer diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index 9548b1d67f..ebdafdbe87 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1357,21 +1357,16 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu "Checkpoint loading failed as no optimizer is attached to the model. " "This is most likely caused by the model not being compiled." 
) - if not os.path.isdir(repo_path_or_name): + if os.path.isdir(repo_path_or_name): + local_dir = repo_path_or_name + else: # If this isn't a local path, check that the remote repo exists and has a checkpoint in it repo_files = list_repo_files(repo_path_or_name) for file in ("checkpoint/weights.h5", "checkpoint/extra_data.pickle"): if file not in repo_files: raise FileNotFoundError(f"Repo {repo_path_or_name} does not contain checkpoint file {file}!") - if "/" not in repo_path_or_name: - model_id = repo_path_or_name - repo_path_or_name = self.get_full_repo_name(repo_path_or_name) - else: - model_id = repo_path_or_name.split("/")[-1] - repo = Repository(model_id, clone_from=f"https://huggingface.co/{repo_path_or_name}") + repo = Repository(repo_path_or_name.split("/")[-1], clone_from=repo_path_or_name) local_dir = repo.local_dir - else: - local_dir = repo_path_or_name # Now make sure the repo actually has a checkpoint in it. checkpoint_dir = os.path.join(local_dir, "checkpoint") diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index c4a64997b4..319c36e7c8 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -129,7 +129,6 @@ from .utils import ( WEIGHTS_NAME, can_return_loss, find_labels, - get_full_repo_name, is_accelerate_available, is_apex_available, is_datasets_available, @@ -3396,22 +3395,22 @@ class Trainer: """ if not self.is_world_process_zero(): return - if self.args.hub_model_id is None: - repo_name = Path(self.args.output_dir).absolute().name - else: - repo_name = self.args.hub_model_id - if "/" not in repo_name: - repo_name = get_full_repo_name(repo_name, token=self.args.hub_token) - # Make sure the repo exists. 
- create_repo(repo_name, token=self.args.hub_token, private=self.args.hub_private_repo, exist_ok=True) + # Make sure the repo exists + retrieve "real" repo_id + repo_name = self.args.hub_model_id + if repo_name is None: + repo_name = Path(self.args.output_dir).absolute().name + repo_id = create_repo( + repo_id=repo_name, token=self.args.hub_token, private=self.args.hub_private_repo, exist_ok=True + ).repo_id + try: - self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token) + self.repo = Repository(self.args.output_dir, clone_from=repo_id, token=self.args.hub_token) except EnvironmentError: if self.args.overwrite_output_dir and at_init: # Try again after wiping output_dir shutil.rmtree(self.args.output_dir) - self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token) + self.repo = Repository(self.args.output_dir, clone_from=repo_id, token=self.args.hub_token) else: raise diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index b6e35b73b9..8189c22fe5 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -24,6 +24,7 @@ from enum import Enum from pathlib import Path from typing import Any, Dict, List, Optional, Union +from huggingface_hub import get_full_repo_name from packaging import version from .debug_utils import DebugOption @@ -38,7 +39,6 @@ from .trainer_utils import ( from .utils import ( ExplicitEnum, cached_property, - get_full_repo_name, is_accelerate_available, is_safetensors_available, is_sagemaker_dp_enabled, diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index bca5440f8e..3a5dea4616 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from huggingface_hub import get_full_repo_name # for backward compatibility from packaging import version from .. import __version__ @@ -79,7 +80,6 @@ from .hub import ( extract_commit_hash, get_cached_models, get_file_from_repo, - get_full_repo_name, has_file, http_user_agent, is_offline_mode, diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 1634abe6ea..9fba97f098 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -36,7 +36,6 @@ from huggingface_hub import ( get_hf_file_metadata, hf_hub_download, hf_hub_url, - whoami, ) from huggingface_hub.file_download import REGEX_COMMIT_HASH, http_get from huggingface_hub.utils import ( @@ -690,6 +689,10 @@ class PushToHubMixin: "The `repo_url` argument is deprecated and will be removed in v5 of Transformers. Use `repo_id` " "instead." ) + if repo_id is not None: + raise ValueError( + "`repo_id` and `repo_url` are both specified. Please set only the argument `repo_id`." + ) repo_id = repo_url.replace(f"{HUGGINGFACE_CO_RESOLVE_ENDPOINT}/", "") if organization is not None: warnings.warn( @@ -702,11 +705,7 @@ class PushToHubMixin: repo_id = f"{organization}/{repo_id}" url = create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True) - - # If the namespace is not there, add it or `upload_file` will complain - if "/" not in repo_id and url != f"{HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{repo_id}": - repo_id = get_full_repo_name(repo_id, token=token) - return repo_id + return url.repo_id def _get_files_timestamps(self, working_dir: Union[str, os.PathLike]): """ @@ -786,8 +785,7 @@ class PushToHubMixin: **deprecated_kwargs, ) -> str: """ - Upload the {object_files} to the 🤗 Model Hub while synchronizing a local clone of the repo in - `repo_path_or_name`. + Upload the {object_files} to the 🤗 Model Hub. 
Parameters: repo_id (`str`): @@ -838,22 +836,35 @@ class PushToHubMixin: ) token = use_auth_token - if "repo_path_or_name" in deprecated_kwargs: + repo_path_or_name = deprecated_kwargs.pop("repo_path_or_name", None) + if repo_path_or_name is not None: + # Should use `repo_id` instead of `repo_path_or_name`. When using `repo_path_or_name`, we try to infer + # repo_id from the folder path, if it exists. warnings.warn( "The `repo_path_or_name` argument is deprecated and will be removed in v5 of Transformers. Use " - "`repo_id` instead." + "`repo_id` instead.", + FutureWarning, ) - repo_id = deprecated_kwargs.pop("repo_path_or_name") + if repo_id is not None: + raise ValueError( + "`repo_id` and `repo_path_or_name` are both specified. Please set only the argument `repo_id`." + ) + if os.path.isdir(repo_path_or_name): + # repo_path: `repo_id` is None here, so use the folder as working_dir and its last path component as repo_id + repo_id = repo_path_or_name.split(os.path.sep)[-1] + working_dir = repo_path_or_name + else: + # repo_name: use it as repo_id + repo_id = repo_path_or_name + working_dir = repo_id.split("/")[-1] + else: + # Repo_id is passed correctly: infer working_dir from it + working_dir = repo_id.split("/")[-1] + # Deprecation warning will be sent after for repo_url and organization repo_url = deprecated_kwargs.pop("repo_url", None) organization = deprecated_kwargs.pop("organization", None) - if os.path.isdir(repo_id): - working_dir = repo_id - repo_id = repo_id.split(os.path.sep)[-1] - else: - working_dir = repo_id.split("/")[-1] - repo_id = self._create_repo( repo_id, private=private, token=token, repo_url=repo_url, organization=organization ) @@ -877,14 +888,6 @@ class PushToHubMixin: ) -def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None): - if organization is None: - username = whoami(token)["name"] - return f"{username}/{model_id}" - else: - return f"{organization}/{model_id}" - - def send_example_telemetry(example_name, *example_args, framework="pytorch"): """ Sends telemetry that helps
tracking the examples use.