Adapt repository creation to latest hf_hub (#21158)
* Adapt repository creation to latest hf_hub * Update all examples * Fix other tests, add Flax examples * Address review comments
This commit is contained in:
@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import unreplicate
|
from flax.jax_utils import unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoFeatureExtractor,
|
AutoFeatureExtractor,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
@@ -430,7 +430,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad
|
from flax.jax_utils import pad_shard_unpad
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
FLAX_MODEL_FOR_MASKED_LM_MAPPING,
|
FLAX_MODEL_FOR_MASKED_LM_MAPPING,
|
||||||
@@ -502,7 +502,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, unreplicate
|
from flax.jax_utils import pad_shard_unpad, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
FLAX_MODEL_FOR_CAUSAL_LM_MAPPING,
|
FLAX_MODEL_FOR_CAUSAL_LM_MAPPING,
|
||||||
@@ -376,7 +376,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad
|
from flax.jax_utils import pad_shard_unpad
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
FLAX_MODEL_FOR_MASKED_LM_MAPPING,
|
FLAX_MODEL_FOR_MASKED_LM_MAPPING,
|
||||||
@@ -416,7 +416,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad
|
from flax.jax_utils import pad_shard_unpad
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
FLAX_MODEL_FOR_MASKED_LM_MAPPING,
|
FLAX_MODEL_FOR_MASKED_LM_MAPPING,
|
||||||
@@ -542,7 +542,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ from flax import struct, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoConfig,
|
AutoConfig,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
@@ -467,7 +467,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# region Load Data
|
# region Load Data
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, unreplicate
|
from flax.jax_utils import pad_shard_unpad, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
FLAX_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
|
FLAX_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
|
||||||
@@ -450,7 +450,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ from flax import struct, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoConfig,
|
AutoConfig,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
@@ -350,7 +350,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
||||||
# or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
|
# or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ from flax import struct, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoConfig,
|
AutoConfig,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
@@ -406,7 +406,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ from flax import jax_utils
|
|||||||
from flax.jax_utils import pad_shard_unpad, unreplicate
|
from flax.jax_utils import pad_shard_unpad, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
FLAX_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
FLAX_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
|
||||||
@@ -298,7 +298,8 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
||||||
|
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
||||||
|
|
||||||
# Initialize datasets and pre-processing transforms
|
# Initialize datasets and pre-processing transforms
|
||||||
# We use torchvision here for faster pre-processing
|
# We use torchvision here for faster pre-processing
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoConfig,
|
AutoConfig,
|
||||||
AutoFeatureExtractor,
|
AutoFeatureExtractor,
|
||||||
@@ -246,7 +246,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ import transformers
|
|||||||
from accelerate import Accelerator, DistributedType
|
from accelerate import Accelerator, DistributedType
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
MODEL_MAPPING,
|
MODEL_MAPPING,
|
||||||
@@ -282,7 +282,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ import transformers
|
|||||||
from accelerate import Accelerator, DistributedType
|
from accelerate import Accelerator, DistributedType
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
MODEL_MAPPING,
|
MODEL_MAPPING,
|
||||||
@@ -291,7 +291,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
MODEL_MAPPING,
|
MODEL_MAPPING,
|
||||||
@@ -317,7 +317,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AdamW,
|
AdamW,
|
||||||
DataCollatorWithPadding,
|
DataCollatorWithPadding,
|
||||||
@@ -332,7 +332,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
MODEL_MAPPING,
|
MODEL_MAPPING,
|
||||||
@@ -370,7 +370,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository, hf_hub_download
|
from huggingface_hub import Repository, create_repo, hf_hub_download
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoConfig,
|
AutoConfig,
|
||||||
AutoFeatureExtractor,
|
AutoFeatureExtractor,
|
||||||
@@ -354,7 +354,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ from tqdm.auto import tqdm
|
|||||||
import transformers
|
import transformers
|
||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AdamW,
|
AdamW,
|
||||||
SchedulerType,
|
SchedulerType,
|
||||||
@@ -422,7 +422,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
elif args.output_dir is not None:
|
elif args.output_dir is not None:
|
||||||
os.makedirs(args.output_dir, exist_ok=True)
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from filelock import FileLock
|
from filelock import FileLock
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
MODEL_MAPPING,
|
MODEL_MAPPING,
|
||||||
@@ -373,7 +373,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoConfig,
|
AutoConfig,
|
||||||
AutoModelForSequenceClassification,
|
AutoModelForSequenceClassification,
|
||||||
@@ -244,7 +244,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
MODEL_MAPPING,
|
MODEL_MAPPING,
|
||||||
@@ -298,7 +298,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ import transformers
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
MODEL_MAPPING,
|
MODEL_MAPPING,
|
||||||
@@ -345,7 +345,8 @@ def main():
|
|||||||
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
|
||||||
else:
|
else:
|
||||||
repo_name = args.hub_model_id
|
repo_name = args.hub_model_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_name)
|
create_repo(repo_name, exist_ok=True, token=args.hub_token)
|
||||||
|
repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
|
|||||||
2
setup.py
2
setup.py
@@ -117,7 +117,7 @@ _deps = [
|
|||||||
"fugashi>=1.0",
|
"fugashi>=1.0",
|
||||||
"GitPython<3.1.19",
|
"GitPython<3.1.19",
|
||||||
"hf-doc-builder>=0.3.0",
|
"hf-doc-builder>=0.3.0",
|
||||||
"huggingface-hub>=0.10.0,<1.0",
|
"huggingface-hub>=0.11.0,<1.0",
|
||||||
"importlib_metadata",
|
"importlib_metadata",
|
||||||
"ipadic>=1.0.0,<2.0",
|
"ipadic>=1.0.0,<2.0",
|
||||||
"isort>=5.5.4",
|
"isort>=5.5.4",
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ deps = {
|
|||||||
"fugashi": "fugashi>=1.0",
|
"fugashi": "fugashi>=1.0",
|
||||||
"GitPython": "GitPython<3.1.19",
|
"GitPython": "GitPython<3.1.19",
|
||||||
"hf-doc-builder": "hf-doc-builder>=0.3.0",
|
"hf-doc-builder": "hf-doc-builder>=0.3.0",
|
||||||
"huggingface-hub": "huggingface-hub>=0.10.0,<1.0",
|
"huggingface-hub": "huggingface-hub>=0.11.0,<1.0",
|
||||||
"importlib_metadata": "importlib_metadata",
|
"importlib_metadata": "importlib_metadata",
|
||||||
"ipadic": "ipadic>=1.0.0,<2.0",
|
"ipadic": "ipadic>=1.0.0,<2.0",
|
||||||
"isort": "isort>=5.5.4",
|
"isort": "isort>=5.5.4",
|
||||||
|
|||||||
@@ -340,11 +340,7 @@ class PushToHubCallback(Callback):
|
|||||||
self.output_dir = output_dir
|
self.output_dir = output_dir
|
||||||
self.hub_model_id = hub_model_id
|
self.hub_model_id = hub_model_id
|
||||||
create_repo(self.hub_model_id, exist_ok=True)
|
create_repo(self.hub_model_id, exist_ok=True)
|
||||||
self.repo = Repository(
|
self.repo = Repository(str(self.output_dir), clone_from=self.hub_model_id, token=hub_token)
|
||||||
str(self.output_dir),
|
|
||||||
clone_from=self.hub_model_id,
|
|
||||||
use_auth_token=hub_token if hub_token else True,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.tokenizer = tokenizer
|
self.tokenizer = tokenizer
|
||||||
self.last_job = None
|
self.last_job = None
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ from torch import nn
|
|||||||
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
|
from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler
|
||||||
from torch.utils.data.distributed import DistributedSampler
|
from torch.utils.data.distributed import DistributedSampler
|
||||||
|
|
||||||
from huggingface_hub import Repository
|
from huggingface_hub import Repository, create_repo
|
||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
from .configuration_utils import PretrainedConfig
|
from .configuration_utils import PretrainedConfig
|
||||||
@@ -3315,7 +3315,6 @@ class Trainer:
|
|||||||
"""
|
"""
|
||||||
if not self.is_world_process_zero():
|
if not self.is_world_process_zero():
|
||||||
return
|
return
|
||||||
use_auth_token = True if self.args.hub_token is None else self.args.hub_token
|
|
||||||
if self.args.hub_model_id is None:
|
if self.args.hub_model_id is None:
|
||||||
repo_name = Path(self.args.output_dir).absolute().name
|
repo_name = Path(self.args.output_dir).absolute().name
|
||||||
else:
|
else:
|
||||||
@@ -3323,22 +3322,15 @@ class Trainer:
|
|||||||
if "/" not in repo_name:
|
if "/" not in repo_name:
|
||||||
repo_name = get_full_repo_name(repo_name, token=self.args.hub_token)
|
repo_name = get_full_repo_name(repo_name, token=self.args.hub_token)
|
||||||
|
|
||||||
|
# Make sure the repo exists.
|
||||||
|
create_repo(repo_name, token=self.args.hub_token, private=self.args.hub_private_repo, exist_ok=True)
|
||||||
try:
|
try:
|
||||||
self.repo = Repository(
|
self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token)
|
||||||
self.args.output_dir,
|
|
||||||
clone_from=repo_name,
|
|
||||||
use_auth_token=use_auth_token,
|
|
||||||
private=self.args.hub_private_repo,
|
|
||||||
)
|
|
||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
if self.args.overwrite_output_dir and at_init:
|
if self.args.overwrite_output_dir and at_init:
|
||||||
# Try again after wiping output_dir
|
# Try again after wiping output_dir
|
||||||
shutil.rmtree(self.args.output_dir)
|
shutil.rmtree(self.args.output_dir)
|
||||||
self.repo = Repository(
|
self.repo = Repository(self.args.output_dir, clone_from=repo_name, token=self.args.hub_token)
|
||||||
self.args.output_dir,
|
|
||||||
clone_from=repo_name,
|
|
||||||
use_auth_token=use_auth_token,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import unittest
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
|
||||||
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
|
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
|
||||||
from requests.exceptions import HTTPError
|
from requests.exceptions import HTTPError
|
||||||
from transformers import (
|
from transformers import (
|
||||||
CONFIG_MAPPING,
|
CONFIG_MAPPING,
|
||||||
@@ -282,7 +282,8 @@ class ProcessorPushToHubTester(unittest.TestCase):
|
|||||||
processor = CustomProcessor(feature_extractor, tokenizer)
|
processor = CustomProcessor(feature_extractor, tokenizer)
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-processor", use_auth_token=self._token)
|
create_repo(f"{USER}/test-dynamic-processor", token=self._token)
|
||||||
|
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-processor", token=self._token)
|
||||||
processor.save_pretrained(tmp_dir)
|
processor.save_pretrained(tmp_dir)
|
||||||
|
|
||||||
# This has added the proper auto_map field to the feature extractor config
|
# This has added the proper auto_map field to the feature extractor config
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ from unittest import skipIf
|
|||||||
import datasets
|
import datasets
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from huggingface_hub import HfFolder, Repository, delete_repo, set_access_token
|
from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_access_token
|
||||||
from requests.exceptions import HTTPError
|
from requests.exceptions import HTTPError
|
||||||
from transformers import (
|
from transformers import (
|
||||||
FEATURE_EXTRACTOR_MAPPING,
|
FEATURE_EXTRACTOR_MAPPING,
|
||||||
@@ -1023,7 +1023,8 @@ class DynamicPipelineTester(unittest.TestCase):
|
|||||||
model = BertForSequenceClassification(config).eval()
|
model = BertForSequenceClassification(config).eval()
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", use_auth_token=self._token)
|
create_repo(f"{USER}/test-dynamic-pipeline", token=self._token)
|
||||||
|
repo = Repository(tmp_dir, clone_from=f"{USER}/test-dynamic-pipeline", token=self._token)
|
||||||
|
|
||||||
vocab_file = os.path.join(tmp_dir, "vocab.txt")
|
vocab_file = os.path.join(tmp_dir, "vocab.txt")
|
||||||
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
|
with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
|
||||||
|
|||||||
@@ -2079,7 +2079,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
|
|||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
_ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-epoch", use_auth_token=self._token)
|
_ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-epoch", token=self._token)
|
||||||
commits = self.get_commit_history(tmp_dir)
|
commits = self.get_commit_history(tmp_dir)
|
||||||
self.assertIn("initial commit", commits)
|
self.assertIn("initial commit", commits)
|
||||||
# We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if
|
# We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if
|
||||||
@@ -2106,7 +2106,7 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
|
|||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
_ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-step", use_auth_token=self._token)
|
_ = Repository(tmp_dir, clone_from=f"{USER}/test-trainer-step", token=self._token)
|
||||||
commits = self.get_commit_history(tmp_dir)
|
commits = self.get_commit_history(tmp_dir)
|
||||||
self.assertIn("initial commit", commits)
|
self.assertIn("initial commit", commits)
|
||||||
# We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if
|
# We can't test that epoch 2 and 3 are in the commits without being flaky as those might be skipped if
|
||||||
|
|||||||
@@ -214,9 +214,7 @@ def update_metadata(token, commit_sha):
|
|||||||
Update the metadata for the Transformers repo.
|
Update the metadata for the Transformers repo.
|
||||||
"""
|
"""
|
||||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
repo = Repository(
|
repo = Repository(tmp_dir, clone_from="huggingface/transformers-metadata", repo_type="dataset", token=token)
|
||||||
tmp_dir, clone_from="huggingface/transformers-metadata", repo_type="dataset", use_auth_token=token
|
|
||||||
)
|
|
||||||
|
|
||||||
frameworks_table = get_frameworks_table()
|
frameworks_table = get_frameworks_table()
|
||||||
frameworks_dataset = Dataset.from_pandas(frameworks_table)
|
frameworks_dataset = Dataset.from_pandas(frameworks_table)
|
||||||
|
|||||||
Reference in New Issue
Block a user