Update legacy Repository usage in various example files (#29085)

* Update legacy Repository usage in `examples/pytorch/text-classification/run_glue_no_trainer.py`

`Repository` is marked for deprecation; see https://huggingface.co/docs/huggingface_hub/guides/upload#legacy-upload-files-with-git-lfs

* Fix import order

* Replace all example usage of deprecated Repository

* Fix remaining repo call and rename args variable

* Revert the removal of `.gitignore` file creation and leave the research examples unchanged
This commit is contained in:
Hilco van der Wilk
2024-03-12 14:20:49 +01:00
committed by GitHub
parent f1a565a39f
commit b6404866cd
24 changed files with 338 additions and 163 deletions

View File

@@ -42,7 +42,7 @@ from flax import jax_utils, traverse_util
from flax.jax_utils import unreplicate from flax.jax_utils import unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from PIL import Image from PIL import Image
from tqdm import tqdm from tqdm import tqdm
@@ -455,9 +455,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -1061,7 +1060,13 @@ def main():
model.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir), params=params) model.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir), params=params)
tokenizer.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir)) tokenizer.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir))
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=commit_msg, blocking=False) api.upload_folder(
commit_message=commit_msg,
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
def evaluation_loop( def evaluation_loop(
rng: jax.random.PRNGKey, rng: jax.random.PRNGKey,

View File

@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
from flax.jax_utils import pad_shard_unpad from flax.jax_utils import pad_shard_unpad
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard from flax.training.common_utils import get_metrics, onehot, shard
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
from transformers import ( from transformers import (
@@ -517,9 +517,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -949,7 +948,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of step {cur_step}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
# Eval after training # Eval after training
if training_args.do_eval: if training_args.do_eval:

View File

@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
from flax.jax_utils import pad_shard_unpad, unreplicate from flax.jax_utils import pad_shard_unpad, unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
import transformers import transformers
@@ -403,9 +403,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -847,8 +846,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of step {cur_step}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
# Eval after training # Eval after training
if training_args.do_eval: if training_args.do_eval:
eval_metrics = [] eval_metrics = []

View File

@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
from flax.jax_utils import pad_shard_unpad from flax.jax_utils import pad_shard_unpad
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard from flax.training.common_utils import get_metrics, onehot, shard
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
from transformers import ( from transformers import (
@@ -441,9 +441,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -890,8 +889,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of step {cur_step}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
# Eval after training # Eval after training
if training_args.do_eval: if training_args.do_eval:
num_eval_samples = len(tokenized_datasets["validation"]) num_eval_samples = len(tokenized_datasets["validation"])

View File

@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
from flax.jax_utils import pad_shard_unpad from flax.jax_utils import pad_shard_unpad
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard from flax.training.common_utils import get_metrics, onehot, shard
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
from transformers import ( from transformers import (
@@ -558,9 +558,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -977,8 +976,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of step {cur_step}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
# Eval after training # Eval after training
if training_args.do_eval: if training_args.do_eval:
num_eval_samples = len(tokenized_datasets["validation"]) num_eval_samples = len(tokenized_datasets["validation"])

View File

@@ -42,7 +42,7 @@ from flax import struct, traverse_util
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard from flax.training.common_utils import get_metrics, onehot, shard
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
from utils_qa import postprocess_qa_predictions from utils_qa import postprocess_qa_predictions
@@ -493,9 +493,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# region Load Data # region Load Data
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
@@ -1051,7 +1050,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of step {cur_step}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}" epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
# endregion # endregion

View File

@@ -39,7 +39,7 @@ from flax import jax_utils, traverse_util
from flax.jax_utils import pad_shard_unpad, unreplicate from flax.jax_utils import pad_shard_unpad, unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm import tqdm from tqdm import tqdm
@@ -427,8 +427,9 @@ def main():
) )
else: else:
repo_name = training_args.hub_model_id repo_name = training_args.hub_model_id
create_repo(repo_name, exist_ok=True, token=training_args.hub_token) # Create repo and retrieve repo_id
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) api = HfApi()
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
# 3. Load dataset # 3. Load dataset
raw_datasets = DatasetDict() raw_datasets = DatasetDict()
@@ -852,7 +853,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of epoch {epoch}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
from flax.jax_utils import pad_shard_unpad, unreplicate from flax.jax_utils import pad_shard_unpad, unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
import transformers import transformers
@@ -483,9 +483,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -976,7 +975,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of epoch {epoch}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
# ======================== Prediction loop ============================== # ======================== Prediction loop ==============================
if training_args.do_predict: if training_args.do_predict:

View File

@@ -37,7 +37,7 @@ from flax import struct, traverse_util
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard from flax.training.common_utils import get_metrics, onehot, shard
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
import transformers import transformers
@@ -373,9 +373,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
# or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
@@ -677,7 +676,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of epoch {epoch}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}" epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
# save the eval metrics in json # save the eval metrics in json

View File

@@ -39,7 +39,7 @@ from flax import struct, traverse_util
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard from flax.training.common_utils import get_metrics, onehot, shard
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
import transformers import transformers
@@ -429,9 +429,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
# or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/ # or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
@@ -798,7 +797,13 @@ def main():
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
tokenizer.save_pretrained(training_args.output_dir) tokenizer.save_pretrained(training_args.output_dir)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of step {cur_step}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}" epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
# Eval after training # Eval after training

View File

@@ -42,7 +42,7 @@ from flax import jax_utils
from flax.jax_utils import pad_shard_unpad, unreplicate from flax.jax_utils import pad_shard_unpad, unreplicate
from flax.training import train_state from flax.training import train_state
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from tqdm import tqdm from tqdm import tqdm
import transformers import transformers
@@ -324,9 +324,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(training_args.output_dir).absolute().name repo_name = Path(training_args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
# Initialize datasets and pre-processing transforms # Initialize datasets and pre-processing transforms
# We use torchvision here for faster pre-processing # We use torchvision here for faster pre-processing
@@ -595,7 +594,13 @@ def main():
params = jax.device_get(jax.tree_util.tree_map(lambda x: x[0], state.params)) params = jax.device_get(jax.tree_util.tree_map(lambda x: x[0], state.params))
model.save_pretrained(training_args.output_dir, params=params) model.save_pretrained(training_args.output_dir, params=params)
if training_args.push_to_hub: if training_args.push_to_hub:
repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False) api.upload_folder(
commit_message=f"Saving weights and logs of epoch {epoch}",
folder_path=training_args.output_dir,
repo_id=repo_id,
repo_type="model",
token=training_args.hub_token,
)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -27,7 +27,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torchvision.transforms import ( from torchvision.transforms import (
CenterCrop, CenterCrop,
@@ -264,9 +264,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -561,10 +560,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress {completed_steps} steps", commit_message=f"Training in progress epoch {epoch}",
blocking=False, folder_path=args.output_dir,
auto_lfs_prune=True, repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if completed_steps >= args.max_train_steps: if completed_steps >= args.max_train_steps:
@@ -603,8 +604,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -625,8 +630,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
all_results = {f"eval_{k}": v for k, v in eval_metric.items()} all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump(all_results, f) json.dump(all_results, f)

View File

@@ -26,7 +26,7 @@ import torch
from accelerate import Accelerator, DistributedType from accelerate import Accelerator, DistributedType
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Lambda, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToTensor from torchvision.transforms import Compose, Lambda, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToTensor
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -437,15 +437,15 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
gitignore.write("step_*\n") gitignore.write("step_*\n")
if "epoch_*" not in gitignore: if "epoch_*" not in gitignore:
gitignore.write("epoch_*\n") gitignore.write("epoch_*\n")
elif args.output_dir is not None: elif args.output_dir is not None:
os.makedirs(args.output_dir, exist_ok=True) os.makedirs(args.output_dir, exist_ok=True)
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
@@ -781,8 +781,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -803,7 +807,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -37,7 +37,7 @@ from accelerate import Accelerator, DistributedType
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -304,9 +304,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -682,8 +681,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -704,8 +707,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump({"perplexity": perplexity}, f) json.dump({"perplexity": perplexity}, f)

View File

@@ -37,7 +37,7 @@ from accelerate import Accelerator, DistributedType
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -311,9 +311,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -720,8 +719,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -742,8 +745,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump({"perplexity": perplexity}, f) json.dump({"perplexity": perplexity}, f)

View File

@@ -36,7 +36,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -328,9 +328,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -661,8 +660,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -683,8 +686,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
all_results = {f"eval_{k}": v for k, v in eval_metric.items()} all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump(all_results, f) json.dump(all_results, f)

View File

@@ -34,7 +34,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
from utils_qa import postprocess_qa_predictions_with_beam_search from utils_qa import postprocess_qa_predictions_with_beam_search
@@ -333,9 +333,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -873,8 +872,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
# initialize all lists to collect the batches # initialize all lists to collect the batches
@@ -1020,7 +1023,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
logger.info(json.dumps(eval_metric, indent=4)) logger.info(json.dumps(eval_metric, indent=4))
save_prefixed_metrics(eval_metric, args.output_dir) save_prefixed_metrics(eval_metric, args.output_dir)

View File

@@ -34,7 +34,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
from utils_qa import postprocess_qa_predictions from utils_qa import postprocess_qa_predictions
@@ -381,9 +381,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -912,8 +911,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
# Evaluation # Evaluation
@@ -1013,8 +1016,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
logger.info(json.dumps(eval_metric, indent=4)) logger.info(json.dumps(eval_metric, indent=4))
save_prefixed_metrics(eval_metric, args.output_dir) save_prefixed_metrics(eval_metric, args.output_dir)

View File

@@ -29,7 +29,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo, hf_hub_download from huggingface_hub import HfApi, hf_hub_download
from PIL import Image from PIL import Image
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torchvision import transforms from torchvision import transforms
@@ -365,9 +365,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -632,10 +631,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress {completed_steps} steps", commit_message=f"Training in progress epoch {epoch}",
blocking=False, folder_path=args.output_dir,
auto_lfs_prune=True, repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if completed_steps >= args.max_train_steps: if completed_steps >= args.max_train_steps:
@@ -687,8 +688,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -709,7 +714,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
image_processor.save_pretrained(args.output_dir) image_processor.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
all_results = { all_results = {
f"eval_{k}": v.tolist() if isinstance(v, np.ndarray) else v for k, v in eval_metrics.items() f"eval_{k}": v.tolist() if isinstance(v, np.ndarray) else v for k, v in eval_metrics.items()

View File

@@ -27,7 +27,7 @@ import torch
from accelerate import Accelerator from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from datasets import DatasetDict, concatenate_datasets, load_dataset from datasets import DatasetDict, concatenate_datasets, load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data.dataloader import DataLoader from torch.utils.data.dataloader import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -423,9 +423,14 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore:
gitignore.write("step_*\n")
if "epoch_*" not in gitignore:
gitignore.write("epoch_*\n")
elif args.output_dir is not None: elif args.output_dir is not None:
os.makedirs(args.output_dir, exist_ok=True) os.makedirs(args.output_dir, exist_ok=True)
accelerator.wait_for_everyone() accelerator.wait_for_everyone()
@@ -719,10 +724,12 @@ def main():
) )
if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process: if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process:
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress step {completed_steps}", commit_message=f"Training in progress epoch {epoch}",
blocking=False, folder_path=args.output_dir,
auto_lfs_prune=True, repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
# if completed steps > `args.max_train_steps` stop # if completed steps > `args.max_train_steps` stop
@@ -772,7 +779,13 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -36,7 +36,7 @@ from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from filelock import FileLock from filelock import FileLock
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -375,9 +375,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -755,8 +754,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -774,7 +777,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
all_results = {f"eval_{k}": v for k, v in result.items()} all_results = {f"eval_{k}": v for k, v in result.items()}
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:

View File

@@ -28,7 +28,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -255,9 +255,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -611,8 +610,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -633,7 +636,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
if args.task_name == "mnli": if args.task_name == "mnli":
# Final evaluation on mismatched validation set # Final evaluation on mismatched validation set

View File

@@ -34,7 +34,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import ClassLabel, load_dataset from datasets import ClassLabel, load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -310,9 +310,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -776,8 +775,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -798,7 +801,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
all_results = {f"eval_{k}": v for k, v in eval_metric.items()} all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
if args.with_tracking: if args.with_tracking:

View File

@@ -34,7 +34,7 @@ from accelerate import Accelerator
from accelerate.logging import get_logger from accelerate.logging import get_logger
from accelerate.utils import set_seed from accelerate.utils import set_seed
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub import Repository, create_repo from huggingface_hub import HfApi
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -355,9 +355,8 @@ def main():
if repo_name is None: if repo_name is None:
repo_name = Path(args.output_dir).absolute().name repo_name = Path(args.output_dir).absolute().name
# Create repo and retrieve repo_id # Create repo and retrieve repo_id
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id api = HfApi()
# Clone repo locally repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore: if "step_*" not in gitignore:
@@ -743,8 +742,12 @@ def main():
) )
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub( api.upload_folder(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True commit_message=f"Training in progress epoch {epoch}",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
) )
if args.checkpointing_steps == "epoch": if args.checkpointing_steps == "epoch":
@@ -765,7 +768,13 @@ def main():
if accelerator.is_main_process: if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub: if args.push_to_hub:
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) api.upload_folder(
commit_message="End of training",
folder_path=args.output_dir,
repo_id=repo_id,
repo_type="model",
token=args.hub_token,
)
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
json.dump({"eval_bleu": eval_metric["score"]}, f) json.dump({"eval_bleu": eval_metric["score"]}, f)