Update legacy Repository usage in various example files (#29085)
* Update legacy Repository usage in `examples/pytorch/text-classification/run_glue_no_trainer.py`. `Repository` is marked for deprecation; see https://huggingface.co/docs/huggingface_hub/guides/upload#legacy-upload-files-with-git-lfs
* Fix import order
* Replace all example usage of deprecated Repository
* Fix remaining repo call and rename args variable
* Revert removing creation of gitignore files and don't change research examples
This commit is contained in:
committed by
GitHub
parent
f1a565a39f
commit
b6404866cd
@@ -42,7 +42,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import unreplicate
|
from flax.jax_utils import unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
@@ -455,9 +455,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
@@ -1061,7 +1060,13 @@ def main():
|
|||||||
model.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir), params=params)
|
model.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir), params=params)
|
||||||
tokenizer.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir))
|
tokenizer.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir))
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=commit_msg, blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=commit_msg,
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
def evaluation_loop(
|
def evaluation_loop(
|
||||||
rng: jax.random.PRNGKey,
|
rng: jax.random.PRNGKey,
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad
|
from flax.jax_utils import pad_shard_unpad
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from transformers import (
|
from transformers import (
|
||||||
@@ -517,9 +517,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
@@ -949,7 +948,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of step {cur_step}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
# Eval after training
|
# Eval after training
|
||||||
if training_args.do_eval:
|
if training_args.do_eval:
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, unreplicate
|
from flax.jax_utils import pad_shard_unpad, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import transformers
|
import transformers
|
||||||
@@ -403,9 +403,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
@@ -847,8 +846,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of step {cur_step}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
# Eval after training
|
# Eval after training
|
||||||
if training_args.do_eval:
|
if training_args.do_eval:
|
||||||
eval_metrics = []
|
eval_metrics = []
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad
|
from flax.jax_utils import pad_shard_unpad
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from transformers import (
|
from transformers import (
|
||||||
@@ -441,9 +441,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
@@ -890,8 +889,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of step {cur_step}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
# Eval after training
|
# Eval after training
|
||||||
if training_args.do_eval:
|
if training_args.do_eval:
|
||||||
num_eval_samples = len(tokenized_datasets["validation"])
|
num_eval_samples = len(tokenized_datasets["validation"])
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad
|
from flax.jax_utils import pad_shard_unpad
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from transformers import (
|
from transformers import (
|
||||||
@@ -558,9 +558,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
@@ -977,8 +976,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of step {cur_step}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
# Eval after training
|
# Eval after training
|
||||||
if training_args.do_eval:
|
if training_args.do_eval:
|
||||||
num_eval_samples = len(tokenized_datasets["validation"])
|
num_eval_samples = len(tokenized_datasets["validation"])
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ from flax import struct, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from utils_qa import postprocess_qa_predictions
|
from utils_qa import postprocess_qa_predictions
|
||||||
|
|
||||||
@@ -493,9 +493,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# region Load Data
|
# region Load Data
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
@@ -1051,7 +1050,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of step {cur_step}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
|
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
|
||||||
# endregion
|
# endregion
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, unreplicate
|
from flax.jax_utils import pad_shard_unpad, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
@@ -427,8 +427,9 @@ def main():
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repo_name = training_args.hub_model_id
|
repo_name = training_args.hub_model_id
|
||||||
create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
|
# Create repo and retrieve repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
|
api = HfApi()
|
||||||
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
|
|
||||||
# 3. Load dataset
|
# 3. Load dataset
|
||||||
raw_datasets = DatasetDict()
|
raw_datasets = DatasetDict()
|
||||||
@@ -852,7 +853,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of epoch {epoch}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ from flax import jax_utils, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, unreplicate
|
from flax.jax_utils import pad_shard_unpad, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import transformers
|
import transformers
|
||||||
@@ -483,9 +483,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
|
||||||
@@ -976,7 +975,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of epoch {epoch}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
# ======================== Prediction loop ==============================
|
# ======================== Prediction loop ==============================
|
||||||
if training_args.do_predict:
|
if training_args.do_predict:
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ from flax import struct, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import transformers
|
import transformers
|
||||||
@@ -373,9 +373,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
|
||||||
# or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
|
# or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
|
||||||
@@ -677,7 +676,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of epoch {epoch}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
|
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
|
||||||
|
|
||||||
# save the eval metrics in json
|
# save the eval metrics in json
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ from flax import struct, traverse_util
|
|||||||
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard
|
from flax.training.common_utils import get_metrics, onehot, shard
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import transformers
|
import transformers
|
||||||
@@ -429,9 +429,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
# Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
|
||||||
# or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
|
# or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
|
||||||
@@ -798,7 +797,13 @@ def main():
|
|||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
tokenizer.save_pretrained(training_args.output_dir)
|
tokenizer.save_pretrained(training_args.output_dir)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of step {cur_step}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
|
epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
|
||||||
|
|
||||||
# Eval after training
|
# Eval after training
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ from flax import jax_utils
|
|||||||
from flax.jax_utils import pad_shard_unpad, unreplicate
|
from flax.jax_utils import pad_shard_unpad, unreplicate
|
||||||
from flax.training import train_state
|
from flax.training import train_state
|
||||||
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import transformers
|
import transformers
|
||||||
@@ -324,9 +324,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(training_args.output_dir).absolute().name
|
repo_name = Path(training_args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
|
||||||
repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
|
|
||||||
|
|
||||||
# Initialize datasets and pre-processing transforms
|
# Initialize datasets and pre-processing transforms
|
||||||
# We use torchvision here for faster pre-processing
|
# We use torchvision here for faster pre-processing
|
||||||
@@ -595,7 +594,13 @@ def main():
|
|||||||
params = jax.device_get(jax.tree_util.tree_map(lambda x: x[0], state.params))
|
params = jax.device_get(jax.tree_util.tree_map(lambda x: x[0], state.params))
|
||||||
model.save_pretrained(training_args.output_dir, params=params)
|
model.save_pretrained(training_args.output_dir, params=params)
|
||||||
if training_args.push_to_hub:
|
if training_args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False)
|
api.upload_folder(
|
||||||
|
commit_message=f"Saving weights and logs of epoch {epoch}",
|
||||||
|
folder_path=training_args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=training_args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torchvision.transforms import (
|
from torchvision.transforms import (
|
||||||
CenterCrop,
|
CenterCrop,
|
||||||
@@ -264,9 +264,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -561,10 +560,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress {completed_steps} steps",
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
blocking=False,
|
folder_path=args.output_dir,
|
||||||
auto_lfs_prune=True,
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if completed_steps >= args.max_train_steps:
|
if completed_steps >= args.max_train_steps:
|
||||||
@@ -603,8 +604,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -625,8 +630,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
|
all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
|
||||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||||
json.dump(all_results, f)
|
json.dump(all_results, f)
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ import torch
|
|||||||
from accelerate import Accelerator, DistributedType
|
from accelerate import Accelerator, DistributedType
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torchvision.transforms import Compose, Lambda, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToTensor
|
from torchvision.transforms import Compose, Lambda, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToTensor
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
@@ -437,15 +437,15 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
gitignore.write("step_*\n")
|
gitignore.write("step_*\n")
|
||||||
if "epoch_*" not in gitignore:
|
if "epoch_*" not in gitignore:
|
||||||
gitignore.write("epoch_*\n")
|
gitignore.write("epoch_*\n")
|
||||||
|
|
||||||
elif args.output_dir is not None:
|
elif args.output_dir is not None:
|
||||||
os.makedirs(args.output_dir, exist_ok=True)
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -781,8 +781,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -803,7 +807,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ from accelerate import Accelerator, DistributedType
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -304,9 +304,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -682,8 +681,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -704,8 +707,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||||
json.dump({"perplexity": perplexity}, f)
|
json.dump({"perplexity": perplexity}, f)
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ from accelerate import Accelerator, DistributedType
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -311,9 +311,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -720,8 +719,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -742,8 +745,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||||
json.dump({"perplexity": perplexity}, f)
|
json.dump({"perplexity": perplexity}, f)
|
||||||
|
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -328,9 +328,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -661,8 +660,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -683,8 +686,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
|
all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
|
||||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||||
json.dump(all_results, f)
|
json.dump(all_results, f)
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
from utils_qa import postprocess_qa_predictions_with_beam_search
|
from utils_qa import postprocess_qa_predictions_with_beam_search
|
||||||
@@ -333,9 +333,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -873,8 +872,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
# initialize all lists to collect the batches
|
# initialize all lists to collect the batches
|
||||||
@@ -1020,7 +1023,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(json.dumps(eval_metric, indent=4))
|
logger.info(json.dumps(eval_metric, indent=4))
|
||||||
save_prefixed_metrics(eval_metric, args.output_dir)
|
save_prefixed_metrics(eval_metric, args.output_dir)
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
from utils_qa import postprocess_qa_predictions
|
from utils_qa import postprocess_qa_predictions
|
||||||
@@ -381,9 +381,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -912,8 +911,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Evaluation
|
# Evaluation
|
||||||
@@ -1013,8 +1016,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
logger.info(json.dumps(eval_metric, indent=4))
|
logger.info(json.dumps(eval_metric, indent=4))
|
||||||
save_prefixed_metrics(eval_metric, args.output_dir)
|
save_prefixed_metrics(eval_metric, args.output_dir)
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo, hf_hub_download
|
from huggingface_hub import HfApi, hf_hub_download
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torchvision import transforms
|
from torchvision import transforms
|
||||||
@@ -365,9 +365,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -632,10 +631,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress {completed_steps} steps",
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
blocking=False,
|
folder_path=args.output_dir,
|
||||||
auto_lfs_prune=True,
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if completed_steps >= args.max_train_steps:
|
if completed_steps >= args.max_train_steps:
|
||||||
@@ -687,8 +688,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -709,7 +714,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
image_processor.save_pretrained(args.output_dir)
|
image_processor.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
all_results = {
|
all_results = {
|
||||||
f"eval_{k}": v.tolist() if isinstance(v, np.ndarray) else v for k, v in eval_metrics.items()
|
f"eval_{k}": v.tolist() if isinstance(v, np.ndarray) else v for k, v in eval_metrics.items()
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ import torch
|
|||||||
from accelerate import Accelerator
|
from accelerate import Accelerator
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from datasets import DatasetDict, concatenate_datasets, load_dataset
|
from datasets import DatasetDict, concatenate_datasets, load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data.dataloader import DataLoader
|
from torch.utils.data.dataloader import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -423,9 +423,14 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
|
if "step_*" not in gitignore:
|
||||||
|
gitignore.write("step_*\n")
|
||||||
|
if "epoch_*" not in gitignore:
|
||||||
|
gitignore.write("epoch_*\n")
|
||||||
elif args.output_dir is not None:
|
elif args.output_dir is not None:
|
||||||
os.makedirs(args.output_dir, exist_ok=True)
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
accelerator.wait_for_everyone()
|
accelerator.wait_for_everyone()
|
||||||
@@ -719,10 +724,12 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process:
|
if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process:
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress step {completed_steps}",
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
blocking=False,
|
folder_path=args.output_dir,
|
||||||
auto_lfs_prune=True,
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
# if completed steps > `args.max_train_steps` stop
|
# if completed steps > `args.max_train_steps` stop
|
||||||
@@ -772,7 +779,13 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ from accelerate.logging import get_logger
|
|||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from filelock import FileLock
|
from filelock import FileLock
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -375,9 +375,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -755,8 +754,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -774,7 +777,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
all_results = {f"eval_{k}": v for k, v in result.items()}
|
all_results = {f"eval_{k}": v for k, v in result.items()}
|
||||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -255,9 +255,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -611,8 +610,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -633,7 +636,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
if args.task_name == "mnli":
|
if args.task_name == "mnli":
|
||||||
# Final evaluation on mismatched validation set
|
# Final evaluation on mismatched validation set
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import ClassLabel, load_dataset
|
from datasets import ClassLabel, load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -310,9 +310,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -776,8 +775,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -798,7 +801,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
|
|
||||||
all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
|
all_results = {f"eval_{k}": v for k, v in eval_metric.items()}
|
||||||
if args.with_tracking:
|
if args.with_tracking:
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ from accelerate import Accelerator
|
|||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from accelerate.utils import set_seed
|
from accelerate.utils import set_seed
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
from huggingface_hub import Repository, create_repo
|
from huggingface_hub import HfApi
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
|
||||||
@@ -355,9 +355,8 @@ def main():
|
|||||||
if repo_name is None:
|
if repo_name is None:
|
||||||
repo_name = Path(args.output_dir).absolute().name
|
repo_name = Path(args.output_dir).absolute().name
|
||||||
# Create repo and retrieve repo_id
|
# Create repo and retrieve repo_id
|
||||||
repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
api = HfApi()
|
||||||
# Clone repo locally
|
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
|
||||||
repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
|
|
||||||
|
|
||||||
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
|
||||||
if "step_*" not in gitignore:
|
if "step_*" not in gitignore:
|
||||||
@@ -743,8 +742,12 @@ def main():
|
|||||||
)
|
)
|
||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
repo.push_to_hub(
|
api.upload_folder(
|
||||||
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
|
commit_message=f"Training in progress epoch {epoch}",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.checkpointing_steps == "epoch":
|
if args.checkpointing_steps == "epoch":
|
||||||
@@ -765,7 +768,13 @@ def main():
|
|||||||
if accelerator.is_main_process:
|
if accelerator.is_main_process:
|
||||||
tokenizer.save_pretrained(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
if args.push_to_hub:
|
if args.push_to_hub:
|
||||||
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
|
api.upload_folder(
|
||||||
|
commit_message="End of training",
|
||||||
|
folder_path=args.output_dir,
|
||||||
|
repo_id=repo_id,
|
||||||
|
repo_type="model",
|
||||||
|
token=args.hub_token,
|
||||||
|
)
|
||||||
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
|
||||||
json.dump({"eval_bleu": eval_metric["score"]}, f)
|
json.dump({"eval_bleu": eval_metric["score"]}, f)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user