Black preview (#17217)
* Black preview * Fixup too! * Fix check copies * Use the same version as the CI * Bump black
This commit is contained in:
@@ -42,14 +42,18 @@ class ModelArguments:
|
||||
)
|
||||
encoder_model_name_or_path: str = field(
|
||||
metadata={
|
||||
"help": "The encoder model checkpoint for weights initialization."
|
||||
"Don't set if you want to train an encoder model from scratch."
|
||||
"help": (
|
||||
"The encoder model checkpoint for weights initialization."
|
||||
"Don't set if you want to train an encoder model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
decoder_model_name_or_path: str = field(
|
||||
metadata={
|
||||
"help": "The decoder model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a decoder model from scratch."
|
||||
"help": (
|
||||
"The decoder model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a decoder model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
encoder_config_name: Optional[str] = field(
|
||||
|
||||
@@ -175,14 +175,19 @@ class ModelArguments:
|
||||
dtype: Optional[str] = field(
|
||||
default="float32",
|
||||
metadata={
|
||||
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
|
||||
"help": (
|
||||
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
||||
" `[float32, float16, bfloat16]`."
|
||||
)
|
||||
},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -222,38 +227,48 @@ class DataTrainingArguments:
|
||||
max_target_length: Optional[int] = field(
|
||||
default=128,
|
||||
metadata={
|
||||
"help": "The maximum total sequence length for target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
"help": (
|
||||
"The maximum total sequence length for target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
)
|
||||
},
|
||||
)
|
||||
val_max_target_length: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
|
||||
"during evaluation."
|
||||
"help": (
|
||||
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
|
||||
"during evaluation."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_eval_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_predict_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
preprocessing_num_workers: Optional[int] = field(
|
||||
@@ -266,8 +281,10 @@ class DataTrainingArguments:
|
||||
num_beams: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
|
||||
"which is used during evaluation."
|
||||
"help": (
|
||||
"Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
|
||||
"which is used during evaluation."
|
||||
)
|
||||
},
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
@@ -623,7 +640,7 @@ def main():
|
||||
eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
|
||||
if training_args.block_size % train_batch_size > 0 or training_args.block_size % eval_batch_size > 0:
|
||||
raise ValueError(
|
||||
f"`training_args.block_size` needs to be a multiple of the global train/eval batch size."
|
||||
"`training_args.block_size` needs to be a multiple of the global train/eval batch size."
|
||||
f"Got {training_args.block_size}, {train_batch_size} and {eval_batch_size} respectively instead."
|
||||
)
|
||||
|
||||
@@ -1136,7 +1153,7 @@ def main():
|
||||
)
|
||||
|
||||
# train
|
||||
for (batch_idx, _) in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)):
|
||||
for batch_idx, _ in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)):
|
||||
|
||||
cur_step += 1
|
||||
batch = next(train_batches)
|
||||
@@ -1150,7 +1167,10 @@ def main():
|
||||
if training_args.logging_steps > 0 and cur_step % training_args.logging_steps == 0:
|
||||
|
||||
_train_metric = unreplicate(train_metric)
|
||||
desc = f"Epoch... ({epoch + 1}/{num_epochs} | Step: {cur_step} | Loss: {_train_metric['loss']} | Learning Rate: {_train_metric['learning_rate']} | Time per step: {time_per_step})"
|
||||
desc = (
|
||||
f"Epoch... ({epoch + 1}/{num_epochs} | Step: {cur_step} | Loss: {_train_metric['loss']} |"
|
||||
f" Learning Rate: {_train_metric['learning_rate']} | Time per step: {time_per_step})"
|
||||
)
|
||||
epochs.desc = desc
|
||||
epochs.write(desc)
|
||||
|
||||
|
||||
@@ -138,8 +138,9 @@ class ModelArguments:
|
||||
model_name_or_path: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a model from scratch."
|
||||
"help": (
|
||||
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
model_type: Optional[str] = field(
|
||||
@@ -162,14 +163,19 @@ class ModelArguments:
|
||||
dtype: Optional[str] = field(
|
||||
default="float32",
|
||||
metadata={
|
||||
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
|
||||
"help": (
|
||||
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
||||
" `[float32, float16, bfloat16]`."
|
||||
)
|
||||
},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -194,15 +200,19 @@ class DataTrainingArguments:
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_eval_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
@@ -217,9 +227,11 @@ class DataTrainingArguments:
|
||||
block_size: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "Optional input sequence length after tokenization. "
|
||||
"The training dataset will be truncated in block of this size for training. "
|
||||
"Default to the model max input length for single sentence inputs (take into account special tokens)."
|
||||
"help": (
|
||||
"Optional input sequence length after tokenization. "
|
||||
"The training dataset will be truncated in block of this size for training. "
|
||||
"Default to the model max input length for single sentence inputs (take into account special tokens)."
|
||||
)
|
||||
},
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
@@ -505,7 +517,8 @@ def main():
|
||||
# clm input could be much much longer than block_size
|
||||
if "Token indices sequence length is longer than the" in cl.out:
|
||||
tok_logger.warning(
|
||||
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model."
|
||||
"^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
|
||||
" before being passed to the model."
|
||||
)
|
||||
return output
|
||||
|
||||
@@ -735,7 +748,8 @@ def main():
|
||||
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
|
||||
|
||||
epochs.write(
|
||||
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
|
||||
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:"
|
||||
f" {train_metric['learning_rate'].mean()})"
|
||||
)
|
||||
|
||||
train_metrics = []
|
||||
@@ -762,7 +776,10 @@ def main():
|
||||
eval_metrics["perplexity"] = float("inf")
|
||||
|
||||
# Print metrics and update progress bar
|
||||
desc = f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity: {eval_metrics['perplexity']})"
|
||||
desc = (
|
||||
f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity:"
|
||||
f" {eval_metrics['perplexity']})"
|
||||
)
|
||||
epochs.write(desc)
|
||||
epochs.desc = desc
|
||||
|
||||
|
||||
@@ -136,8 +136,9 @@ class ModelArguments:
|
||||
model_name_or_path: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a model from scratch."
|
||||
"help": (
|
||||
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
model_type: Optional[str] = field(
|
||||
@@ -160,14 +161,19 @@ class ModelArguments:
|
||||
dtype: Optional[str] = field(
|
||||
default="float32",
|
||||
metadata={
|
||||
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
|
||||
"help": (
|
||||
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
||||
" `[float32, float16, bfloat16]`."
|
||||
)
|
||||
},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -209,8 +215,10 @@ class DataTrainingArguments:
|
||||
max_seq_length: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated. Default to the max input length of the model."
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated. Default to the max input length of the model."
|
||||
)
|
||||
},
|
||||
)
|
||||
preprocessing_num_workers: Optional[int] = field(
|
||||
@@ -223,8 +231,10 @@ class DataTrainingArguments:
|
||||
pad_to_max_length: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Whether to pad all samples to `max_seq_length`. "
|
||||
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
|
||||
"help": (
|
||||
"Whether to pad all samples to `max_seq_length`. "
|
||||
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
|
||||
)
|
||||
},
|
||||
)
|
||||
line_by_line: bool = field(
|
||||
@@ -764,7 +774,8 @@ def main():
|
||||
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
|
||||
|
||||
epochs.write(
|
||||
f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
|
||||
f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate:"
|
||||
f" {train_metric['learning_rate']})"
|
||||
)
|
||||
|
||||
train_metrics = []
|
||||
|
||||
@@ -135,8 +135,9 @@ class ModelArguments:
|
||||
model_name_or_path: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a model from scratch."
|
||||
"help": (
|
||||
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
model_type: Optional[str] = field(
|
||||
@@ -159,14 +160,19 @@ class ModelArguments:
|
||||
dtype: Optional[str] = field(
|
||||
default="float32",
|
||||
metadata={
|
||||
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
|
||||
"help": (
|
||||
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
||||
" `[float32, float16, bfloat16]`."
|
||||
)
|
||||
},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -208,7 +214,10 @@ class DataTrainingArguments:
|
||||
max_seq_length: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The maximum total input sequence length after tokenization and masking. Sequences longer than this will be truncated. Default to the max input length of the model."
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization and masking. Sequences longer than this"
|
||||
" will be truncated. Default to the max input length of the model."
|
||||
)
|
||||
},
|
||||
)
|
||||
preprocessing_num_workers: Optional[int] = field(
|
||||
@@ -337,12 +346,14 @@ class FlaxDataCollatorForT5MLM:
|
||||
|
||||
if batch["input_ids"].shape[-1] != self.input_length:
|
||||
raise ValueError(
|
||||
f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but should be {self.target_length}."
|
||||
f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but"
|
||||
f" should be {self.target_length}."
|
||||
)
|
||||
|
||||
if batch["labels"].shape[-1] != self.target_length:
|
||||
raise ValueError(
|
||||
f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be {self.target_length}."
|
||||
f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be"
|
||||
f" {self.target_length}."
|
||||
)
|
||||
|
||||
# to check that tokens are correctly preprocessed, one can run `self.tokenizer.batch_decode(input_ids)` and `self.tokenizer.batch_decode(labels)` here...
|
||||
@@ -884,7 +895,8 @@ def main():
|
||||
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
|
||||
|
||||
epochs.write(
|
||||
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
|
||||
f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:"
|
||||
f" {train_metric['learning_rate'].mean()})"
|
||||
)
|
||||
|
||||
train_metrics = []
|
||||
|
||||
@@ -157,14 +157,19 @@ class ModelArguments:
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
dtype: Optional[str] = field(
|
||||
default="float32",
|
||||
metadata={
|
||||
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
|
||||
"help": (
|
||||
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
||||
" `[float32, float16, bfloat16]`."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -200,37 +205,46 @@ class DataTrainingArguments:
|
||||
max_seq_length: int = field(
|
||||
default=384,
|
||||
metadata={
|
||||
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
)
|
||||
},
|
||||
)
|
||||
pad_to_max_length: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Whether to pad all samples to `max_seq_length`. "
|
||||
"If False, will pad the samples dynamically when batching to the maximum length in the batch (which can "
|
||||
"be faster on GPU but will be slower on TPU)."
|
||||
"help": (
|
||||
"Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when"
|
||||
" batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_eval_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_predict_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
version_2_with_negative: bool = field(
|
||||
@@ -239,9 +253,11 @@ class DataTrainingArguments:
|
||||
null_score_diff_threshold: float = field(
|
||||
default=0.0,
|
||||
metadata={
|
||||
"help": "The threshold used to select the null answer: if the best answer has a score that is less than "
|
||||
"the score of the null answer minus this threshold, the null answer is selected for this example. "
|
||||
"Only useful when `version_2_with_negative=True`."
|
||||
"help": (
|
||||
"The threshold used to select the null answer: if the best answer has a score that is less than "
|
||||
"the score of the null answer minus this threshold, the null answer is selected for this example. "
|
||||
"Only useful when `version_2_with_negative=True`."
|
||||
)
|
||||
},
|
||||
)
|
||||
doc_stride: int = field(
|
||||
@@ -255,8 +271,10 @@ class DataTrainingArguments:
|
||||
max_answer_length: int = field(
|
||||
default=30,
|
||||
metadata={
|
||||
"help": "The maximum length of an answer that can be generated. This is needed because the start "
|
||||
"and end predictions are not conditioned on one another."
|
||||
"help": (
|
||||
"The maximum length of an answer that can be generated. This is needed because the start "
|
||||
"and end predictions are not conditioned on one another."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -498,9 +516,9 @@ def main():
|
||||
# region Tokenizer check: this script requires a fast tokenizer.
|
||||
if not isinstance(tokenizer, PreTrainedTokenizerFast):
|
||||
raise ValueError(
|
||||
"This example script only works for models that have a fast tokenizer. Checkout the big table of models "
|
||||
"at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this "
|
||||
"requirement"
|
||||
"This example script only works for models that have a fast tokenizer. Checkout the big table of models at"
|
||||
" https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet"
|
||||
" this requirement"
|
||||
)
|
||||
# endregion
|
||||
|
||||
@@ -928,7 +946,8 @@ def main():
|
||||
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
|
||||
|
||||
epochs.write(
|
||||
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
|
||||
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:"
|
||||
f" {train_metric['learning_rate']})"
|
||||
)
|
||||
|
||||
train_metrics = []
|
||||
|
||||
@@ -149,8 +149,9 @@ class ModelArguments:
|
||||
model_name_or_path: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a model from scratch."
|
||||
"help": (
|
||||
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
model_type: Optional[str] = field(
|
||||
@@ -173,14 +174,19 @@ class ModelArguments:
|
||||
dtype: Optional[str] = field(
|
||||
default="float32",
|
||||
metadata={
|
||||
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
|
||||
"help": (
|
||||
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
||||
" `[float32, float16, bfloat16]`."
|
||||
)
|
||||
},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -217,45 +223,57 @@ class DataTrainingArguments:
|
||||
max_source_length: Optional[int] = field(
|
||||
default=1024,
|
||||
metadata={
|
||||
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_target_length: Optional[int] = field(
|
||||
default=128,
|
||||
metadata={
|
||||
"help": "The maximum total sequence length for target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
"help": (
|
||||
"The maximum total sequence length for target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
)
|
||||
},
|
||||
)
|
||||
val_max_target_length: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
|
||||
"during evaluation."
|
||||
"help": (
|
||||
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
|
||||
"This argument is also used to override the `max_length` param of `model.generate`, which is used "
|
||||
"during evaluation."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_eval_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_predict_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
preprocessing_num_workers: Optional[int] = field(
|
||||
@@ -271,8 +289,10 @@ class DataTrainingArguments:
|
||||
num_beams: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
|
||||
"which is used during evaluation."
|
||||
"help": (
|
||||
"Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
|
||||
"which is used during evaluation."
|
||||
)
|
||||
},
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
@@ -831,7 +851,8 @@ def main():
|
||||
train_metric = unreplicate(train_metric)
|
||||
|
||||
epochs.write(
|
||||
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
|
||||
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:"
|
||||
f" {train_metric['learning_rate']})"
|
||||
)
|
||||
|
||||
# ======================== Evaluating ==============================
|
||||
|
||||
@@ -103,8 +103,10 @@ class ModelArguments:
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -148,29 +150,37 @@ class DataTrainingArguments:
|
||||
max_seq_length: int = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The maximum total input sequence length after tokenization. If set, sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization. If set, sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_eval_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_predict_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -585,7 +595,8 @@ def main():
|
||||
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
|
||||
|
||||
epochs.write(
|
||||
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
|
||||
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:"
|
||||
f" {train_metric['learning_rate']})"
|
||||
)
|
||||
|
||||
train_metrics = []
|
||||
|
||||
@@ -150,8 +150,10 @@ class ModelArguments:
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -196,36 +198,46 @@ class DataTrainingArguments:
|
||||
max_seq_length: int = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The maximum total input sequence length after tokenization. If set, sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization. If set, sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_eval_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_predict_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of prediction examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
label_all_tokens: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Whether to put the label for one word on all tokens of generated by that word or just on the "
|
||||
"one (in which case the other tokens will have a padding index)."
|
||||
"help": (
|
||||
"Whether to put the label for one word on all tokens of generated by that word or just on the "
|
||||
"one (in which case the other tokens will have a padding index)."
|
||||
)
|
||||
},
|
||||
)
|
||||
return_entity_level_metrics: bool = field(
|
||||
@@ -693,7 +705,8 @@ def main():
|
||||
write_train_metric(summary_writer, train_metrics, train_time, cur_step)
|
||||
|
||||
epochs.write(
|
||||
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
|
||||
f"Step... ({cur_step}/{total_steps} | Training Loss: {train_metric['loss']}, Learning Rate:"
|
||||
f" {train_metric['learning_rate']})"
|
||||
)
|
||||
|
||||
train_metrics = []
|
||||
@@ -744,7 +757,8 @@ def main():
|
||||
logger.info(f"Step... ({cur_step}/{total_steps} | Validation metrics: {eval_metrics}")
|
||||
else:
|
||||
logger.info(
|
||||
f"Step... ({cur_step}/{total_steps} | Validation f1: {eval_metrics['f1']}, Validation Acc: {eval_metrics['accuracy']})"
|
||||
f"Step... ({cur_step}/{total_steps} | Validation f1: {eval_metrics['f1']}, Validation Acc:"
|
||||
f" {eval_metrics['accuracy']})"
|
||||
)
|
||||
|
||||
if has_tensorboard and jax.process_index() == 0:
|
||||
|
||||
@@ -134,8 +134,9 @@ class ModelArguments:
|
||||
model_name_or_path: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a model from scratch."
|
||||
"help": (
|
||||
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
model_type: Optional[str] = field(
|
||||
@@ -151,14 +152,19 @@ class ModelArguments:
|
||||
dtype: Optional[str] = field(
|
||||
default="float32",
|
||||
metadata={
|
||||
"help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
|
||||
"help": (
|
||||
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
||||
" `[float32, float16, bfloat16]`."
|
||||
)
|
||||
},
|
||||
)
|
||||
use_auth_token: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
"help": (
|
||||
"Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
||||
"with private models)."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -179,15 +185,19 @@ class DataTrainingArguments:
|
||||
max_train_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of training examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
max_eval_samples: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
"help": (
|
||||
"For debugging purposes or quicker training, truncate the number of evaluation examples to this "
|
||||
"value if set."
|
||||
)
|
||||
},
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
@@ -509,7 +519,8 @@ def main():
|
||||
|
||||
train_step_progress_bar.close()
|
||||
epochs.write(
|
||||
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']})"
|
||||
f"Epoch... ({epoch + 1}/{num_epochs} | Loss: {train_metric['loss']}, Learning Rate:"
|
||||
f" {train_metric['learning_rate']})"
|
||||
)
|
||||
|
||||
# ======================== Evaluating ==============================
|
||||
|
||||
Reference in New Issue
Block a user