Trainer - deprecate tokenizer for processing_class (#32385)

* Trainer - deprecate tokenizer for processing_class

* Extend chage across Seq2Seq trainer and docs

* Add tests

* Update to FutureWarning and add deprecation version
This commit is contained in:
amyeroberts
2024-10-02 14:08:46 +01:00
committed by GitHub
parent e7c8af7f33
commit b7474f211d
99 changed files with 569 additions and 442 deletions

View File

@@ -302,7 +302,7 @@ def main():
tokenizer, data_args, model.config.decoder_start_token_id, training_args.tpu_num_cores
),
compute_metrics=compute_metrics_fn,
tokenizer=tokenizer,
processing_class=tokenizer,
)
all_metrics = {}

View File

@@ -394,7 +394,7 @@ def main():
train_dataset=raw_datasets["train"] if training_args.do_train else None,
eval_dataset=raw_datasets["eval"] if training_args.do_eval else None,
compute_metrics=compute_metrics,
tokenizer=feature_extractor,
processing_class=feature_extractor,
)
# Training

View File

@@ -396,7 +396,7 @@ def main():
train_dataset=dataset["train"] if training_args.do_train else None,
eval_dataset=dataset["validation"] if training_args.do_eval else None,
compute_metrics=compute_metrics,
tokenizer=image_processor,
processing_class=image_processor,
data_collator=collate_fn,
)

View File

@@ -364,7 +364,7 @@ def main():
args=training_args,
train_dataset=ds["train"] if training_args.do_train else None,
eval_dataset=ds["validation"] if training_args.do_eval else None,
tokenizer=image_processor,
processing_class=image_processor,
data_collator=collate_fn,
)

View File

@@ -443,7 +443,7 @@ def main():
args=training_args,
train_dataset=ds["train"] if training_args.do_train else None,
eval_dataset=ds["validation"] if training_args.do_eval else None,
tokenizer=image_processor,
processing_class=image_processor,
data_collator=collate_fn,
)

View File

@@ -445,7 +445,7 @@ def main():
args=training_args,
train_dataset=dataset["train"] if training_args.do_train else None,
eval_dataset=dataset["validation"] if training_args.do_eval else None,
tokenizer=image_processor,
processing_class=image_processor,
data_collator=collate_fn,
compute_metrics=compute_metrics,
)

View File

@@ -586,7 +586,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
# Data collator will default to DataCollatorWithPadding, so we change it.
data_collator=default_data_collator,
compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,

View File

@@ -793,7 +793,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
# Data collator will default to DataCollatorWithPadding, so we change it.
data_collator=default_data_collator,
compute_metrics=compute_metrics if training_args.do_eval and not is_torch_tpu_available() else None,

View File

@@ -622,7 +622,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
preprocess_logits_for_metrics=preprocess_logits_for_metrics

View File

@@ -519,7 +519,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_classtokenizer=tokenizer,
data_collator=data_collator,
)

View File

@@ -440,7 +440,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics,
)

View File

@@ -488,7 +488,7 @@ def main():
args=training_args,
train_dataset=dataset["train"] if training_args.do_train else None,
eval_dataset=dataset["validation"] if training_args.do_eval else None,
tokenizer=image_processor,
processing_class=image_processor,
data_collator=collate_fn,
compute_metrics=eval_compute_metrics_fn,
)

View File

@@ -640,7 +640,7 @@ def main():
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
eval_examples=eval_examples if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
post_process_function=post_processing_function,
compute_metrics=compute_metrics,

View File

@@ -666,7 +666,7 @@ def main():
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
eval_examples=eval_examples if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
post_process_function=post_processing_function,
compute_metrics=compute_metrics,

View File

@@ -663,7 +663,7 @@ def main():
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
eval_examples=eval_examples if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
post_process_function=post_processing_function,

View File

@@ -403,7 +403,7 @@ def main():
train_dataset=dataset["train"] if training_args.do_train else None,
eval_dataset=dataset["validation"] if training_args.do_eval else None,
compute_metrics=compute_metrics,
tokenizer=image_processor,
processing_class=image_processor,
data_collator=default_data_collator,
)

View File

@@ -751,7 +751,7 @@ def main():
compute_metrics=compute_metrics,
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
tokenizer=processor,
processing_class=processor,
preprocess_logits_for_metrics=preprocess_logits_for_metrics,
)

View File

@@ -747,7 +747,7 @@ def main():
compute_metrics=compute_metrics,
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
tokenizer=processor,
processing_class=processor,
)
# 8. Finally, we can start training

View File

@@ -569,7 +569,7 @@ def main():
args=training_args,
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
tokenizer=feature_extractor,
processing_class=feature_extractor,
data_collator=data_collator,
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
)

View File

@@ -677,7 +677,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
)

View File

@@ -674,7 +674,7 @@ def main():
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
compute_metrics=compute_metrics,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
)

View File

@@ -531,7 +531,7 @@ def main():
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
compute_metrics=compute_metrics,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
)

View File

@@ -393,7 +393,7 @@ def main():
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
compute_metrics=compute_metrics,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
)

View File

@@ -567,7 +567,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics,
)

View File

@@ -597,7 +597,7 @@ def main():
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
tokenizer=tokenizer,
processing_class=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
)