Trainer - deprecate tokenizer for processing_class (#32385)
* Trainer - deprecate tokenizer for processing_class
* Extend change across Seq2Seq trainer and docs
* Add tests
* Update to FutureWarning and add deprecation version
This commit is contained in:
@@ -302,7 +302,7 @@ def main():
|
||||
tokenizer, data_args, model.config.decoder_start_token_id, training_args.tpu_num_cores
|
||||
),
|
||||
compute_metrics=compute_metrics_fn,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
)
|
||||
|
||||
all_metrics = {}
|
||||
|
||||
@@ -394,7 +394,7 @@ def main():
|
||||
train_dataset=raw_datasets["train"] if training_args.do_train else None,
|
||||
eval_dataset=raw_datasets["eval"] if training_args.do_eval else None,
|
||||
compute_metrics=compute_metrics,
|
||||
tokenizer=feature_extractor,
|
||||
processing_class=feature_extractor,
|
||||
)
|
||||
|
||||
# Training
|
||||
|
||||
@@ -396,7 +396,7 @@ def main():
|
||||
train_dataset=dataset["train"] if training_args.do_train else None,
|
||||
eval_dataset=dataset["validation"] if training_args.do_eval else None,
|
||||
compute_metrics=compute_metrics,
|
||||
tokenizer=image_processor,
|
||||
processing_class=image_processor,
|
||||
data_collator=collate_fn,
|
||||
)
|
||||
|
||||
|
||||
@@ -364,7 +364,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=ds["train"] if training_args.do_train else None,
|
||||
eval_dataset=ds["validation"] if training_args.do_eval else None,
|
||||
tokenizer=image_processor,
|
||||
processing_class=image_processor,
|
||||
data_collator=collate_fn,
|
||||
)
|
||||
|
||||
|
||||
@@ -443,7 +443,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=ds["train"] if training_args.do_train else None,
|
||||
eval_dataset=ds["validation"] if training_args.do_eval else None,
|
||||
tokenizer=image_processor,
|
||||
processing_class=image_processor,
|
||||
data_collator=collate_fn,
|
||||
)
|
||||
|
||||
|
||||
@@ -445,7 +445,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=dataset["train"] if training_args.do_train else None,
|
||||
eval_dataset=dataset["validation"] if training_args.do_eval else None,
|
||||
tokenizer=image_processor,
|
||||
processing_class=image_processor,
|
||||
data_collator=collate_fn,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
@@ -586,7 +586,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
# Data collator will default to DataCollatorWithPadding, so we change it.
|
||||
data_collator=default_data_collator,
|
||||
compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
|
||||
|
||||
@@ -793,7 +793,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
# Data collator will default to DataCollatorWithPadding, so we change it.
|
||||
data_collator=default_data_collator,
|
||||
compute_metrics=compute_metrics if training_args.do_eval and not is_torch_tpu_available() else None,
|
||||
|
||||
@@ -622,7 +622,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
compute_metrics=compute_metrics if training_args.do_eval and not is_torch_xla_available() else None,
|
||||
preprocess_logits_for_metrics=preprocess_logits_for_metrics
|
||||
|
||||
@@ -519,7 +519,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
)
|
||||
|
||||
|
||||
@@ -440,7 +440,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
@@ -488,7 +488,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=dataset["train"] if training_args.do_train else None,
|
||||
eval_dataset=dataset["validation"] if training_args.do_eval else None,
|
||||
tokenizer=image_processor,
|
||||
processing_class=image_processor,
|
||||
data_collator=collate_fn,
|
||||
compute_metrics=eval_compute_metrics_fn,
|
||||
)
|
||||
|
||||
@@ -640,7 +640,7 @@ def main():
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
eval_examples=eval_examples if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
post_process_function=post_processing_function,
|
||||
compute_metrics=compute_metrics,
|
||||
|
||||
@@ -666,7 +666,7 @@ def main():
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
eval_examples=eval_examples if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
post_process_function=post_processing_function,
|
||||
compute_metrics=compute_metrics,
|
||||
|
||||
@@ -663,7 +663,7 @@ def main():
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
eval_examples=eval_examples if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
||||
post_process_function=post_processing_function,
|
||||
|
||||
@@ -403,7 +403,7 @@ def main():
|
||||
train_dataset=dataset["train"] if training_args.do_train else None,
|
||||
eval_dataset=dataset["validation"] if training_args.do_eval else None,
|
||||
compute_metrics=compute_metrics,
|
||||
tokenizer=image_processor,
|
||||
processing_class=image_processor,
|
||||
data_collator=default_data_collator,
|
||||
)
|
||||
|
||||
|
||||
@@ -751,7 +751,7 @@ def main():
|
||||
compute_metrics=compute_metrics,
|
||||
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
|
||||
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
|
||||
tokenizer=processor,
|
||||
processing_class=processor,
|
||||
preprocess_logits_for_metrics=preprocess_logits_for_metrics,
|
||||
)
|
||||
|
||||
|
||||
@@ -747,7 +747,7 @@ def main():
|
||||
compute_metrics=compute_metrics,
|
||||
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
|
||||
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
|
||||
tokenizer=processor,
|
||||
processing_class=processor,
|
||||
)
|
||||
|
||||
# 8. Finally, we can start training
|
||||
|
||||
@@ -569,7 +569,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
|
||||
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
|
||||
tokenizer=feature_extractor,
|
||||
processing_class=feature_extractor,
|
||||
data_collator=data_collator,
|
||||
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
||||
)
|
||||
|
||||
@@ -677,7 +677,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
||||
)
|
||||
|
||||
@@ -674,7 +674,7 @@ def main():
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
compute_metrics=compute_metrics,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
)
|
||||
|
||||
|
||||
@@ -531,7 +531,7 @@ def main():
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
compute_metrics=compute_metrics,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
)
|
||||
|
||||
|
||||
@@ -393,7 +393,7 @@ def main():
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
compute_metrics=compute_metrics,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
)
|
||||
|
||||
|
||||
@@ -567,7 +567,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
compute_metrics=compute_metrics,
|
||||
)
|
||||
|
||||
@@ -597,7 +597,7 @@ def main():
|
||||
args=training_args,
|
||||
train_dataset=train_dataset if training_args.do_train else None,
|
||||
eval_dataset=eval_dataset if training_args.do_eval else None,
|
||||
tokenizer=tokenizer,
|
||||
processing_class=tokenizer,
|
||||
data_collator=data_collator,
|
||||
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user