Replace as_target context managers by direct calls (#18325)
* Preliminary work on tokenizers * Quality + fix tests * Treat processors * Fix pad * Remove all uses of in tests, docs and examples * Replace all as_target_tokenizer * Fix tests * Fix quality * Update examples/flax/image-captioning/run_image_captioning_flax.py Co-authored-by: amyeroberts <amy@huggingface.co> * Style Co-authored-by: amyeroberts <amy@huggingface.co>
This commit is contained in:
@@ -453,9 +453,8 @@ def main():
|
||||
inputs, targets = preprocess_squad_batch(examples, question_column, context_column, answer_column)
|
||||
|
||||
model_inputs = tokenizer(inputs, max_length=max_seq_length, padding=padding, truncation=True)
|
||||
# Setup the tokenizer for targets
|
||||
with tokenizer.as_target_tokenizer():
|
||||
labels = tokenizer(targets, max_length=max_answer_length, padding=padding, truncation=True)
|
||||
# Tokenize targets with text_target=...
|
||||
labels = tokenizer(text_target=targets, max_length=max_answer_length, padding=padding, truncation=True)
|
||||
|
||||
# If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore
|
||||
# padding in the loss.
|
||||
@@ -479,9 +478,8 @@ def main():
|
||||
return_overflowing_tokens=True,
|
||||
return_offsets_mapping=True,
|
||||
)
|
||||
# Setup the tokenizer for targets
|
||||
with tokenizer.as_target_tokenizer():
|
||||
labels = tokenizer(targets, max_length=max_answer_length, padding=padding, truncation=True)
|
||||
# Tokenize targets with the `text_target` keyword argument
|
||||
labels = tokenizer(text_target=targets, max_length=max_answer_length, padding=padding, truncation=True)
|
||||
|
||||
# Since one example might give us several features if it has a long context, we need a map from a feature to
|
||||
# its corresponding example. This key gives us just that.
|
||||
|
||||
Reference in New Issue
Block a user