Replace as_target context managers by direct calls (#18325)

* Preliminary work on tokenizers

* Quality + fix tests

* Treat processors

* Fix pad

* Remove all uses of  in tests, docs and examples

* Replace all as_target_tokenizer

* Fix tests

* Fix quality

* Update examples/flax/image-captioning/run_image_captioning_flax.py

Co-authored-by: amyeroberts <amy@huggingface.co>

* Style

Co-authored-by: amyeroberts <amy@huggingface.co>
This commit is contained in:
Sylvain Gugger
2022-07-29 08:09:09 -04:00
committed by GitHub
parent a64bcb564d
commit 986526a0e4
80 changed files with 725 additions and 550 deletions

View File

@@ -552,11 +552,14 @@ def main():
targets = captions
model_inputs = {}
# Setup the tokenizer for targets
with tokenizer.as_target_tokenizer():
labels = tokenizer(
targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np"
)
labels = tokenizer(
text_target=targets,
max_length=max_target_length,
padding="max_length",
truncation=True,
return_tensors="np",
)
model_inputs["labels"] = labels["input_ids"]
decoder_input_ids = shift_tokens_right_fn(
labels["input_ids"], model.config.pad_token_id, model.config.decoder_start_token_id

View File

@@ -590,10 +590,13 @@ def main():
)
# Setup the tokenizer for targets
with tokenizer.as_target_tokenizer():
labels = tokenizer(
targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np"
)
labels = tokenizer(
text_target=targets,
max_length=max_target_length,
padding="max_length",
truncation=True,
return_tensors="np",
)
model_inputs["labels"] = labels["input_ids"]
decoder_input_ids = shift_tokens_right_fn(