TF Examples Rewrite (#18451)

* Finished QA example

* Dodge a merge conflict

* Update text classification and LM examples

* Update NER example

* New Keras metrics WIP, fix NER example

* Update NER example

* Update MC, summarization and translation examples

* Add XLA warnings when shapes are variable

* Make sure batch_size is consistently scaled by num_replicas

* Add PushToHubCallback to all models

* Add docs links for KerasMetricCallback

* Add docs links for prepare_tf_dataset and jit_compile

* Correct inferred model names

* Don't assume the dataset has 'lang'

* Don't assume the dataset has 'lang'

* Write metrics in text classification

* Add 'framework' to TrainingArguments and TFTrainingArguments

* Export metrics in all examples and add tests

* Fix training args for Flax

* Update command line args for translation test

* make fixup

* Fix accidentally running other tests in fp16

* Remove do_train/do_eval from run_clm.py

* Remove do_train/do_eval from run_mlm.py

* Add tensorflow tests to circleci

* Fix circleci

* Update examples/tensorflow/language-modeling/run_mlm.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update examples/tensorflow/test_tensorflow_examples.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update examples/tensorflow/translation/run_translation.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update examples/tensorflow/token-classification/run_ner.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Fix save path for tests

* Fix some model card kwargs

* Explain the magical -1000

* Actually enable tests this time

* Skip text classification PR until we fix shape inference

* make fixup

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
This commit is contained in:
Matt
2022-08-10 11:49:51 -04:00
committed by GitHub
parent d7e2d7b40b
commit 6eb51450fa
15 changed files with 1490 additions and 660 deletions

View File

@@ -16,6 +16,7 @@
""" Fine-tuning the library models for sequence classification."""
# You can also adapt this script on your own text classification task. Pointers for this are left as comments.
import json
import logging
import os
import sys
@@ -29,12 +30,12 @@ from datasets import load_dataset
from transformers import (
AutoConfig,
AutoTokenizer,
DataCollatorWithPadding,
DefaultDataCollator,
HfArgumentParser,
PretrainedConfig,
PushToHubCallback,
TFAutoModelForSequenceClassification,
TFTrainingArguments,
create_optimizer,
set_seed,
)
from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, send_example_telemetry
@@ -383,10 +384,6 @@ def main():
datasets = datasets.map(preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache)
if data_args.pad_to_max_length:
data_collator = DefaultDataCollator(return_tensors="tf")
else:
data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
# endregion
with training_args.strategy.scope():
@@ -409,24 +406,10 @@ def main():
)
# endregion
# region Optimizer, loss and compilation
optimizer = tf.keras.optimizers.Adam(
learning_rate=training_args.learning_rate,
beta_1=training_args.adam_beta1,
beta_2=training_args.adam_beta2,
epsilon=training_args.adam_epsilon,
clipnorm=training_args.max_grad_norm,
)
if is_regression:
loss_fn = tf.keras.losses.MeanSquaredError()
metrics = []
else:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = ["accuracy"]
model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
# endregion
# region Convert data to a tf.data.Dataset
dataset_options = tf.data.Options()
dataset_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
num_replicas = training_args.strategy.num_replicas_in_sync
tf_data = dict()
max_samples = {
@@ -438,50 +421,121 @@ def main():
if key not in datasets:
tf_data[key] = None
continue
if (
(key == "train" and not training_args.do_train)
or (key == "validation" and not training_args.do_eval)
or (key == "test" and not training_args.do_predict)
):
tf_data[key] = None
continue
if key in ("train", "validation"):
assert "label" in datasets[key].features, f"Missing labels from {key} data!"
if key == "train":
shuffle = True
batch_size = training_args.per_device_train_batch_size
drop_remainder = True # Saves us worrying about scaling gradients for the last batch
batch_size = training_args.per_device_train_batch_size * num_replicas
else:
shuffle = False
batch_size = training_args.per_device_eval_batch_size
drop_remainder = False
batch_size = training_args.per_device_eval_batch_size * num_replicas
samples_limit = max_samples[key]
dataset = datasets[key]
if samples_limit is not None:
dataset = dataset.select(range(samples_limit))
data = dataset.to_tf_dataset(
columns=[col for col in dataset.column_names if col not in set(non_label_column_names + ["label"])],
# model.prepare_tf_dataset() wraps a Hugging Face dataset in a tf.data.Dataset which is ready to use in
# training. This is the recommended way to use a Hugging Face dataset when training with Keras. You can also
# use the lower-level dataset.to_tf_dataset() method, but you will have to specify things like column names
# yourself if you use this method, whereas they are automatically inferred from the model input names when
# using model.prepare_tf_dataset()
# For more info see the docs:
# https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.prepare_tf_dataset
# https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.to_tf_dataset
data = model.prepare_tf_dataset(
dataset,
shuffle=shuffle,
batch_size=batch_size,
collate_fn=data_collator,
drop_remainder=drop_remainder,
# `label_cols` is needed for user-defined losses, such as in this example
label_cols="label" if "label" in dataset.column_names else None,
tokenizer=tokenizer,
)
data = data.with_options(dataset_options)
tf_data[key] = data
# endregion
# region Optimizer, loss and compilation
if training_args.do_train:
num_train_steps = len(tf_data["train"]) * training_args.num_train_epochs
if training_args.warmup_steps > 0:
num_warmup_steps = training_args.warmup_steps
elif training_args.warmup_ratio > 0:
num_warmup_steps = int(num_train_steps * training_args.warmup_ratio)
else:
num_warmup_steps = 0
optimizer, schedule = create_optimizer(
init_lr=training_args.learning_rate,
num_train_steps=num_train_steps,
num_warmup_steps=num_warmup_steps,
adam_beta1=training_args.adam_beta1,
adam_beta2=training_args.adam_beta2,
adam_epsilon=training_args.adam_epsilon,
weight_decay_rate=training_args.weight_decay,
adam_global_clipnorm=training_args.max_grad_norm,
)
else:
optimizer = None
if is_regression:
metrics = []
else:
metrics = ["accuracy"]
model.compile(optimizer=optimizer, metrics=metrics)
# endregion
# region Preparing push_to_hub and model card
push_to_hub_model_id = training_args.push_to_hub_model_id
model_name = model_args.model_name_or_path.split("/")[-1]
if not push_to_hub_model_id:
push_to_hub_model_id = f"{model_name}-finetuned-text-classification"
model_card_kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"}
if training_args.push_to_hub:
callbacks = [
PushToHubCallback(
output_dir=training_args.output_dir,
model_id=push_to_hub_model_id,
organization=training_args.push_to_hub_organization,
token=training_args.push_to_hub_token,
tokenizer=tokenizer,
**model_card_kwargs,
)
]
else:
callbacks = []
# endregion
# region Training and validation
if tf_data["train"] is not None:
callbacks = [SavePretrainedCallback(output_dir=training_args.output_dir)]
model.fit(
tf_data["train"],
validation_data=tf_data["validation"],
epochs=int(training_args.num_train_epochs),
callbacks=callbacks,
)
elif tf_data["validation"] is not None:
# If there's a validation dataset but no training set, just evaluate the metrics
if tf_data["validation"] is not None:
logger.info("Computing metrics on validation data...")
if is_regression:
loss = model.evaluate(tf_data["validation"])
logger.info(f"Loss: {loss:.5f}")
logger.info(f"Eval loss: {loss:.5f}")
else:
loss, accuracy = model.evaluate(tf_data["validation"])
logger.info(f"Loss: {loss:.5f}, Accuracy: {accuracy * 100:.4f}%")
logger.info(f"Eval loss: {loss:.5f}, Eval accuracy: {accuracy * 100:.4f}%")
if training_args.output_dir is not None:
output_eval_file = os.path.join(training_args.output_dir, "all_results.json")
eval_dict = {"eval_loss": loss}
if not is_regression:
eval_dict["eval_accuracy"] = accuracy
with open(output_eval_file, "w") as writer:
writer.write(json.dumps(eval_dict))
# endregion
# region Prediction
@@ -501,14 +555,9 @@ def main():
logger.info(f"Wrote predictions to {output_test_file}!")
# endregion
# region Prediction losses
# This section is outside the scope() because it's very quick to compute, but behaves badly inside it
if "test" in datasets and "label" in datasets["test"].features:
print("Computing prediction loss on test labels...")
labels = datasets["test"]["label"]
loss = float(loss_fn(labels, predictions).numpy())
print(f"Test loss: {loss:.4f}")
# endregion
if training_args.output_dir is not None and not training_args.push_to_hub:
# If we're not pushing to hub, at least save a local copy when we're done
model.save_pretrained(training_args.output_dir)
if __name__ == "__main__":