Add an option to reduce compile() console spam (#23938)

* Add an option to reduce compile() console spam
* Add annotations to the example scripts
* Add notes to the quicktour docs as well
* minor fix
2023-06-02 15:28:52 +01:00
parent c9cf337772
commit 167a0d8f87
23 changed files with 54 additions and 31 deletions
--- a/examples/tensorflow/contrastive-image-text/run_clip.py
+++ b/examples/tensorflow/contrastive-image-text/run_clip.py
@@ -561,6 +561,8 @@ def main():
            weight_decay_rate=training_args.weight_decay,
            adam_global_clipnorm=training_args.max_grad_norm,
        )
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, jit_compile=training_args.xla)

        if not training_args.do_eval:
--- a/examples/tensorflow/image-classification/run_image_classification.py
+++ b/examples/tensorflow/image-classification/run_image_classification.py
@@ -497,6 +497,8 @@ def main():
                collate_fn=collate_fn,
            ).with_options(dataset_options)

+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, jit_compile=training_args.xla, metrics=["accuracy"])

        push_to_hub_model_id = training_args.push_to_hub_model_id
--- a/examples/tensorflow/language-modeling-tpu/run_mlm.py
+++ b/examples/tensorflow/language-modeling-tpu/run_mlm.py
@@ -235,8 +235,10 @@ def main(args):
            num_warmup_steps=total_train_steps // 20,
            init_lr=args.learning_rate,
            weight_decay_rate=args.weight_decay_rate,
-            # TODO Add the other Adam parameters?
        )
+
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, metrics=["accuracy"])

    def decode_fn(example):
--- a/examples/tensorflow/language-modeling/run_clm.py
+++ b/examples/tensorflow/language-modeling/run_clm.py
@@ -537,7 +537,8 @@ def main():
            adam_global_clipnorm=training_args.max_grad_norm,
        )

-        # no user-specified loss = will use the model internal loss
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, jit_compile=training_args.xla)
        # endregion

--- a/examples/tensorflow/language-modeling/run_mlm.py
+++ b/examples/tensorflow/language-modeling/run_mlm.py
@@ -559,8 +559,9 @@ def main():
            adam_global_clipnorm=training_args.max_grad_norm,
        )

-        # no user-specified loss = will use the model internal loss
-        model.compile(optimizer=optimizer, jit_compile=training_args.xla, run_eagerly=True)
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
+        model.compile(optimizer=optimizer, jit_compile=training_args.xla)
        # endregion

        # region Preparing push_to_hub and model card
--- a/examples/tensorflow/multiple-choice/run_swag.py
+++ b/examples/tensorflow/multiple-choice/run_swag.py
@@ -455,6 +455,8 @@ def main():
            )
        else:
            optimizer = None
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, metrics=["accuracy"], jit_compile=training_args.xla)
        # endregion

--- a/examples/tensorflow/question-answering/run_qa.py
+++ b/examples/tensorflow/question-answering/run_qa.py
@@ -656,7 +656,8 @@ def main():
                adam_global_clipnorm=training_args.max_grad_norm,
            )

-            # no user-specified loss = will use the model internal loss
+            # Transformers models compute the right loss for their task by default when labels are passed, and will
+            # use this for training unless you specify your own loss function in compile().
            model.compile(optimizer=optimizer, jit_compile=training_args.xla, metrics=["accuracy"])

        else:
--- a/examples/tensorflow/summarization/run_summarization.py
+++ b/examples/tensorflow/summarization/run_summarization.py
@@ -674,6 +674,8 @@ def main():
        # endregion

        # region Training
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, jit_compile=training_args.xla)
        eval_metrics = None
        if training_args.do_train:
--- a/examples/tensorflow/text-classification/run_glue.py
+++ b/examples/tensorflow/text-classification/run_glue.py
@@ -453,6 +453,8 @@ def main():
            metrics = []
        else:
            metrics = ["accuracy"]
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, metrics=metrics, jit_compile=training_args.xla)
        # endregion

--- a/examples/tensorflow/text-classification/run_text_classification.py
+++ b/examples/tensorflow/text-classification/run_text_classification.py
@@ -487,6 +487,8 @@ def main():
            metrics = []
        else:
            metrics = ["accuracy"]
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, metrics=metrics)
        # endregion

--- a/examples/tensorflow/token-classification/run_ner.py
+++ b/examples/tensorflow/token-classification/run_ner.py
@@ -454,7 +454,8 @@ def main():
            weight_decay_rate=training_args.weight_decay,
            adam_global_clipnorm=training_args.max_grad_norm,
        )
-
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, jit_compile=training_args.xla)
        # endregion

--- a/examples/tensorflow/translation/run_translation.py
+++ b/examples/tensorflow/translation/run_translation.py
@@ -643,6 +643,8 @@ def main():

        # region Training
        eval_metrics = None
+        # Transformers models compute the right loss for their task by default when labels are passed, and will
+        # use this for training unless you specify your own loss function in compile().
        model.compile(optimizer=optimizer, jit_compile=training_args.xla)

        if training_args.do_train: