From 6f289dc97aaa1ade5f658ecdd16cc7a842505444 Mon Sep 17 00:00:00 2001
From: Julien Plu <plu.julien@gmail.com>
Date: Thu, 27 Aug 2020 14:45:34 +0200
Subject: [PATCH] Fix the TF Trainer gradient accumulation and the TF NER
 example (#6713)

* Align the TF NER example with the PT one

* Fix Dataset call

* Fix gradient accumulation training

* Apply style

* Address Sylvain's comments

* Address Sylvain's comments

* Apply style
---
 examples/token-classification/run_tf_ner.py | 28 +++++++++++++++++----
 examples/token-classification/utils_ner.py  |  2 +-
 src/transformers/modeling_tf_utils.py       |  2 +-
 src/transformers/trainer_tf.py              | 17 ++++++++++---
 4 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/examples/token-classification/run_tf_ner.py b/examples/token-classification/run_tf_ner.py
index 5f38d5f981..27aa48e905 100644
--- a/examples/token-classification/run_tf_ner.py
+++ b/examples/token-classification/run_tf_ner.py
@@ -18,6 +18,7 @@
 import logging
 import os
 from dataclasses import dataclass, field
+from importlib import import_module
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -32,7 +33,7 @@ from transformers import (
     TFTrainer,
     TFTrainingArguments,
 )
-from utils_ner import Split, TFNerDataset, get_labels
+from utils_ner import Split, TFTokenClassificationDataset, TokenClassificationTask
 
 
 logger = logging.getLogger(__name__)
@@ -50,6 +51,9 @@ class ModelArguments:
     config_name: Optional[str] = field(
         default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
     )
+    task_type: Optional[str] = field(
+        default="NER", metadata={"help": "Task type to fine tune in training (e.g. NER, POS, etc)"}
+    )
     tokenizer_name: Optional[str] = field(
         default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
     )
@@ -102,6 +106,17 @@ def main():
             f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
         )
 
+    module = import_module("tasks")
+
+    try:
+        token_classification_task_clazz = getattr(module, model_args.task_type)
+        token_classification_task: TokenClassificationTask = token_classification_task_clazz()
+    except AttributeError:
+        raise ValueError(
+            f"Task {model_args.task_type} needs to be defined as a TokenClassificationTask subclass in {module}. "
+            f"Available tasks classes are: {TokenClassificationTask.__subclasses__()}"
+        )
+
     # Setup logging
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
@@ -117,7 +132,7 @@ def main():
     logger.info("Training/evaluation parameters %s", training_args)
 
     # Prepare Token Classification task
-    labels = get_labels(data_args.labels)
+    labels = token_classification_task.get_labels(data_args.labels)
     label_map: Dict[int, str] = {i: label for i, label in enumerate(labels)}
     num_labels = len(labels)
 
@@ -150,7 +165,8 @@ def main():
 
     # Get datasets
     train_dataset = (
-        TFNerDataset(
+        TFTokenClassificationDataset(
+            token_classification_task=token_classification_task,
             data_dir=data_args.data_dir,
             tokenizer=tokenizer,
             labels=labels,
@@ -163,7 +179,8 @@ def main():
         else None
     )
     eval_dataset = (
-        TFNerDataset(
+        TFTokenClassificationDataset(
+            token_classification_task=token_classification_task,
             data_dir=data_args.data_dir,
             tokenizer=tokenizer,
             labels=labels,
@@ -233,7 +250,8 @@ def main():
 
     # Predict
     if training_args.do_predict:
-        test_dataset = TFNerDataset(
+        test_dataset = TFTokenClassificationDataset(
+            token_classification_task=token_classification_task,
             data_dir=data_args.data_dir,
             tokenizer=tokenizer,
             labels=labels,
diff --git a/examples/token-classification/utils_ner.py b/examples/token-classification/utils_ner.py
index 616ce20550..45c422927b 100644
--- a/examples/token-classification/utils_ner.py
+++ b/examples/token-classification/utils_ner.py
@@ -276,7 +276,7 @@ if is_torch_available():
 if is_tf_available():
     import tensorflow as tf
 
-    class TFNerDataset:
+    class TFTokenClassificationDataset:
         """
         This will be superseded by a framework-agnostic approach
         soon.
diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index c1d41beb06..47f0f30e9d 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -174,7 +174,7 @@ class TFTokenClassificationLoss:
         )
         # make sure only labels that are not equal to -100
         # are taken into account as loss
-        if tf.math.reduce_any(labels == -1).numpy() is True:
+        if tf.math.reduce_any(labels == -1):
             warnings.warn("Using `-1` to mask the loss for the token is deprecated. Please use `-100` instead.")
             active_loss = tf.reshape(labels, (-1,)) != -1
         else:
diff --git a/src/transformers/trainer_tf.py b/src/transformers/trainer_tf.py
index 2b8050bbf4..37510381df 100644
--- a/src/transformers/trainer_tf.py
+++ b/src/transformers/trainer_tf.py
@@ -620,13 +620,22 @@ class TFTrainer:
             self.optimizer.apply_gradients(list(zip(gradients, self.model.trainable_variables)))
         else:
             for _ in tf.range(self.args.gradient_accumulation_steps):
-                reduced_features = features[: self.args.train_batch_size / self.args.n_replicas]
-                reduced_labels = labels[: self.args.train_batch_size / self.args.n_replicas]
+                reduced_features = {
+                    k: ft[: self.args.train_batch_size // self.args.n_replicas] for k, ft in features.items()
+                }
+                reduced_labels = labels[: self.args.train_batch_size // self.args.n_replicas]
 
                 self.training_step(reduced_features, reduced_labels)
 
-                features = tf.concat(
-                    [features[self.args.train_batch_size / self.args.n_replicas :], reduced_features], axis=0
+                features = {
+                    k: tf.concat(
+                        [ft[self.args.train_batch_size // self.args.n_replicas :], reduced_features[k]], axis=0,
+                    )
+                    for k, ft in features.items()
+                }
+
+                labels = tf.concat(
+                    [labels[self.args.train_batch_size // self.args.n_replicas :], reduced_labels], axis=0
                 )
 
             gradients = self.gradient_accumulator.gradients