From c0554776de99da6df8c8167897a46f20f6d3e840 Mon Sep 17 00:00:00 2001
From: Patrick von Platen <patrick.v.platen@gmail.com>
Date: Mon, 8 Jun 2020 15:31:12 +0200
Subject: [PATCH] fix PR (#4810)

---
 src/transformers/benchmark/benchmark.py       | 179 ++++++++++++------
 .../benchmark/benchmark_args_utils.py         |   6 +
 src/transformers/benchmark/benchmark_utils.py |  46 +++--
 tests/test_benchmark.py                       |  54 +++++-
 4 files changed, 215 insertions(+), 70 deletions(-)

diff --git a/src/transformers/benchmark/benchmark.py b/src/transformers/benchmark/benchmark.py
index bb86b50f8b..742d84b773 100644
--- a/src/transformers/benchmark/benchmark.py
+++ b/src/transformers/benchmark/benchmark.py
@@ -18,8 +18,8 @@
 """
 
 
-import inspect
 import logging
+import os
 import timeit
 
 from transformers import MODEL_MAPPING, MODEL_WITH_LM_HEAD_MAPPING, PretrainedConfig, is_torch_available
@@ -52,66 +52,34 @@ class PyTorchBenchmark(Benchmark):
             model.to(self.args.device)
             model.train()
 
+            # encoder-decoder has vocab size saved differently
+            vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
             input_ids = torch.randint(
-                model.config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
+                vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
             )
 
-            def compute_loss_and_backprob():
-                # TODO: Not all models call labels argument labels => this hack using the function signature should be corrected once all models have a common name for labels
-                function_argument_names = inspect.getfullargspec(model.forward).args
-                if "labels" in function_argument_names:
-                    loss = model(input_ids, labels=input_ids)[0]
-                elif "lm_labels" in function_argument_names:
-                    loss = model(input_ids, lm_labels=input_ids)[0]
-                elif "masked_lm_labels" in function_argument_names:
-                    loss = model(input_ids, masked_lm_labels=input_ids)[0]
-                else:
-                    NotImplementedError(f"{model_name} does not seem to allow training with labels")
-
+            def compute_loss_and_backprob_encoder():
+                loss = model(input_ids, labels=input_ids)[0]
                 loss.backward()
                 model.zero_grad()
 
-            if trace_memory is True:
-                if self.args.trace_memory_line_by_line or self.args.n_gpu == 0:
-                    trace = start_memory_tracing("transformers")
-                else:
-                    # clear cuda cache
-                    torch.cuda.empty_cache()
-                    torch.cuda.reset_peak_memory_stats()
+            def compute_loss_and_backprob_encoder_decoder():
+                loss = model(input_ids, decoder_input_ids=input_ids, labels=input_ids)[0]
+                loss.backward()
+                model.zero_grad()
 
-                # calculate loss and do backpropagation
-                compute_loss_and_backprob()
-
-                if self.args.trace_memory_line_by_line or self.args.n_gpu == 0:
-                    summary = stop_memory_tracing(trace)
-                    memory = summary.total
-                else:
-                    memory = Memory(torch.cuda.max_memory_reserved())
-
-                return memory
-            else:
-                # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-                runtimes = timeit.repeat(lambda: compute_loss_and_backprob(), repeat=self.args.repeat, number=10,)
-                return min(runtimes) / 10.0
-        except RuntimeError as e:
-            self.print_fn("Doesn't fit on GPU. {}".format(e))
-            return "N/A"
-
-    def inference(self, model_name, batch_size, sequence_length, trace_memory=False):
-        try:
-            config = self.config_dict[model_name]
-            model = MODEL_MAPPING[config.__class__](config)
-            model.to(self.args.device)
-            model.eval()
-
-            input_ids = torch.randint(
-                config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
+            _train = (
+                compute_loss_and_backprob_encoder_decoder
+                if config.is_encoder_decoder
+                else compute_loss_and_backprob_encoder
             )
+
             if trace_memory is True:
-                if self.args.trace_memory_line_by_line or self.args.n_gpu == 0:
+                if self.args.trace_memory_line_by_line:
                     trace = start_memory_tracing("transformers")
-                else:
-                    # clear cuda cache
+
+                if self.args.n_gpu > 0:
+                    # clear gpu cache
                     torch.cuda.empty_cache()
                     if hasattr(torch.cuda, "max_memory_reserved"):
                         torch.cuda.reset_peak_memory_stats()
@@ -121,12 +89,16 @@ class PyTorchBenchmark(Benchmark):
                         )
                         torch.cuda.reset_max_memory_cached()
 
-                model(input_ids)
+                # calculate loss and do backpropagation
+                _train()
 
-                if self.args.trace_memory_line_by_line or self.args.n_gpu == 0:
+                if self.args.trace_memory_line_by_line:
                     summary = stop_memory_tracing(trace)
-                    memory = summary.total
                 else:
+                    summary = None
+
+                if self.args.n_gpu > 0:
+                    # gpu
                     if hasattr(torch.cuda, "max_memory_reserved"):
                         memory = Memory(torch.cuda.max_memory_reserved())
                     else:
@@ -134,11 +106,105 @@ class PyTorchBenchmark(Benchmark):
                             "Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage"
                         )
                         memory = Memory(torch.cuda.max_memory_cached())
+                else:
+                    # cpu
+                    try:
+                        import psutil
+                    except (ImportError):
+                        logger.warning(
+                            "Psutil not installed, we won't log CPU memory usage. "
+                            "Install psutil (pip install psutil) to use CPU memory tracing."
+                        )
+                        memory = "N/A"
+                    else:
+                        process = psutil.Process(os.getpid())
+                        memory = Memory(process.memory_info().rss)
 
-                return memory
+                return memory, summary
             else:
                 # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
-                runtimes = timeit.repeat(lambda: model(input_ids), repeat=self.args.repeat, number=10,)
+                runtimes = timeit.repeat(_train, repeat=self.args.repeat, number=10,)
+                return min(runtimes) / 10.0
+        except RuntimeError as e:
+            self.print_fn("Doesn't fit on GPU. {}".format(e))
+            return ("N/A", None) if trace_memory is True else "N/A"  # memory callers unpack (memory, summary)
+
+    def inference(self, model_name, batch_size, sequence_length, trace_memory=False):
+        try:
+            config = self.config_dict[model_name]
+
+            if self.args.with_lm_head:
+                model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)
+            else:
+                model = MODEL_MAPPING[config.__class__](config)
+
+            model.to(self.args.device)
+            model.eval()
+
+            # encoder-decoder has vocab size saved differently
+            vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
+
+            input_ids = torch.randint(
+                vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
+            )
+
+            def encoder_decoder_forward():
+                model(input_ids, decoder_input_ids=input_ids)
+
+            def encoder_forward():
+                model(input_ids)
+
+            _forward = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward
+
+            if trace_memory is True:
+                if self.args.trace_memory_line_by_line:
+                    trace = start_memory_tracing("transformers")
+
+                if self.args.n_gpu > 0:
+                    # clear gpu cache
+                    torch.cuda.empty_cache()
+                    if hasattr(torch.cuda, "max_memory_reserved"):
+                        torch.cuda.reset_peak_memory_stats()
+                    else:
+                        logger.info(
+                            "Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage"
+                        )
+                        torch.cuda.reset_max_memory_cached()
+
+                _forward()
+
+                if self.args.trace_memory_line_by_line:
+                    summary = stop_memory_tracing(trace)
+                else:
+                    summary = None
+
+                if self.args.n_gpu > 0:
+                    # gpu
+                    if hasattr(torch.cuda, "max_memory_reserved"):
+                        memory = Memory(torch.cuda.max_memory_reserved())
+                    else:
+                        logger.info(
+                            "Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage"
+                        )
+                        memory = Memory(torch.cuda.max_memory_cached())
+                else:
+                    # cpu
+                    try:
+                        import psutil
+                    except (ImportError):
+                        logger.warning(
+                            "Psutil not installed, we won't log CPU memory usage. "
+                            "Install psutil (pip install psutil) to use CPU memory tracing."
+                        )
+                        memory = "N/A"
+                    else:
+                        process = psutil.Process(os.getpid())
+                        memory = Memory(process.memory_info().rss)
+
+                return memory, summary
+            else:
+                # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
+                runtimes = timeit.repeat(_forward, repeat=self.args.repeat, number=10,)
                 return min(runtimes) / 10.0
 
         except RuntimeError as e:
diff --git a/src/transformers/benchmark/benchmark_args_utils.py b/src/transformers/benchmark/benchmark_args_utils.py
index 849f0c443e..ac76c37eb1 100644
--- a/src/transformers/benchmark/benchmark_args_utils.py
+++ b/src/transformers/benchmark/benchmark_args_utils.py
@@ -61,6 +61,12 @@ class BenchmarkArguments:
     save_to_csv: bool = field(default=False, metadata={"help": "Save result to a CSV file"})
     log_print: bool = field(default=False, metadata={"help": "Save all print statements in a log file"})
     no_env_print: bool = field(default=False, metadata={"help": "Don't print environment information"})
+    with_lm_head: bool = field(
+        default=False,
+        metadata={
+            "help": "Use model with its language model head (MODEL_WITH_LM_HEAD_MAPPING instead of MODEL_MAPPING)"
+        },
+    )
     inference_time_csv_file: str = field(
         default=f"inference_time_{round(time())}.csv",
         metadata={"help": "CSV filename used if saving time results to csv."},
diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py
index d7ca0f3b1e..7b3c3304b1 100644
--- a/src/transformers/benchmark/benchmark_utils.py
+++ b/src/transformers/benchmark/benchmark_utils.py
@@ -36,7 +36,15 @@ logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
 _is_memory_tracing_enabled = False
 
 BenchmarkOutput = namedtuple(
-    "BenchmarkOutput", ["time_inference_result", "memory_inference_result", "time_train_result", "memory_train_result"]
+    "BenchmarkOutput",
+    [
+        "time_inference_result",
+        "memory_inference_result",
+        "time_train_result",
+        "memory_train_result",
+        "inference_summary",
+        "train_summary",
+    ],
 )
 
 
@@ -401,15 +409,10 @@ class Benchmark(ABC):
     def print_fn(self):
         if self._print_fn is None:
             if self.args.log_print:
-                logging.basicConfig(
-                    level=logging.DEBUG,
-                    filename=self.args.log_filename,
-                    filemode="a+",
-                    format="%(asctime)-15s %(levelname)-8s %(message)s",
-                )
 
                 def print_and_log(*args):
-                    logging.info(*args)
+                    with open(self.args.log_filename, "a") as log_file:
+                        log_file.write(" ".join(str(arg) for arg in args) + "\n")  # same text print(*args) emits
                     print(*args)
 
                 self._print_fn = print_and_log
@@ -454,11 +457,15 @@ class Benchmark(ABC):
             train_result_time[model_name] = copy.deepcopy(model_dict)
             train_result_memory[model_name] = copy.deepcopy(model_dict)
 
+            inference_summary = train_summary = None
+
             for batch_size in self.args.batch_sizes:
                 for sequence_length in self.args.sequence_lengths:
                     if not self.args.no_inference:
                         if not self.args.no_memory:
-                            memory = self.inference(model_name, batch_size, sequence_length, trace_memory=True)
+                            memory, inference_summary = self.inference(
+                                model_name, batch_size, sequence_length, trace_memory=True
+                            )
                             inference_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                         if not self.args.no_speed:
                             time = self.inference(model_name, batch_size, sequence_length, trace_memory=False)
@@ -466,7 +473,9 @@ class Benchmark(ABC):
 
                     if self.args.training:
                         if not self.args.no_memory:
-                            memory = self.train(model_name, batch_size, sequence_length, trace_memory=True)
+                            memory, train_summary = self.train(
+                                model_name, batch_size, sequence_length, trace_memory=True
+                            )
                             train_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                         if not self.args.no_speed:
                             time = self.inference(model_name, batch_size, sequence_length, trace_memory=False)
@@ -483,6 +492,10 @@ class Benchmark(ABC):
                 self.print_results(inference_result_memory)
                 self.save_to_csv(inference_result_memory, self.args.inference_memory_csv_file)
 
+            if self.args.trace_memory_line_by_line:
+                self.print_fn("======= INFERENCE - MEMORY LINE BY LINE TRACE - SUMMARY =======")
+                self.print_memory_trace_statistics(inference_summary)
+
         if self.args.training:
             if not self.args.no_speed:
                 self.print_fn("======= TRAIN - SPEED - RESULT =======")
@@ -494,6 +507,10 @@ class Benchmark(ABC):
                 self.print_results(train_result_memory)
                 self.save_to_csv(train_result_memory, self.args.train_memory_csv_file)
 
+            if self.args.trace_memory_line_by_line:
+                self.print_fn("======= TRAIN - MEMORY LINE BY LINE TRACE - SUMMARY =======")
+                self.print_memory_trace_statistics(train_summary)
+
         if not self.args.no_env_print:
             self.print_fn("\n======== ENVIRONMENT - INFORMATION ========")
             self.print_fn(
@@ -506,7 +523,14 @@ class Benchmark(ABC):
                 for key, value in self.environment_info.items():
                     writer.writerow([key, value])
 
-        return BenchmarkOutput(inference_result_time, inference_result_memory, train_result_time, train_result_memory)
+        return BenchmarkOutput(
+            inference_result_time,
+            inference_result_memory,
+            train_result_time,
+            train_result_memory,
+            inference_summary,
+            train_summary,
+        )
 
     @property
     def environment_info(self):
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index c6808e2480..20e830461e 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -3,7 +3,7 @@ import tempfile
 import unittest
 from pathlib import Path
 
-from transformers import GPT2Config, is_torch_available
+from transformers import AutoConfig, is_torch_available
 
 from .utils import require_torch
 
@@ -45,7 +45,18 @@ class BenchmarkTest(unittest.TestCase):
 
     def test_inference_with_configs(self):
         MODEL_ID = "sshleifer/tiny-gpt2"
-        config = GPT2Config.from_pretrained(MODEL_ID)
+        config = AutoConfig.from_pretrained(MODEL_ID)
+        benchmark_args = PyTorchBenchmarkArguments(
+            models=[MODEL_ID], training=False, no_inference=False, sequence_lengths=[8], batch_sizes=[1]
+        )
+        benchmark = PyTorchBenchmark(benchmark_args, configs=[config])
+        results = benchmark.run()
+        self.check_results_dict_not_empty(results.time_inference_result)
+        self.check_results_dict_not_empty(results.memory_inference_result)
+
+    def test_inference_encoder_decoder_with_configs(self):
+        MODEL_ID = "sshleifer/tinier_bart"
+        config = AutoConfig.from_pretrained(MODEL_ID)
         benchmark_args = PyTorchBenchmarkArguments(
             models=[MODEL_ID], training=False, no_inference=False, sequence_lengths=[8], batch_sizes=[1]
         )
@@ -56,7 +67,18 @@ class BenchmarkTest(unittest.TestCase):
 
     def test_train_with_configs(self):
         MODEL_ID = "sshleifer/tiny-gpt2"
-        config = GPT2Config.from_pretrained(MODEL_ID)
+        config = AutoConfig.from_pretrained(MODEL_ID)
+        benchmark_args = PyTorchBenchmarkArguments(
+            models=[MODEL_ID], training=True, no_inference=True, sequence_lengths=[8], batch_sizes=[1]
+        )
+        benchmark = PyTorchBenchmark(benchmark_args, configs=[config])
+        results = benchmark.run()
+        self.check_results_dict_not_empty(results.time_train_result)
+        self.check_results_dict_not_empty(results.memory_train_result)
+
+    def test_train_encoder_decoder_with_configs(self):
+        MODEL_ID = "sshleifer/tinier_bart"
+        config = AutoConfig.from_pretrained(MODEL_ID)
         benchmark_args = PyTorchBenchmarkArguments(
             models=[MODEL_ID], training=True, no_inference=True, sequence_lengths=[8], batch_sizes=[1]
         )
@@ -88,3 +110,29 @@ class BenchmarkTest(unittest.TestCase):
             self.assertTrue(Path(os.path.join(tmp_dir, "inf_mem.csv")).exists())
             self.assertTrue(Path(os.path.join(tmp_dir, "train_mem.csv")).exists())
             self.assertTrue(Path(os.path.join(tmp_dir, "env.csv")).exists())
+
+    def test_trace_memory(self):
+        MODEL_ID = "sshleifer/tiny-gpt2"
+
+        def _check_summary_is_not_empty(summary):
+            self.assertTrue(hasattr(summary, "sequential"))
+            self.assertTrue(hasattr(summary, "cumulative"))
+            self.assertTrue(hasattr(summary, "current"))
+            self.assertTrue(hasattr(summary, "total"))
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            benchmark_args = PyTorchBenchmarkArguments(
+                models=[MODEL_ID],
+                training=True,
+                no_inference=False,
+                sequence_lengths=[8],
+                batch_sizes=[1],
+                log_filename=os.path.join(tmp_dir, "log.txt"),
+                log_print=True,
+                trace_memory_line_by_line=True,
+            )
+            benchmark = PyTorchBenchmark(benchmark_args)
+            result = benchmark.run()
+            _check_summary_is_not_empty(result.inference_summary)
+            _check_summary_is_not_empty(result.train_summary)
+            self.assertTrue(Path(os.path.join(tmp_dir, "log.txt")).exists())