From c0554776de99da6df8c8167897a46f20f6d3e840 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Mon, 8 Jun 2020 15:31:12 +0200 Subject: [PATCH] fix PR (#4810) --- src/transformers/benchmark/benchmark.py | 179 ++++++++++++------ .../benchmark/benchmark_args_utils.py | 6 + src/transformers/benchmark/benchmark_utils.py | 46 +++-- tests/test_benchmark.py | 54 +++++- 4 files changed, 215 insertions(+), 70 deletions(-) diff --git a/src/transformers/benchmark/benchmark.py b/src/transformers/benchmark/benchmark.py index bb86b50f8b..742d84b773 100644 --- a/src/transformers/benchmark/benchmark.py +++ b/src/transformers/benchmark/benchmark.py @@ -18,8 +18,8 @@ """ -import inspect import logging +import os import timeit from transformers import MODEL_MAPPING, MODEL_WITH_LM_HEAD_MAPPING, PretrainedConfig, is_torch_available @@ -52,66 +52,34 @@ class PyTorchBenchmark(Benchmark): model.to(self.args.device) model.train() + # encoder-decoder has vocab size saved differently + vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size input_ids = torch.randint( - model.config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device + vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device ) - def compute_loss_and_backprob(): - # TODO: Not all models call labels argument labels => this hack using the function signature should be corrected once all models have a common name for labels - function_argument_names = inspect.getfullargspec(model.forward).args - if "labels" in function_argument_names: - loss = model(input_ids, labels=input_ids)[0] - elif "lm_labels" in function_argument_names: - loss = model(input_ids, lm_labels=input_ids)[0] - elif "masked_lm_labels" in function_argument_names: - loss = model(input_ids, masked_lm_labels=input_ids)[0] - else: - NotImplementedError(f"{model_name} does not seem to allow training with labels") - + def compute_loss_and_backprob_encoder(): + loss = model(input_ids, labels=input_ids)[0] loss.backward() model.zero_grad() - if trace_memory is True: - if self.args.trace_memory_line_by_line or self.args.n_gpu == 0: - trace = start_memory_tracing("transformers") - else: - # clear cuda cache - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats() + def compute_loss_and_backprob_encoder_decoder(): + loss = model(input_ids, decoder_input_ids=input_ids, labels=input_ids)[0] + loss.backward() + model.zero_grad() - # calculate loss and do backpropagation - compute_loss_and_backprob() - - if self.args.trace_memory_line_by_line or self.args.n_gpu == 0: - summary = stop_memory_tracing(trace) - memory = summary.total - else: - memory = Memory(torch.cuda.max_memory_reserved()) - - return memory - else: - # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average - runtimes = timeit.repeat(lambda: compute_loss_and_backprob(), repeat=self.args.repeat, number=10,) - return min(runtimes) / 10.0 - except RuntimeError as e: - self.print_fn("Doesn't fit on GPU. {}".format(e)) - return "N/A" - - def inference(self, model_name, batch_size, sequence_length, trace_memory=False): - try: - config = self.config_dict[model_name] - model = MODEL_MAPPING[config.__class__](config) - model.to(self.args.device) - model.eval() - - input_ids = torch.randint( - config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device + _train = ( + compute_loss_and_backprob_encoder_decoder + if config.is_encoder_decoder + else compute_loss_and_backprob_encoder ) + if trace_memory is True: - if self.args.trace_memory_line_by_line or self.args.n_gpu == 0: + if self.args.trace_memory_line_by_line: trace = start_memory_tracing("transformers") - else: - # clear cuda cache + + if self.args.n_gpu > 0: + # clear gpu cache torch.cuda.empty_cache() if hasattr(torch.cuda, "max_memory_reserved"): torch.cuda.reset_peak_memory_stats() @@ -121,12 +89,16 @@ class PyTorchBenchmark(Benchmark): ) torch.cuda.reset_max_memory_cached() - model(input_ids) + # calculate loss and do backpropagation + _train() - if self.args.trace_memory_line_by_line or self.args.n_gpu == 0: + if self.args.trace_memory_line_by_line: summary = stop_memory_tracing(trace) - memory = summary.total else: + summary = None + + if self.args.n_gpu > 0: + # gpu if hasattr(torch.cuda, "max_memory_reserved"): memory = Memory(torch.cuda.max_memory_reserved()) else: @@ -134,11 +106,106 @@ class PyTorchBenchmark(Benchmark): "Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage" ) memory = Memory(torch.cuda.max_memory_cached()) + memory = Memory(torch.cuda.max_memory_reserved()) + else: + # cpu + try: + import psutil + except (ImportError): + logger.warning( + "Psutil not installed, we won't log CPU memory usage. " + "Install psutil (pip install psutil) to use CPU memory tracing." + ) + memory = "N/A" + else: + process = psutil.Process(os.getpid()) + memory = Memory(process.memory_info().rss) - return memory + return memory, summary else: # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average - runtimes = timeit.repeat(lambda: model(input_ids), repeat=self.args.repeat, number=10,) + runtimes = timeit.repeat(_train, repeat=self.args.repeat, number=10,) + return min(runtimes) / 10.0 + except RuntimeError as e: + self.print_fn("Doesn't fit on GPU. {}".format(e)) + return "N/A" + + def inference(self, model_name, batch_size, sequence_length, trace_memory=False): + try: + config = self.config_dict[model_name] + + if self.args.with_lm_head: + model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config) + else: + model = MODEL_MAPPING[config.__class__](config) + + model.to(self.args.device) + model.eval() + + # encoder-decoder has vocab size saved differently + vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size + + input_ids = torch.randint( + vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device + ) + + def encoder_decoder_forward(): + model(input_ids, decoder_input_ids=input_ids) + + def encoder_forward(): + model(input_ids) + + _forward = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward + + if trace_memory is True: + if self.args.trace_memory_line_by_line: + trace = start_memory_tracing("transformers") + + if self.args.n_gpu > 0: + # clear gpu cache + torch.cuda.empty_cache() + if hasattr(torch.cuda, "max_memory_reserved"): + torch.cuda.reset_peak_memory_stats() + else: + logger.info( + "Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage" + ) + torch.cuda.reset_max_memory_cached() + + _forward() + + if self.args.trace_memory_line_by_line: + summary = stop_memory_tracing(trace) + else: + summary = None + + if self.args.n_gpu > 0: + # gpu + if hasattr(torch.cuda, "max_memory_reserved"): + memory = Memory(torch.cuda.max_memory_reserved()) + else: + logger.info( + "Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage" + ) + memory = Memory(torch.cuda.max_memory_cached()) + else: + # cpu + try: + import psutil + except (ImportError): + logger.warning( + "Psutil not installed, we won't log CPU memory usage. " + "Install psutil (pip install psutil) to use CPU memory tracing." + ) + memory = "N/A" + else: + process = psutil.Process(os.getpid()) + memory = Memory(process.memory_info().rss) + + return memory, summary + else: + # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average + runtimes = timeit.repeat(_forward, repeat=self.args.repeat, number=10,) return min(runtimes) / 10.0 except RuntimeError as e: diff --git a/src/transformers/benchmark/benchmark_args_utils.py b/src/transformers/benchmark/benchmark_args_utils.py index 849f0c443e..ac76c37eb1 100644 --- a/src/transformers/benchmark/benchmark_args_utils.py +++ b/src/transformers/benchmark/benchmark_args_utils.py @@ -61,6 +61,12 @@ class BenchmarkArguments: save_to_csv: bool = field(default=False, metadata={"help": "Save result to a CSV file"}) log_print: bool = field(default=False, metadata={"help": "Save all print statements in a log file"}) no_env_print: bool = field(default=False, metadata={"help": "Don't print environment information"}) + with_lm_head: bool = field( + default=False, + metadata={ + "help": "Use model with its language model head (MODEL_WITH_LM_HEAD_MAPPING instead of MODEL_MAPPING)" + }, + ) inference_time_csv_file: str = field( default=f"inference_time_{round(time())}.csv", metadata={"help": "CSV filename used if saving time results to csv."}, diff --git a/src/transformers/benchmark/benchmark_utils.py b/src/transformers/benchmark/benchmark_utils.py index d7ca0f3b1e..7b3c3304b1 100644 --- a/src/transformers/benchmark/benchmark_utils.py +++ b/src/transformers/benchmark/benchmark_utils.py @@ -36,7 +36,15 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name _is_memory_tracing_enabled = False BenchmarkOutput = namedtuple( - "BenchmarkOutput", ["time_inference_result", "memory_inference_result", "time_train_result", "memory_train_result"] + "BenchmarkOutput", + [ + "time_inference_result", + "memory_inference_result", + "time_train_result", + "memory_train_result", + "inference_summary", + "train_summary", + ], ) @@ -401,15 +409,10 @@ class Benchmark(ABC): def print_fn(self): if self._print_fn is None: if self.args.log_print: - logging.basicConfig( - level=logging.DEBUG, - filename=self.args.log_filename, - filemode="a+", - format="%(asctime)-15s %(levelname)-8s %(message)s", - ) def print_and_log(*args): - logging.info(*args) + with open(self.args.log_filename, "a") as log_file: + log_file.write(str(*args) + "\n") print(*args) self._print_fn = print_and_log @@ -454,11 +457,15 @@ class Benchmark(ABC): train_result_time[model_name] = copy.deepcopy(model_dict) train_result_memory[model_name] = copy.deepcopy(model_dict) + inference_summary = train_summary = None + for batch_size in self.args.batch_sizes: for sequence_length in self.args.sequence_lengths: if not self.args.no_inference: if not self.args.no_memory: - memory = self.inference(model_name, batch_size, sequence_length, trace_memory=True) + memory, inference_summary = self.inference( + model_name, batch_size, sequence_length, trace_memory=True + ) inference_result_memory[model_name]["result"][batch_size][sequence_length] = memory if not self.args.no_speed: time = self.inference(model_name, batch_size, sequence_length, trace_memory=False) @@ -466,7 +473,9 @@ class Benchmark(ABC): if self.args.training: if not self.args.no_memory: - memory = self.train(model_name, batch_size, sequence_length, trace_memory=True) + memory, train_summary = self.train( + model_name, batch_size, sequence_length, trace_memory=True + ) train_result_memory[model_name]["result"][batch_size][sequence_length] = memory if not self.args.no_speed: time = self.inference(model_name, batch_size, sequence_length, trace_memory=False) @@ -483,6 +492,10 @@ class Benchmark(ABC): self.print_results(inference_result_memory) self.save_to_csv(inference_result_memory, self.args.inference_memory_csv_file) + if self.args.trace_memory_line_by_line: + self.print_fn("======= INFERENCE - MEMORY LINE BY LINE TRACE - SUMMARY =======") + self.print_memory_trace_statistics(inference_summary) + if self.args.training: if not self.args.no_speed: self.print_fn("======= TRAIN - SPEED - RESULT =======") @@ -494,6 +507,10 @@ class Benchmark(ABC): self.print_results(train_result_memory) self.save_to_csv(train_result_memory, self.args.train_memory_csv_file) + if self.args.trace_memory_line_by_line: + self.print_fn("======= TRAIN - MEMORY LINE BY LINE TRACE - SUMMARY =======") + self.print_memory_trace_statistics(train_summary) + if not self.args.no_env_print: self.print_fn("\n======== ENVIRONMENT - INFORMATION ========") self.print_fn( @@ -506,7 +523,14 @@ class Benchmark(ABC): for key, value in self.environment_info.items(): writer.writerow([key, value]) - return BenchmarkOutput(inference_result_time, inference_result_memory, train_result_time, train_result_memory) + return BenchmarkOutput( + inference_result_time, + inference_result_memory, + train_result_time, + train_result_memory, + inference_summary, + train_summary, + ) @property def environment_info(self): diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index c6808e2480..20e830461e 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -3,7 +3,7 @@ import tempfile import unittest from pathlib import Path -from transformers import GPT2Config, is_torch_available +from transformers import AutoConfig, is_torch_available from .utils import require_torch @@ -45,7 +45,18 @@ class BenchmarkTest(unittest.TestCase): def test_inference_with_configs(self): MODEL_ID = "sshleifer/tiny-gpt2" - config = GPT2Config.from_pretrained(MODEL_ID) + config = AutoConfig.from_pretrained(MODEL_ID) + benchmark_args = PyTorchBenchmarkArguments( + models=[MODEL_ID], training=False, no_inference=False, sequence_lengths=[8], batch_sizes=[1] + ) + benchmark = PyTorchBenchmark(benchmark_args, configs=[config]) + results = benchmark.run() + self.check_results_dict_not_empty(results.time_inference_result) + self.check_results_dict_not_empty(results.memory_inference_result) + + def test_inference_encoder_decoder_with_configs(self): + MODEL_ID = "sshleifer/tinier_bart" + config = AutoConfig.from_pretrained(MODEL_ID) benchmark_args = PyTorchBenchmarkArguments( models=[MODEL_ID], training=False, no_inference=False, sequence_lengths=[8], batch_sizes=[1] ) @@ -56,7 +67,18 @@ class BenchmarkTest(unittest.TestCase): def test_train_with_configs(self): MODEL_ID = "sshleifer/tiny-gpt2" - config = GPT2Config.from_pretrained(MODEL_ID) + config = AutoConfig.from_pretrained(MODEL_ID) + benchmark_args = PyTorchBenchmarkArguments( + models=[MODEL_ID], training=True, no_inference=True, sequence_lengths=[8], batch_sizes=[1] + ) + benchmark = PyTorchBenchmark(benchmark_args, configs=[config]) + results = benchmark.run() + self.check_results_dict_not_empty(results.time_train_result) + self.check_results_dict_not_empty(results.memory_train_result) + + def test_train_encoder_decoder_with_configs(self): + MODEL_ID = "sshleifer/tinier_bart" + config = AutoConfig.from_pretrained(MODEL_ID) benchmark_args = PyTorchBenchmarkArguments( models=[MODEL_ID], training=True, no_inference=True, sequence_lengths=[8], batch_sizes=[1] ) @@ -88,3 +110,29 @@ class BenchmarkTest(unittest.TestCase): self.assertTrue(Path(os.path.join(tmp_dir, "inf_mem.csv")).exists()) self.assertTrue(Path(os.path.join(tmp_dir, "train_mem.csv")).exists()) self.assertTrue(Path(os.path.join(tmp_dir, "env.csv")).exists()) + + def test_trace_memory(self): + MODEL_ID = "sshleifer/tiny-gpt2" + + def _check_summary_is_not_empty(summary): + self.assertTrue(hasattr(summary, "sequential")) + self.assertTrue(hasattr(summary, "cumulative")) + self.assertTrue(hasattr(summary, "current")) + self.assertTrue(hasattr(summary, "total")) + + with tempfile.TemporaryDirectory() as tmp_dir: + benchmark_args = PyTorchBenchmarkArguments( + models=[MODEL_ID], + training=True, + no_inference=False, + sequence_lengths=[8], + batch_sizes=[1], + log_filename=os.path.join(tmp_dir, "log.txt"), + log_print=True, + trace_memory_line_by_line=True, + ) + benchmark = PyTorchBenchmark(benchmark_args) + result = benchmark.run() + _check_summary_is_not_empty(result.inference_summary) + _check_summary_is_not_empty(result.train_summary) + self.assertTrue(Path(os.path.join(tmp_dir, "log.txt")).exists())