[Benchmark] Memory benchmark utils (#4198)
* improve memory benchmarking * correct typo * fix current memory * check torch memory allocated * better pytorch function * add total cached gpu memory * add total gpu required * improve torch gpu usage * update memory usage * finalize memory tracing * save intermediate benchmark class * fix conflict * improve benchmark * improve benchmark * finalize * make style * improve benchmarking * correct typo * make train function more flexible * fix csv save * better repr of bytes * better print * fix __repr__ bug * finish plot script * rename plot file * delete csv and small improvements * fix in plot * fix in plot * correct usage of timeit * remove redundant line * remove redundant line * fix bug * add hf parser tests * add versioning and platform info * make style * add gpu information * ensure backward compatibility * finish adding all tests * Update src/transformers/benchmark/benchmark_args.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * Update src/transformers/benchmark/benchmark_args_utils.py Co-authored-by: Lysandre Debut <lysandre@huggingface.co> * delete csv files * fix isort ordering * add out of memory handling * add better train memory handling Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
This commit is contained in:
committed by
GitHub
parent
ec4cdfdd05
commit
96f57c9ccb
@@ -19,19 +19,6 @@ else:
|
||||
|
||||
import logging
|
||||
|
||||
# Benchmarking
|
||||
from .benchmark_utils import (
|
||||
Frame,
|
||||
Memory,
|
||||
MemoryState,
|
||||
MemorySummary,
|
||||
MemoryTrace,
|
||||
UsedMemoryState,
|
||||
bytes_to_human_readable,
|
||||
start_memory_tracing,
|
||||
stop_memory_tracing,
|
||||
)
|
||||
|
||||
# Configurations
|
||||
from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig
|
||||
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, AutoConfig
|
||||
@@ -358,6 +345,9 @@ if is_torch_available():
|
||||
from .data.data_collator import DefaultDataCollator, DataCollator, DataCollatorForLanguageModeling
|
||||
from .data.datasets import GlueDataset, TextDataset, LineByLineTextDataset, GlueDataTrainingArguments
|
||||
|
||||
# Benchmarks
|
||||
from .benchmark import PyTorchBenchmark, PyTorchBenchmarkArguments
|
||||
|
||||
# TensorFlow
|
||||
if is_tf_available():
|
||||
from .modeling_tf_utils import (
|
||||
|
||||
10
src/transformers/benchmark/__init__.py
Normal file
10
src/transformers/benchmark/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
# flake8: noqa
|
||||
# There's no way to ignore "F401 '...' imported but unused" warnings in this
|
||||
# module, but to preserve other warnings. So, don't check this module at all.
|
||||
|
||||
from ..file_utils import is_torch_available
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
from .benchmark_args import PyTorchBenchmarkArguments
|
||||
from .benchmark import PyTorchBenchmark
|
||||
146
src/transformers/benchmark/benchmark.py
Normal file
146
src/transformers/benchmark/benchmark.py
Normal file
@@ -0,0 +1,146 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The HuggingFace Inc. team.
|
||||
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Benchmarking the library on inference and training in PyTorch.
|
||||
"""
|
||||
|
||||
|
||||
import inspect
|
||||
import logging
|
||||
import timeit
|
||||
|
||||
from transformers import MODEL_MAPPING, MODEL_WITH_LM_HEAD_MAPPING, PretrainedConfig, is_torch_available
|
||||
|
||||
from .benchmark_utils import Benchmark, Memory, start_memory_tracing, stop_memory_tracing
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
from .benchmark_args import PyTorchBenchmarkArguments
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PyTorchBenchmark(Benchmark):
    """Benchmark that measures speed and memory of PyTorch models.

    `train` and `inference` build a model from the configuration registered
    under `model_name`, feed it random token ids and return either a runtime in
    seconds (`trace_memory=False`) or a `Memory` object (`trace_memory=True`).
    If a `RuntimeError` is raised while measuring, it is reported through
    `print_fn` and the string `"N/A"` is returned instead.
    """

    args: PyTorchBenchmarkArguments
    configs: PretrainedConfig
    framework: str = "PyTorch"

    # Keyword names under which a model's `forward` may accept LM labels, tried in this order.
    # TODO: drop this once all models use a common name for the labels argument.
    _LABEL_ARGUMENT_NAMES = ("labels", "lm_labels", "masked_lm_labels")

    @property
    def framework_version(self):
        """Version string of the imported `torch` package."""
        return torch.__version__

    def _use_line_by_line_tracing(self):
        """Return True when memory is measured via `start_memory_tracing` rather than CUDA peak statistics."""
        return self.args.trace_memory_line_by_line or self.args.n_gpu == 0

    @staticmethod
    def _reset_peak_gpu_memory():
        """Empty the CUDA cache and reset the peak-memory counter, falling back to the older cached-memory API."""
        torch.cuda.empty_cache()
        if hasattr(torch.cuda, "max_memory_reserved"):
            torch.cuda.reset_peak_memory_stats()
        else:
            logger.info("Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage")
            torch.cuda.reset_max_memory_cached()

    @staticmethod
    def _peak_gpu_memory():
        """Return the peak GPU memory recorded since the last reset, wrapped in `Memory`."""
        if hasattr(torch.cuda, "max_memory_reserved"):
            return Memory(torch.cuda.max_memory_reserved())
        logger.info("Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage")
        return Memory(torch.cuda.max_memory_cached())

    def _measure_memory(self, step):
        """Run `step` once and return the memory it used.

        Uses line-by-line tracing of the `transformers` package when requested
        or when no GPU is used, and CUDA peak statistics otherwise.
        """
        if self._use_line_by_line_tracing():
            trace = start_memory_tracing("transformers")
            step()
            summary = stop_memory_tracing(trace)
            return summary.total

        self._reset_peak_gpu_memory()
        step()
        return self._peak_gpu_memory()

    def _measure_time(self, step):
        """Return the best per-call runtime of `step` in seconds."""
        # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken rather than the average
        runtimes = timeit.repeat(step, repeat=self.args.repeat, number=10)
        return min(runtimes) / 10.0

    def train(self, model_name, batch_size, sequence_length, trace_memory=False):
        """Benchmark one forward/backward pass of the LM-head model for `model_name`.

        Args:
            model_name: key into `self.config_dict`.
            batch_size: number of sequences in the random input batch.
            sequence_length: number of tokens per sequence.
            trace_memory: measure memory when true, runtime otherwise.

        Returns:
            A `Memory` object, a runtime in seconds, or `"N/A"` if a
            `RuntimeError` occurred during the measurement.

        Raises:
            NotImplementedError: if the model's `forward` accepts none of the
                known label argument names.
        """
        try:
            config = self.config_dict[model_name]
            model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)
            model.to(self.args.device)
            model.train()

            input_ids = torch.randint(
                model.config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
            )

            # Resolve the label keyword once, outside of the measured closure, so that the
            # signature inspection is not part of the timing.
            forward_argument_names = inspect.getfullargspec(model.forward).args
            label_name = next(
                (name for name in self._LABEL_ARGUMENT_NAMES if name in forward_argument_names), None
            )
            if label_name is None:
                raise NotImplementedError(f"{model_name} does not seem to allow training with labels")

            def compute_loss_and_backprob():
                loss = model(input_ids, **{label_name: input_ids})[0]
                loss.backward()
                model.zero_grad()

            if trace_memory:
                return self._measure_memory(compute_loss_and_backprob)
            return self._measure_time(compute_loss_and_backprob)
        except NotImplementedError:
            # NotImplementedError subclasses RuntimeError; do not report it as an out-of-memory failure.
            raise
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A"

    def inference(self, model_name, batch_size, sequence_length, trace_memory=False):
        """Benchmark one forward pass of the base model for `model_name`.

        Args:
            model_name: key into `self.config_dict`.
            batch_size: number of sequences in the random input batch.
            sequence_length: number of tokens per sequence.
            trace_memory: measure memory when true, runtime otherwise.

        Returns:
            A `Memory` object, a runtime in seconds, or `"N/A"` if a
            `RuntimeError` occurred during the measurement.
        """
        try:
            config = self.config_dict[model_name]
            model = MODEL_MAPPING[config.__class__](config)
            model.to(self.args.device)
            model.eval()

            input_ids = torch.randint(
                config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
            )

            def forward_pass():
                model(input_ids)

            if trace_memory:
                return self._measure_memory(forward_pass)
            return self._measure_time(forward_pass)
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A"
|
||||
78
src/transformers/benchmark/benchmark_args.py
Normal file
78
src/transformers/benchmark/benchmark_args.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The HuggingFace Inc. team.
|
||||
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Tuple
|
||||
|
||||
from ..file_utils import cached_property, is_torch_available, torch_required
|
||||
from .benchmark_args_utils import BenchmarkArguments
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
try:
|
||||
import torch_xla.core.xla_model as xm
|
||||
|
||||
_has_tpu = True
|
||||
except ImportError:
|
||||
_has_tpu = False
|
||||
|
||||
|
||||
@torch_required
def is_tpu_available():
    """Return whether `torch_xla.core.xla_model` could be imported when this module was loaded."""
    # `_has_tpu` is set once at import time by the try/except around the `torch_xla` import above.
    return _has_tpu
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PyTorchBenchmarkArguments(BenchmarkArguments):
    """Benchmark arguments specific to the PyTorch benchmark.

    Adds device-related options on top of `BenchmarkArguments` and exposes the
    resolved device and GPU count as properties.
    """

    # Setting this flag forces the CPU device (see `_setup_devices`), so the help text says so.
    no_cuda: bool = field(default=False, metadata={"help": "Do not use CUDA even if it is available"})
    torchscript: bool = field(default=False, metadata={"help": "Trace the models using torchscript"})
    fp16: bool = field(default=False, metadata={"help": "Use FP16 to accelerate inference."})

    @cached_property
    @torch_required
    def _setup_devices(self) -> Tuple["torch.device", int]:
        """Resolve the device to run on and the number of GPUs.

        Returns:
            A `(device, n_gpu)` tuple. `n_gpu` is 0 for CPU and TPU runs and
            `torch.cuda.device_count()` otherwise.
        """
        logger.info("PyTorch: setting up devices")
        if self.no_cuda:
            device = torch.device("cpu")
            n_gpu = 0
        elif is_tpu_available():
            device = xm.xla_device()
            n_gpu = 0
        else:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            n_gpu = torch.cuda.device_count()
        return device, n_gpu

    @property
    @torch_required
    def device_idx(self) -> int:
        """Index of the currently selected CUDA device."""
        return torch.cuda.current_device()

    @property
    @torch_required
    def device(self) -> "torch.device":
        """Device selected by `_setup_devices`."""
        return self._setup_devices[0]

    @property
    @torch_required
    def n_gpu(self) -> int:
        """Number of GPUs determined by `_setup_devices`."""
        return self._setup_devices[1]
|
||||
98
src/transformers/benchmark/benchmark_args_utils.py
Normal file
98
src/transformers/benchmark/benchmark_args_utils.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The HuggingFace Inc. team.
|
||||
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import dataclasses
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from time import time
|
||||
from typing import List
|
||||
|
||||
|
||||
def list_field(default=None, metadata=None):
    """Create a dataclass field whose default is a fresh copy of `default`.

    Args:
        default: default value of the field, typically a list.
        metadata: metadata mapping forwarded to `dataclasses.field`.

    Returns:
        A `dataclasses.Field` using a `default_factory`.
    """
    import copy

    # Return a shallow copy on every call so that instances do not share (and
    # accidentally mutate) one and the same default list object.
    return field(default_factory=lambda: copy.copy(default), metadata=metadata)
|
||||
|
||||
|
||||
@dataclass
class BenchmarkArguments:
    """
    BenchmarkArguments are the framework-independent arguments used by the
    benchmark scripts: which models, batch sizes and sequence lengths to
    evaluate, which measurements to run, and where results are written.

    Using `HfArgumentParser` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.
    """

    # --- what to benchmark ---
    models: List[str] = list_field(
        default=[],
        metadata={
            "help": "Model checkpoints to be provided to the AutoModel classes. Leave blank to benchmark the base version of all available models"
        },
    )

    batch_sizes: List[int] = list_field(
        default=[8], metadata={"help": "List of batch sizes for which memory and time performance will be evaluated"}
    )

    sequence_lengths: List[int] = list_field(
        default=[8, 32, 128, 512],
        metadata={"help": "List of sequence lengths for which memory and time performance will be evaluated"},
    )

    # --- which measurements to run and how to report them ---
    no_inference: bool = field(default=False, metadata={"help": "Don't benchmark inference of model"})
    training: bool = field(default=False, metadata={"help": "Benchmark training of model"})
    verbose: bool = field(default=False, metadata={"help": "Verbose memory tracing"})
    no_speed: bool = field(default=False, metadata={"help": "Don't perform speed measurments"})
    no_memory: bool = field(default=False, metadata={"help": "Don't perform memory measurments"})
    trace_memory_line_by_line: bool = field(default=False, metadata={"help": "Trace memory line by line"})
    save_to_csv: bool = field(default=False, metadata={"help": "Save result to a CSV file"})
    log_print: bool = field(default=False, metadata={"help": "Save all print statements in a log file"})
    no_env_print: bool = field(default=False, metadata={"help": "Don't print environment information"})

    # --- output files ---
    # NOTE: the f-string defaults below are evaluated once, when the class body is executed,
    # so every instance created in the same process shares the same timestamped default names.
    inference_time_csv_file: str = field(
        default=f"inference_time_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving time results to csv."},
    )
    inference_memory_csv_file: str = field(
        default=f"inference_memory_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving memory results to csv."},
    )
    train_time_csv_file: str = field(
        default=f"train_time_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving time results to csv for training."},
    )
    train_memory_csv_file: str = field(
        default=f"train_memory_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving memory results to csv for training."},
    )
    env_info_csv_file: str = field(
        default=f"env_info_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving environment information."},
    )
    # NOTE(review): the default log file name carries a ".csv" extension — confirm this is intended.
    log_filename: str = field(
        default=f"log_{round(time())}.csv",
        metadata={"help": "Log filename used if print statements are saved in log."},
    )
    repeat: int = field(default=3, metadata={"help": "Times an experiment will be run."})

    def to_json_string(self):
        """
        Serializes this instance to a JSON string.
        """
        return json.dumps(dataclasses.asdict(self), indent=2)

    @property
    def model_names(self):
        # Alias for `models`.
        return self.models
|
||||
@@ -4,18 +4,28 @@ This file is adapted from the AllenNLP library at https://github.com/allenai/all
|
||||
Copyright by the AllenNLP authors.
|
||||
"""
|
||||
|
||||
import copy
|
||||
import csv
|
||||
import linecache
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import defaultdict, namedtuple
|
||||
from datetime import datetime
|
||||
from typing import Iterable, List, NamedTuple, Optional, Union
|
||||
|
||||
from .file_utils import is_tf_available, is_torch_available
|
||||
from transformers import AutoConfig, PretrainedConfig
|
||||
from transformers import __version__ as version
|
||||
|
||||
from ..file_utils import is_tf_available, is_torch_available
|
||||
from .benchmark_args_utils import BenchmarkArguments
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
from torch.cuda import empty_cache as torch_empty_cache
|
||||
|
||||
if is_tf_available():
|
||||
from tensorflow.python.eager import context as tf_context
|
||||
|
||||
@@ -25,6 +35,10 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
||||
|
||||
_is_memory_tracing_enabled = False
|
||||
|
||||
BenchmarkOutput = namedtuple(
|
||||
"BenchmarkOutput", ["time_inference_result", "memory_inference_result", "time_train_result", "memory_train_result"]
|
||||
)
|
||||
|
||||
|
||||
def is_memory_tracing_enabled():
|
||||
global _is_memory_tracing_enabled
|
||||
@@ -62,14 +76,14 @@ class UsedMemoryState(NamedTuple):
|
||||
|
||||
class Memory(NamedTuple):
|
||||
""" `Memory` NamedTuple have a single field `bytes` and
|
||||
you can get a human readable string of the number of bytes by calling `__repr__`
|
||||
you can get a human readable str of the number of mega bytes by calling `__repr__`
|
||||
- `byte` (integer): number of bytes,
|
||||
"""
|
||||
|
||||
bytes: int
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return bytes_to_human_readable(self.bytes)
|
||||
return str(bytes_to_mega_bytes(self.bytes))
|
||||
|
||||
|
||||
class MemoryState(NamedTuple):
|
||||
@@ -99,6 +113,7 @@ class MemorySummary(NamedTuple):
|
||||
|
||||
sequential: List[MemoryState]
|
||||
cumulative: List[MemoryState]
|
||||
current: List[MemoryState]
|
||||
total: Memory
|
||||
|
||||
|
||||
@@ -234,10 +249,12 @@ def start_memory_tracing(
|
||||
|
||||
# Sum used memory for all GPUs
|
||||
py3nvml.nvmlInit()
|
||||
|
||||
for i in devices:
|
||||
handle = py3nvml.nvmlDeviceGetHandleByIndex(i)
|
||||
meminfo = py3nvml.nvmlDeviceGetMemoryInfo(handle)
|
||||
gpu_mem += meminfo.used
|
||||
|
||||
py3nvml.nvmlShutdown()
|
||||
|
||||
mem_state = UsedMemoryState(traced_state, cpu_mem, gpu_mem)
|
||||
@@ -295,8 +312,11 @@ def stop_memory_tracing(
|
||||
|
||||
if memory_trace is not None and len(memory_trace) > 1:
|
||||
memory_diff_trace = []
|
||||
memory_curr_trace = []
|
||||
|
||||
cumulative_memory_dict = defaultdict(lambda: [0, 0, 0])
|
||||
for (frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem) in zip(
|
||||
|
||||
for ((frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem),) in zip(
|
||||
memory_trace[:-1], memory_trace[1:]
|
||||
):
|
||||
cpu_mem_inc = next_cpu_mem - cpu_mem
|
||||
@@ -307,6 +327,16 @@ def stop_memory_tracing(
|
||||
frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc),
|
||||
)
|
||||
)
|
||||
|
||||
memory_curr_trace.append(
|
||||
MemoryState(
|
||||
frame=frame,
|
||||
cpu=Memory(next_cpu_mem),
|
||||
gpu=Memory(next_gpu_mem),
|
||||
cpu_gpu=Memory(next_gpu_mem + next_cpu_mem),
|
||||
)
|
||||
)
|
||||
|
||||
cumulative_memory_dict[frame][0] += cpu_mem_inc
|
||||
cumulative_memory_dict[frame][1] += gpu_mem_inc
|
||||
cumulative_memory_dict[frame][2] += cpu_gpu_mem_inc
|
||||
@@ -321,21 +351,287 @@ def stop_memory_tracing(
|
||||
for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory
|
||||
)
|
||||
|
||||
memory_curr_trace = sorted(memory_curr_trace, key=lambda x: x.cpu_gpu.bytes, reverse=True)
|
||||
|
||||
if ignore_released_memory:
|
||||
total_memory = sum(max(0, step_trace.cpu_gpu.bytes) for step_trace in memory_diff_trace)
|
||||
else:
|
||||
total_memory = sum(step_trace.cpu_gpu.bytes for step_trace in memory_diff_trace)
|
||||
|
||||
total_memory = Memory(total_memory)
|
||||
return MemorySummary(sequential=memory_diff_trace, cumulative=cumulative_memory, total=total_memory)
|
||||
|
||||
return MemorySummary(
|
||||
sequential=memory_diff_trace, cumulative=cumulative_memory, current=memory_curr_trace, total=total_memory,
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def bytes_to_human_readable(memory_amount: int) -> str:
|
||||
""" Utility to convert a number of bytes (int) in a human readable string (with units)
|
||||
def bytes_to_mega_bytes(memory_amount: int) -> int:
|
||||
""" Utility to convert a number of bytes (int) into a number of mega bytes (int)
|
||||
"""
|
||||
for unit in ["B", "KB", "MB", "GB"]:
|
||||
if memory_amount > -1024.0 and memory_amount < 1024.0:
|
||||
return "{:.3f}{}".format(memory_amount, unit)
|
||||
memory_amount /= 1024.0
|
||||
return "{:.3f}TB".format(memory_amount)
|
||||
return memory_amount >> 20
|
||||
|
||||
|
||||
class Benchmark(ABC):
    """
    Benchmarks is a simple but feature-complete benchmarking script
    to compare memory and time performance of models in Transformers.

    Subclasses provide `framework`, `framework_version`, `train` and
    `inference`; `run` iterates over all configured models, batch sizes and
    sequence lengths, prints the results and optionally saves them as CSV.
    """

    args: BenchmarkArguments
    configs: PretrainedConfig
    framework: str

    def __init__(self, args: BenchmarkArguments = None, configs: PretrainedConfig = None):
        """Store the arguments and build the `model name -> config` mapping.

        Args:
            args: benchmark arguments; `args.model_names` selects the models.
            configs: optional configurations, matched by position with
                `args.model_names`. When omitted, each configuration is loaded
                with `AutoConfig.from_pretrained(model_name)`.
        """
        self.args = args

        if configs is None:
            self.config_dict = {
                model_name: AutoConfig.from_pretrained(model_name) for model_name in self.args.model_names
            }
        else:
            self.config_dict = dict(zip(self.args.model_names, configs))

        self._print_fn = None
        self._framework_version = None
        self._environment_info = None

    @property
    def print_fn(self):
        """Callable used for all output; also logs to `args.log_filename` when `args.log_print` is set."""
        if self._print_fn is None:
            if self.args.log_print:
                logging.basicConfig(
                    level=logging.DEBUG,
                    filename=self.args.log_filename,
                    filemode="a+",
                    format="%(asctime)-15s %(levelname)-8s %(message)s",
                )

                def print_and_log(*args):
                    # Join the arguments like `print` does; passing them straight to `logging.info`
                    # would treat every argument after the first as a %-format argument.
                    logging.info(" ".join(str(arg) for arg in args))
                    print(*args)

                self._print_fn = print_and_log
            else:
                self._print_fn = print
        return self._print_fn

    @property
    def is_gpu(self):
        """True when `args.n_gpu` is greater than zero."""
        return self.args.n_gpu > 0

    @property
    @abstractmethod
    def framework_version(self):
        """Version string of the benchmarked framework."""
        pass

    @abstractmethod
    def train(self, model_name, batch_size, sequence_length, trace_memory=False):
        """Benchmark training; return memory if `trace_memory` is set, runtime otherwise."""
        pass

    @abstractmethod
    def inference(self, model_name, batch_size, sequence_length, trace_memory=False):
        """Benchmark inference; return memory if `trace_memory` is set, runtime otherwise."""
        pass

    def _report_results(self, title, result_dict, csv_file):
        """Print one result table under `title` and save it to `csv_file` if CSV output is enabled."""
        self.print_fn(title)
        self.print_results(result_dict)
        self.save_to_csv(result_dict, csv_file)

    def run(self):
        """Run all configured benchmarks.

        Returns:
            A `BenchmarkOutput` holding, in order, the inference time,
            inference memory, training time and training memory results. Each
            is a dict `model_name -> {"bs", "ss", "result"}` where
            `result[batch_size][sequence_length]` is the measured value.
        """
        model_names = self.args.model_names
        inference_result_time = {}
        inference_result_memory = {}
        train_result_time = {}
        train_result_memory = {}

        for c, model_name in enumerate(model_names):
            self.print_fn(f"{c + 1} / {len(model_names)}")

            model_dict = {
                "bs": self.args.batch_sizes,
                "ss": self.args.sequence_lengths,
                "result": {i: {} for i in self.args.batch_sizes},
            }
            inference_result_time[model_name] = copy.deepcopy(model_dict)
            inference_result_memory[model_name] = copy.deepcopy(model_dict)
            train_result_time[model_name] = copy.deepcopy(model_dict)
            train_result_memory[model_name] = copy.deepcopy(model_dict)

            for batch_size in self.args.batch_sizes:
                for sequence_length in self.args.sequence_lengths:
                    if not self.args.no_inference:
                        if not self.args.no_memory:
                            memory = self.inference(model_name, batch_size, sequence_length, trace_memory=True)
                            inference_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                        if not self.args.no_speed:
                            runtime = self.inference(model_name, batch_size, sequence_length, trace_memory=False)
                            inference_result_time[model_name]["result"][batch_size][sequence_length] = runtime

                    if self.args.training:
                        if not self.args.no_memory:
                            memory = self.train(model_name, batch_size, sequence_length, trace_memory=True)
                            train_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                        if not self.args.no_speed:
                            # Training speed must be measured with `train`, not `inference`.
                            runtime = self.train(model_name, batch_size, sequence_length, trace_memory=False)
                            train_result_time[model_name]["result"][batch_size][sequence_length] = runtime

        if not self.args.no_inference:
            if not self.args.no_speed:
                self._report_results(
                    "======= INFERENCE - SPEED - RESULT =======",
                    inference_result_time,
                    self.args.inference_time_csv_file,
                )
            if not self.args.no_memory:
                self._report_results(
                    "======= INFERENCE - MEMORY - RESULT =======",
                    inference_result_memory,
                    self.args.inference_memory_csv_file,
                )

        if self.args.training:
            if not self.args.no_speed:
                self._report_results(
                    "======= TRAIN - SPEED - RESULT =======", train_result_time, self.args.train_time_csv_file
                )
            if not self.args.no_memory:
                self._report_results(
                    "======= TRAIN - MEMORY - RESULT =======", train_result_memory, self.args.train_memory_csv_file
                )

        if not self.args.no_env_print:
            self.print_fn("\n======== ENVIRONMENT - INFORMATION ========")
            self.print_fn(
                "\n".join(["- {}: {}".format(prop, val) for prop, val in self.environment_info.items()]) + "\n"
            )

        if self.args.save_to_csv:
            with open(self.args.env_info_csv_file, mode="w", newline="") as csv_file:
                writer = csv.writer(csv_file)
                for key, value in self.environment_info.items():
                    writer.writerow([key, value])

        return BenchmarkOutput(inference_result_time, inference_result_memory, train_result_time, train_result_memory)

    @property
    def environment_info(self):
        """Dict describing the software and hardware environment; computed once and cached."""
        if self._environment_info is None:
            info = {}
            info["transformers_version"] = version
            info["framework"] = self.framework
            info["framework_version"] = self.framework_version
            info["python_version"] = platform.python_version()
            info["system"] = platform.system()
            info["cpu"] = platform.processor()
            info["architecture"] = platform.architecture()[0]
            info["date"] = datetime.date(datetime.now())
            info["time"] = datetime.time(datetime.now())

            try:
                import psutil
            except (ImportError):
                logger.warning(
                    "Psutil not installed, we won't log available CPU memory."
                    "Install psutil (pip install psutil) to log available CPU memory."
                )
                info["cpu_ram_mb"] = "N/A"
            else:
                info["cpu_ram_mb"] = bytes_to_mega_bytes(psutil.virtual_memory().total)

            info["use_gpu"] = self.is_gpu
            if self.is_gpu:
                info["num_gpus"] = self.args.n_gpu
                # Values reported when the GPU cannot be queried.
                gpu_info_unavailable = {
                    "gpu": "N/A",
                    "gpu_ram_mb": "N/A",
                    "gpu_power_watts": "N/A",
                    "gpu_performance_state": "N/A",
                }
                try:
                    from py3nvml import py3nvml

                    py3nvml.nvmlInit()
                    handle = py3nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                except ImportError:
                    logger.warning(
                        "py3nvml not installed, we won't log GPU memory usage. "
                        "Install py3nvml (pip install py3nvml) to log information about GPU."
                    )
                    info.update(gpu_info_unavailable)
                except (OSError, py3nvml.NVMLError):
                    logger.warning(
                        "Error while initializing comunication with GPU. " "We won't log information about GPU."
                    )
                    info.update(gpu_info_unavailable)
                    py3nvml.nvmlShutdown()
                else:
                    info["gpu"] = py3nvml.nvmlDeviceGetName(handle)
                    info["gpu_ram_mb"] = bytes_to_mega_bytes(py3nvml.nvmlDeviceGetMemoryInfo(handle).total)
                    info["gpu_power_watts"] = py3nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000
                    info["gpu_performance_state"] = py3nvml.nvmlDeviceGetPerformanceState(handle)
                    py3nvml.nvmlShutdown()

            self._environment_info = info
        return self._environment_info

    def print_results(self, result_dict):
        """Print one line per (model, batch size, sequence length) entry of `result_dict`.

        Float results are printed as seconds, string results (such as "N/A")
        are printed as they are, and any other result is printed with an "MB"
        suffix.
        """
        for model_name in self.args.model_names:
            self.print_fn("\t" + f"======= MODEL CHECKPOINT: {model_name} =======")
            for batch_size in result_dict[model_name]["bs"]:
                for sequence_length in result_dict[model_name]["ss"]:
                    result = result_dict[model_name]["result"][batch_size][sequence_length]
                    prefix = f"\t\t{model_name}/{batch_size}/{sequence_length}: "
                    if isinstance(result, float):
                        self.print_fn(prefix + f"{(round(1000 * result) / 1000)}s")
                    elif isinstance(result, str):
                        # Failed measurements carry no unit.
                        self.print_fn(prefix + result)
                    else:
                        self.print_fn(prefix + f"{result} MB")

    def print_memory_trace_statistics(self, summary: MemorySummary):
        """Print the sequential trace, the first and last six cumulative entries and the total of `summary`."""
        self.print_fn(
            "\nLine by line memory consumption:\n"
            + "\n".join(
                f"{state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.sequential
            )
        )
        self.print_fn(
            "\nLines with top memory consumption:\n"
            + "\n".join(
                f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.cumulative[:6]
            )
        )
        self.print_fn(
            "\nLines with lowest memory consumption:\n"
            + "\n".join(
                f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.cumulative[-6:]
            )
        )
        self.print_fn(f"\nTotal memory increase: {summary.total}")

    def save_to_csv(self, result_dict, filename):
        """Write `result_dict` to `filename` as CSV; does nothing unless `args.save_to_csv` is set.

        The file has the columns `model`, `batch_size`, `sequence_length` and
        `result`. Float results are written with four decimals.
        """
        if not self.args.save_to_csv:
            return
        self.print_fn("Saving results to csv.")

        # Validate before opening so that a failing check does not leave an empty file behind.
        assert len(self.args.model_names) > 0, "At least 1 model should be defined, but got {}".format(
            self.args.model_names
        )

        # `newline=""` is what the csv module expects; it avoids blank rows on Windows.
        with open(filename, mode="w", newline="") as csv_file:
            fieldnames = ["model", "batch_size", "sequence_length"]
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames + ["result"])
            writer.writeheader()

            for model_name in self.args.model_names:
                result_dict_model = result_dict[model_name]["result"]
                for bs in result_dict_model:
                    for ss in result_dict_model[bs]:
                        result_model = result_dict_model[bs][ss]
                        writer.writerow(
                            {
                                "model": model_name,
                                "batch_size": bs,
                                "sequence_length": ss,
                                "result": ("{}" if not isinstance(result_model, float) else "{:.4f}").format(
                                    result_model
                                ),
                            }
                        )
|
||||
@@ -59,6 +59,7 @@ try:
|
||||
except (ImportError, AssertionError):
|
||||
_tf_available = False # pylint: disable=invalid-name
|
||||
|
||||
|
||||
try:
|
||||
from torch.hub import _get_torch_home
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import sys
|
||||
from argparse import ArgumentParser
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, NewType, Tuple, Union
|
||||
from typing import Any, Iterable, List, NewType, Tuple, Union
|
||||
|
||||
|
||||
DataClass = NewType("DataClass", Any)
|
||||
@@ -52,9 +52,13 @@ class HfArgumentParser(ArgumentParser):
|
||||
"We will add compatibility when Python 3.9 is released."
|
||||
)
|
||||
typestring = str(field.type)
|
||||
for x in (int, float, str):
|
||||
if typestring == f"typing.Union[{x.__name__}, NoneType]":
|
||||
field.type = x
|
||||
for prim_type in (int, float, str):
|
||||
for collection in (List,):
|
||||
if typestring == f"typing.Union[{collection[prim_type]}, NoneType]":
|
||||
field.type = collection[prim_type]
|
||||
if typestring == f"typing.Union[{prim_type.__name__}, NoneType]":
|
||||
field.type = prim_type
|
||||
|
||||
if isinstance(field.type, type) and issubclass(field.type, Enum):
|
||||
kwargs["choices"] = list(field.type)
|
||||
kwargs["type"] = field.type
|
||||
@@ -65,6 +69,14 @@ class HfArgumentParser(ArgumentParser):
|
||||
if field.default is True:
|
||||
field_name = f"--no-{field.name}"
|
||||
kwargs["dest"] = field.name
|
||||
elif hasattr(field.type, "__origin__") and issubclass(field.type.__origin__, List):
|
||||
kwargs["nargs"] = "+"
|
||||
kwargs["type"] = field.type.__args__[0]
|
||||
assert all(
|
||||
x == kwargs["type"] for x in field.type.__args__
|
||||
), "{} cannot be a List of mixed types".format(field.name)
|
||||
if field.default_factory is not dataclasses.MISSING:
|
||||
kwargs["default"] = field.default_factory()
|
||||
else:
|
||||
kwargs["type"] = field.type
|
||||
if field.default is not dataclasses.MISSING:
|
||||
|
||||
Reference in New Issue
Block a user