diff --git a/examples/benchmarking/plot_csv_file.py b/examples/benchmarking/plot_csv_file.py
new file mode 100644
index 0000000000..1f2ba3346e
--- /dev/null
+++ b/examples/benchmarking/plot_csv_file.py
@@ -0,0 +1,140 @@
+import csv
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Optional
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+from transformers import HfArgumentParser
+
+
+@dataclass
+class PlotArguments:
+    """
+    Arguments selecting which benchmark csv file is plotted and how the plot is laid out.
+    """
+
+    csv_file: str = field(metadata={"help": "The csv file to plot."},)
+    plot_along_batch: bool = field(
+        default=False,
+        metadata={"help": "Whether to plot along batch size or sequence length. Defaults to sequence length."},
+    )
+    is_time: bool = field(
+        default=False,
+        metadata={"help": "Whether the csv file has time results or memory results. Defaults to memory results."},
+    )
+    is_train: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether the csv file has training results or inference results. Defaults to inference results."
+        },
+    )
+    figure_png_file: Optional[str] = field(
+        default=None, metadata={"help": "Filename under which the plot will be saved. If unused no plot is saved."},
+    )
+
+
+class Plot:
+    """
+    Reads benchmark results from a csv file and plots them per model.
+
+    The csv file must provide the columns `model`, `batch_size`, `sequence_length` and `result`.
+    """
+
+    def __init__(self, args):
+        """
+        Load all rows of `args.csv_file`, grouped by model name.
+
+        Args:
+            args: a `PlotArguments` instance (or any object exposing the same attributes).
+        """
+        self.args = args
+        # model name -> batch sizes seen, sequence lengths seen, result keyed by (batch_size, sequence_length)
+        self.result_dict = defaultdict(lambda: dict(bsz=[], seq_len=[], result={}))
+
+        with open(self.args.csv_file, newline="") as csv_file:
+            reader = csv.DictReader(csv_file)
+            for row in reader:
+                model_results = self.result_dict[row["model"]]
+                batch_size = int(row["batch_size"])
+                sequence_length = int(row["sequence_length"])
+                model_results["bsz"].append(batch_size)
+                model_results["seq_len"].append(sequence_length)
+                # kept as the raw csv string; converted to float when plotted
+                model_results["result"][(batch_size, sequence_length)] = row["result"]
+
+    def plot(self):
+        """
+        Draw one dashed curve per (model, inner loop value) on log-log axes.
+
+        The x-axis is the batch size if `args.plot_along_batch` is set, the sequence length otherwise.
+        The figure is saved to `args.figure_png_file` if given and shown interactively otherwise.
+        """
+        fig, ax = plt.subplots()
+        title_str = "Time usage" if self.args.is_time else "Memory usage"
+        title_str = title_str + " for training" if self.args.is_train else title_str + " for inference"
+
+        # labels and axis scales do not depend on the data, so they are set once (also works for an empty csv)
+        (x_axis_label, inner_loop_label) = (
+            ("batch_size", "sequence_length in #tokens")
+            if self.args.plot_along_batch
+            else ("sequence_length in #tokens", "batch_size")
+        )
+        y_axis_label = "Time in s" if self.args.is_time else "Memory in MB"
+
+        # `base` is the keyword that replaced `basex`/`basey` (deprecated in matplotlib 3.3, removed later)
+        ax.set_xscale("log", base=2)
+        ax.set_yscale("log", base=10)
+
+        all_x_values = []
+        for model_name, model_results in self.result_dict.items():
+            batch_sizes = sorted(set(model_results["bsz"]))
+            sequence_lengths = sorted(set(model_results["seq_len"]))
+            results = model_results["result"]
+
+            (x_values, inner_loop_array) = (
+                (batch_sizes, sequence_lengths) if self.args.plot_along_batch else (sequence_lengths, batch_sizes)
+            )
+            all_x_values.extend(x_values)
+            x_axis_array = np.asarray(x_values, dtype=int)
+
+            for inner_loop_value in inner_loop_array:
+                # results are keyed by (batch_size, sequence_length) whatever the plotting direction is
+                if self.args.plot_along_batch:
+                    keys = [(x, inner_loop_value) for x in x_values]
+                else:
+                    keys = [(inner_loop_value, x) for x in x_values]
+                # time results are fractional, so both plotting directions have to read them as floats
+                y_axis_array = np.asarray([results[key] for key in keys], dtype=np.float32)
+
+                plt.scatter(x_axis_array, y_axis_array, label=f"{model_name} - {inner_loop_label}: {inner_loop_value}")
+                plt.plot(x_axis_array, y_axis_array, "--")
+
+        if self.result_dict:
+            # cover the x-range of every model instead of only the model that was plotted last
+            plt.xlim(min(all_x_values), max(all_x_values))
+            title_str += " " + " vs. ".join(self.result_dict.keys())
+
+        # plot
+        plt.title(title_str)
+        plt.xlabel(x_axis_label)
+        plt.ylabel(y_axis_label)
+        plt.legend()
+
+        if self.args.figure_png_file is not None:
+            plt.savefig(self.args.figure_png_file)
+        else:
+            plt.show()
+
+
+def main():
+    """Parse `PlotArguments` from the command line and plot the given csv file."""
+    parser = HfArgumentParser(PlotArguments)
+    plot_args = parser.parse_args_into_dataclasses()[0]
+    plot = Plot(args=plot_args)
+    plot.plot()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/benchmarking/run_benchmark.py b/examples/benchmarking/run_benchmark.py
new file mode 100644
index 0000000000..163bcfb6fc
--- /dev/null
+++ b/examples/benchmarking/run_benchmark.py
@@ -0,0 +1,29 @@
+# coding=utf-8
+# Copyright 2018 The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Benchmarking the library on inference and training """
+
+from transformers import HfArgumentParser, PyTorchBenchmark, PyTorchBenchmarkArguments
+
+
+def main():
+    """Run the PyTorch benchmark configured through command line arguments."""
+    # only one dataclass type is handed to the parser, so the first parsed entry holds all arguments
+    benchmark_args = HfArgumentParser(PyTorchBenchmarkArguments).parse_args_into_dataclasses()[0]
+    PyTorchBenchmark(args=benchmark_args).run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/benchmarks.py b/examples/benchmarks.py
deleted file mode 100644
index f215482999..0000000000
--- a/examples/benchmarks.py
+++ /dev/null
@@ -1,710 +0,0 @@
-# coding=utf-8
-# Copyright 2018 The HuggingFace Inc. team.
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" Benchmarking the library on inference and training """ - -# If checking the tensors placement -# tf.debugging.set_log_device_placement(True) - -import argparse -import csv -import logging -import timeit -from time import time -from typing import Callable, List - -from transformers import ( - AutoConfig, - AutoTokenizer, - MemorySummary, - is_tf_available, - is_torch_available, - start_memory_tracing, - stop_memory_tracing, -) - - -if is_tf_available(): - import tensorflow as tf - from transformers import TFAutoModel - -if is_torch_available(): - import torch - from transformers import AutoModel - - -input_text = """Bent over their instruments, three hundred Fertilizers were plunged, as -the Director of Hatcheries and Conditioning entered the room, in the -scarcely breathing silence, the absent-minded, soliloquizing hum or - -whistle, of absorbed concentration. A troop of newly arrived students, -very young, pink and callow, followed nervously, rather abjectly, at the -Director's heels. Each of them carried a notebook, in which, whenever -the great man spoke, he desperately scribbled. Straight from the -horse's mouth. It was a rare privilege. The D. H. C. for Central London -always made a point of personally conducting his new students round -the various departments. - -"Just to give you a general idea," he would explain to them. For of -course some sort of general idea they must have, if they were to do -their work intelligently-though as little of one, if they were to be good -and happy members of society, as possible. For particulars, as every -one knows, make for virtue and happiness; generalities are intellectu- -ally necessary evils. Not philosophers but fret-sawyers and stamp col- -lectors compose the backbone of society. - -"To-morrow," he would add, smiling at them with a slightly menacing -geniality, "you'll be settling down to serious work. You won't have time -for generalities. Meanwhile ..." - -Meanwhile, it was a privilege. 
Straight from the horse's mouth into the -notebook. The boys scribbled like mad. - -Tall and rather thin but upright, the Director advanced into the room. -He had a long chin and big rather prominent teeth, just covered, when -he was not talking, by his full, floridly curved lips. Old, young? Thirty? -Fifty? Fifty-five? It was hard to say. And anyhow the question didn't -arise; in this year of stability, A. F. 632, it didn't occur to you to ask it. - -"I shall begin at the beginning," said the D.H.C. and the more zealous -students recorded his intention in their notebooks: Begin at the begin- -ning. "These," he waved his hand, "are the incubators." And opening -an insulated door he showed them racks upon racks of numbered test- -tubes. "The week's supply of ova. Kept," he explained, "at blood heat; -whereas the male gametes," and here he opened another door, "they -have to be kept at thirty-five instead of thirty-seven. Full blood heat -sterilizes." Rams wrapped in theremogene beget no lambs. 
- -Still leaning against the incubators he gave them, while the pencils -scurried illegibly across the pages, a brief description of the modern - - - -fertilizing process; spoke first, of course, of its surgical introduc- -tion-"the operation undergone voluntarily for the good of Society, not -to mention the fact that it carries a bonus amounting to six months' -salary"; continued with some account of the technique for preserving -the excised ovary alive and actively developing; passed on to a consid- -eration of optimum temperature, salinity, viscosity; referred to the liq- -uor in which the detached and ripened eggs were kept; and, leading -his charges to the work tables, actually showed them how this liquor -was drawn off from the test-tubes; how it was let out drop by drop -onto the specially warmed slides of the microscopes; how the eggs -which it contained were inspected for abnormalities, counted and -transferred to a porous receptacle; how (and he now took them to -watch the operation) this receptacle was immersed in a warm bouillon -containing free-swimming spermatozoa-at a minimum concentration -of one hundred thousand per cubic centimetre, he insisted; and how, -after ten minutes, the container was lifted out of the liquor and its -contents re-examined; how, if any of the eggs remained unfertilized, it -was again immersed, and, if necessary, yet again; how the fertilized -ova went back to the incubators; where the Alphas and Betas re- -mained until definitely bottled; while the Gammas, Deltas and Epsilons -were brought out again, after only thirty-six hours, to undergo Bo- -kanovsky's Process. - -"Bokanovsky's Process," repeated the Director, and the students un- -derlined the words in their little notebooks. - -One egg, one embryo, one adult-normality. But a bokanovskified egg -will bud, will proliferate, will divide. From eight to ninety-six buds, and -every bud will grow into a perfectly formed embryo, and every embryo -into a full-sized adult. 
Making ninety-six human beings grow where -only one grew before. Progress. - -"Essentially," the D.H.C. concluded, "bokanovskification consists of a -series of arrests of development. We check the normal growth and, -paradoxically enough, the egg responds by budding." - -Responds by budding. The pencils were busy. - -He pointed. On a very slowly moving band a rack-full of test-tubes was -entering a large metal box, another, rack-full was emerging. Machinery -faintly purred. It took eight minutes for the tubes to go through, he - - - -told them. Eight minutes of hard X-rays being about as much as an -egg can stand. A few died; of the rest, the least susceptible divided -into two; most put out four buds; some eight; all were returned to the -incubators, where the buds began to develop; then, after two days, -were suddenly chilled, chilled and checked. Two, four, eight, the buds -in their turn budded; and having budded were dosed almost to death -with alcohol; consequently burgeoned again and having budded-bud -out of bud out of bud-were thereafter-further arrest being generally -fatal-left to develop in peace. By which time the original egg was in a -fair way to becoming anything from eight to ninety-six embryos- a -prodigious improvement, you will agree, on nature. Identical twins-but -not in piddling twos and threes as in the old viviparous days, when an -egg would sometimes accidentally divide; actually by dozens, by -scores at a time. - -"Scores," the Director repeated and flung out his arms, as though he -were distributing largesse. "Scores." - -But one of the students was fool enough to ask where the advantage -lay. - -"My good boy!" The Director wheeled sharply round on him. "Can't you -see? Can't you see?" He raised a hand; his expression was solemn. -"Bokanovsky's Process is one of the major instruments of social stabil- -ity!" - -Major instruments of social stability. - -Standard men and women; in uniform batches. 
The whole of a small -factory staffed with the products of a single bokanovskified egg. - -"Ninety-six identical twins working ninety-six identical machines!" The -voice was almost tremulous with enthusiasm. "You really know where -you are. For the first time in history." He quoted the planetary motto. -"Community, Identity, Stability." Grand words. "If we could bo- -kanovskify indefinitely the whole problem would be solved." - -Solved by standard Gammas, unvarying Deltas, uniform Epsilons. Mil- -lions of identical twins. The principle of mass production at last applied -to biology. - - - -"But, alas," the Director shook his head, "we can't bokanovskify indefi- -nitely." - -Ninety-six seemed to be the limit; seventy-two a good average. From -the same ovary and with gametes of the same male to manufacture as -many batches of identical twins as possible-that was the best (sadly a -second best) that they could do. And even that was difficult. - -"For in nature it takes thirty years for two hundred eggs to reach ma- -turity. But our business is to stabilize the population at this moment, -here and now. Dribbling out twins over a quarter of a century-what -would be the use of that?" - -Obviously, no use at all. But Podsnap's Technique had immensely ac- -celerated the process of ripening. They could make sure of at least a -hundred and fifty mature eggs within two years. Fertilize and bo- -kanovskify-in other words, multiply by seventy-two-and you get an -average of nearly eleven thousand brothers and sisters in a hundred -and fifty batches of identical twins, all within two years of the same -age. - -"And in exceptional cases we can make one ovary yield us over fifteen -thousand adult individuals." - -Beckoning to a fair-haired, ruddy young man who happened to be -passing at the moment. "Mr. Foster," he called. The ruddy young man -approached. "Can you tell us the record for a single ovary, Mr. Foster?" - -"Sixteen thousand and twelve in this Centre," Mr. 
Foster replied with- -out hesitation. He spoke very quickly, had a vivacious blue eye, and -took an evident pleasure in quoting figures. "Sixteen thousand and -twelve; in one hundred and eighty-nine batches of identicals. But of -course they've done much better," he rattled on, "in some of the tropi- -cal Centres. Singapore has often produced over sixteen thousand five -hundred; and Mombasa has actually touched the seventeen thousand -mark. But then they have unfair advantages. You should see the way a -negro ovary responds to pituitary! It's quite astonishing, when you're -used to working with European material. Still," he added, with a laugh -(but the light of combat was in his eyes and the lift of his chin was -challenging), "still, we mean to beat them if we can. I'm working on a -wonderful Delta-Minus ovary at this moment. Only just eighteen - - - -months old. Over twelve thousand seven hundred children already, ei- -ther decanted or in embryo. And still going strong. We'll beat them -yet." - -"That's the spirit I like!" cried the Director, and clapped Mr. Foster on -the shoulder. "Come along with us, and give these boys the benefit of -your expert knowledge." - -Mr. Foster smiled modestly. "With pleasure." They went. -In the Bottling Room all was harmonious bustle and ordered activity. -Flaps of fresh sow's peritoneum ready cut to the proper size came -shooting up in little lifts from the Organ Store in the sub-basement. -Whizz and then, click! the lift-hatches hew open; the bottle-liner had -only to reach out a hand, take the flap, insert, smooth-down, and be- -fore the lined bottle had had time to travel out of reach along the end- -less band, whizz, click! another flap of peritoneum had shot up from -the depths, ready to be slipped into yet another bottle, the next of that -slow interminable procession on the band. - -Next to the Liners stood the Matriculators. 
The procession advanced; -one by one the eggs were transferred from their test-tubes to the -larger containers; deftly the peritoneal lining was slit, the morula -dropped into place, the saline solution poured in ... and already the -bottle had passed, and it was the turn of the labellers. Heredity, date -of fertilization, membership of Bokanovsky Group-details were trans- -ferred from test-tube to bottle. No longer anonymous, but named, -identified, the procession marched slowly on; on through an opening in -the wall, slowly on into the Social Predestination Room. -"Eighty-eight cubic metres of card-index," said Mr. Foster with relish, -as they entered.""" - - -def create_setup_and_compute( - model_names: List[str], - batch_sizes: List[int], - slice_sizes: List[int], - gpu: bool = True, - tensorflow: bool = False, - average_over: int = 3, - no_speed: bool = False, - no_memory: bool = False, - verbose: bool = False, - torchscript: bool = False, - xla: bool = False, - amp: bool = False, - fp16: bool = False, - save_to_csv: bool = False, - csv_time_filename: str = f"time_{round(time())}.csv", - csv_memory_filename: str = f"memory_{round(time())}.csv", - print_fn: Callable[[str], None] = print, -): - if xla: - tf.config.optimizer.set_jit(True) - if amp: - tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True}) - - if tensorflow: - dictionary = {model_name: {} for model_name in model_names} - results = _compute_tensorflow( - model_names, - batch_sizes, - slice_sizes, - dictionary, - average_over, - amp, - no_speed, - no_memory, - verbose, - print_fn, - ) - else: - device = "cuda" if (gpu and torch.cuda.is_available()) else "cpu" - dictionary = {model_name: {} for model_name in model_names} - results = _compute_pytorch( - model_names, - batch_sizes, - slice_sizes, - dictionary, - average_over, - device, - torchscript, - fp16, - no_speed, - no_memory, - verbose, - print_fn, - ) - - print_fn("=========== RESULTS ===========") - for model_name in 
model_names: - print_fn("\t" + f"======= MODEL CHECKPOINT: {model_name} =======") - for batch_size in results[model_name]["bs"]: - print_fn("\t\t" + f"===== BATCH SIZE: {batch_size} =====") - for slice_size in results[model_name]["ss"]: - time = results[model_name]["time"][batch_size][slice_size] - memory = results[model_name]["memory"][batch_size][slice_size] - if isinstance(time, str): - print_fn(f"\t\t{model_name}/{batch_size}/{slice_size}: " f"{time} " f"{memory}") - else: - print_fn( - f"\t\t{model_name}/{batch_size}/{slice_size}: " - f"{(round(1000 * time) / 1000)}" - f"s " - f"{memory}" - ) - - if save_to_csv: - with open(csv_time_filename, mode="w") as csv_time_file, open( - csv_memory_filename, mode="w" - ) as csv_memory_file: - - assert len(model_names) > 0, "At least 1 model should be defined, but got {}".format(model_names) - - fieldnames = ["model", "batch_size", "sequence_length"] - time_writer = csv.DictWriter(csv_time_file, fieldnames=fieldnames + ["time_in_s"]) - time_writer.writeheader() - memory_writer = csv.DictWriter(csv_memory_file, fieldnames=fieldnames + ["memory"]) - memory_writer.writeheader() - - for model_name in model_names: - time_dict = results[model_name]["time"] - memory_dict = results[model_name]["memory"] - for bs in time_dict: - for ss in time_dict[bs]: - time_writer.writerow( - { - "model": model_name, - "batch_size": bs, - "sequence_length": ss, - "time_in_s": "{:.4f}".format(time_dict[bs][ss]), - } - ) - - for bs in memory_dict: - for ss in time_dict[bs]: - memory_writer.writerow( - { - "model": model_name, - "batch_size": bs, - "sequence_length": ss, - "memory": memory_dict[bs][ss], - } - ) - - -def print_summary_statistics(summary: MemorySummary, print_fn: Callable[[str], None]): - print_fn( - "\nLines by line memory consumption:\n" - + "\n".join( - f"{state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}" - for state in summary.sequential - ) - ) - print_fn( - "\nLines with top memory 
consumption:\n" - + "\n".join( - f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}" - for state in summary.cumulative[:6] - ) - ) - print_fn( - "\nLines with lowest memory consumption:\n" - + "\n".join( - f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}" - for state in summary.cumulative[-6:] - ) - ) - print_fn(f"\nTotal memory increase: {summary.total}") - - -def get_print_function(save_print_log, log_filename): - if save_print_log: - logging.basicConfig( - level=logging.DEBUG, - filename=log_filename, - filemode="a+", - format="%(asctime)-15s %(levelname)-8s %(message)s", - ) - - def print_with_print_log(*args): - logging.info(*args) - print(*args) - - return print_with_print_log - else: - return print - - -def _compute_pytorch( - model_names, - batch_sizes, - slice_sizes, - dictionary, - average_over, - device, - torchscript, - fp16, - no_speed, - no_memory, - verbose, - print_fn, -): - for c, model_name in enumerate(model_names): - print_fn(f"{c + 1} / {len(model_names)}") - config = AutoConfig.from_pretrained(model_name, torchscript=torchscript) - model = AutoModel.from_pretrained(model_name, config=config) - tokenizer = AutoTokenizer.from_pretrained(model_name) - - tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False) - - max_input_size = tokenizer.max_model_input_sizes[model_name] - - dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "time": {}, "memory": {}} - dictionary[model_name]["time"] = {i: {} for i in batch_sizes} - dictionary[model_name]["memory"] = {i: {} for i in batch_sizes} - - print_fn("Using model {}".format(model)) - print_fn("Number of all parameters {}".format(model.num_parameters())) - - for batch_size in batch_sizes: - if fp16: - model.half() - model.to(device) - model.eval() - - for slice_size in slice_sizes: - if max_input_size is not None and slice_size > max_input_size: - 
dictionary[model_name]["time"][batch_size][slice_size] = "N/A" - else: - sequence = torch.tensor(tokenized_sequence[:slice_size], device=device).repeat(batch_size, 1) - try: - if torchscript: - print_fn("Tracing model with sequence size {}".format(sequence.shape)) - inference = torch.jit.trace(model, sequence) - inference(sequence) - else: - inference = model - inference(sequence) - - if not no_memory: - # model.add_memory_hooks() # Forward method tracing (only for PyTorch models) - - # Line by line memory tracing (all code in the module `transformers`) works for all models/arbitrary code - trace = start_memory_tracing("transformers") - inference(sequence) - summary = stop_memory_tracing(trace) - - if verbose: - print_summary_statistics(summary, print_fn) - - dictionary[model_name]["memory"][batch_size][slice_size] = str(summary.total) - else: - dictionary[model_name]["memory"][batch_size][slice_size] = "N/A" - - if not no_speed: - print_fn("Going through model with sequence of shape {}".format(sequence.shape)) - runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3) - average_time = sum(runtimes) / float(len(runtimes)) / 3.0 - dictionary[model_name]["time"][batch_size][slice_size] = average_time - else: - dictionary[model_name]["time"][batch_size][slice_size] = "N/A" - - except RuntimeError as e: - print_fn("Doesn't fit on GPU. 
{}".format(e)) - torch.cuda.empty_cache() - dictionary[model_name]["time"][batch_size][slice_size] = "N/A" - dictionary[model_name]["memory"][batch_size][slice_size] = "N/A" - return dictionary - - -def _compute_tensorflow( - model_names, batch_sizes, slice_sizes, dictionary, average_over, amp, no_speed, no_memory, verbose, print_fn -): - for c, model_name in enumerate(model_names): - print_fn(f"{c + 1} / {len(model_names)}") - config = AutoConfig.from_pretrained(model_name) - model = TFAutoModel.from_pretrained(model_name, config=config) - tokenizer = AutoTokenizer.from_pretrained(model_name) - - tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False) - - max_input_size = tokenizer.max_model_input_sizes[model_name] - - dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "time": {}, "memory": {}} - dictionary[model_name]["time"] = {i: {} for i in batch_sizes} - dictionary[model_name]["memory"] = {i: {} for i in batch_sizes} - - print_fn("Using model {}".format(model)) - print_fn("Number of all parameters {}".format(model.num_parameters())) - - @tf.function - def inference(inputs): - return model(inputs) - - for batch_size in batch_sizes: - for slice_size in slice_sizes: - if max_input_size is not None and slice_size > max_input_size: - dictionary[model_name]["time"][batch_size][slice_size] = "N/A" - else: - sequence = tf.stack( - [tf.squeeze(tf.constant(tokenized_sequence[:slice_size])[None, :])] * batch_size - ) - - try: - print_fn("Going through model with sequence of shape {}".format(sequence.shape)) - # To make sure that the model is traced + that the tensors are on the appropriate device - inference(sequence) - - if not no_memory: - # Line by line memory tracing (all code in the module `transformers`) works for all models/arbitrary code - trace = start_memory_tracing("transformers") - inference(sequence) - summary = stop_memory_tracing(trace) - - if verbose: - print_summary_statistics(summary, print_fn) - - 
dictionary[model_name]["memory"][batch_size][slice_size] = str(summary.total) - else: - dictionary[model_name]["memory"][batch_size][slice_size] = "N/A" - - if not no_speed: - runtimes = timeit.repeat(lambda: inference(sequence), repeat=average_over, number=3) - average_time = sum(runtimes) / float(len(runtimes)) / 3.0 - dictionary[model_name]["time"][batch_size][slice_size] = average_time - else: - dictionary[model_name]["time"][batch_size][slice_size] = "N/A" - - except tf.errors.ResourceExhaustedError as e: - print_fn("Doesn't fit on GPU. {}".format(e)) - dictionary[model_name]["time"][batch_size][slice_size] = "N/A" - dictionary[model_name]["memory"][batch_size][slice_size] = "N/A" - return dictionary - - -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument( - "--models", - required=False, - type=str, - default="all", - help="Model checkpoints to be provided " - "to the AutoModel classes. Leave " - "blank to benchmark the base version " - "of all available model " - "architectures.", - ) - parser.add_argument("--verbose", required=False, action="store_true", help="Verbose memory tracing") - parser.add_argument("--no_speed", required=False, action="store_true", help="Don't perform speed measurments") - parser.add_argument("--no_memory", required=False, action="store_true", help="Don't perform memory measurments") - parser.add_argument( - "--torch", required=False, action="store_true", help="Benchmark the Pytorch version of the " "models" - ) - parser.add_argument( - "--torch_cuda", required=False, action="store_true", help="Pytorch only: run on available " "cuda devices" - ) - parser.add_argument( - "--torchscript", - required=False, - action="store_true", - help="Pytorch only: trace the models " "using torchscript", - ) - parser.add_argument( - "--tensorflow", - required=False, - action="store_true", - help="Benchmark the TensorFlow version " - "of the models. 
Will run on GPU if " - "the correct dependencies are " - "installed", - ) - parser.add_argument("--xla", required=False, action="store_true", help="TensorFlow only: use XLA acceleration.") - parser.add_argument( - "--amp", - required=False, - action="store_true", - help="TensorFlow only: use automatic mixed precision acceleration.", - ) - parser.add_argument( - "--fp16", required=False, action="store_true", help="PyTorch only: use FP16 to accelerate inference." - ) - parser.add_argument( - "--keras_predict", - required=False, - action="store_true", - help="Whether to use model.predict " "instead of model() to do a " "forward pass.", - ) - parser.add_argument("--save_to_csv", required=False, action="store_true", help="Save to a CSV file.") - parser.add_argument( - "--log_print", required=False, action="store_true", help="Save all print statements in log file." - ) - parser.add_argument( - "--csv_time_filename", - required=False, - default=f"time_{round(time())}.csv", - help="CSV filename used if saving time results to csv.", - ) - parser.add_argument( - "--csv_memory_filename", - required=False, - default=f"memory_{round(time())}.csv", - help="CSV filename used if saving memory results to csv.", - ) - parser.add_argument( - "--log_filename", - required=False, - default=f"log_{round(time())}.txt", - help="Log filename used if print statements are saved in log.", - ) - parser.add_argument( - "--average_over", required=False, default=30, type=int, help="Times an experiment will be run." 
- ) - parser.add_argument("--batch_sizes", nargs="+", type=int, default=[1, 2, 4, 8]) - parser.add_argument("--slice_sizes", nargs="+", type=int, default=[8, 64, 128, 256, 512, 1024]) - - args = parser.parse_args() - if args.models == "all": - args.models = [ - "gpt2", - "bert-base-cased", - "xlnet-base-cased", - "xlm-mlm-en-2048", - "transfo-xl-wt103", - "openai-gpt", - "distilbert-base-uncased", - "distilgpt2", - "roberta-base", - "ctrl", - "t5-base", - "bart-large", - ] - else: - args.models = args.models.split() - - print_fn = get_print_function(args.log_print, args.log_filename) - print_fn("Running with arguments: {}".format(args)) - - if args.torch: - if is_torch_available(): - create_setup_and_compute( - model_names=args.models, - batch_sizes=args.batch_sizes, - slice_sizes=args.slice_sizes, - tensorflow=False, - gpu=args.torch_cuda, - torchscript=args.torchscript, - fp16=args.fp16, - save_to_csv=args.save_to_csv, - csv_time_filename=args.csv_time_filename, - csv_memory_filename=args.csv_memory_filename, - average_over=args.average_over, - no_speed=args.no_speed, - no_memory=args.no_memory, - verbose=args.verbose, - print_fn=print_fn, - ) - else: - raise ImportError("Trying to run a PyTorch benchmark but PyTorch was not found in the environment.") - - if args.tensorflow: - if is_tf_available(): - create_setup_and_compute( - model_names=args.models, - batch_sizes=args.batch_sizes, - slice_sizes=args.slice_sizes, - tensorflow=True, - xla=args.xla, - amp=args.amp, - save_to_csv=args.save_to_csv, - csv_time_filename=args.csv_time_filename, - csv_memory_filename=args.csv_memory_filename, - average_over=args.average_over, - no_speed=args.no_speed, - no_memory=args.no_memory, - verbose=args.verbose, - print_fn=print_fn, - ) - else: - raise ImportError("Trying to run a TensorFlow benchmark but TensorFlow was not found in the environment.") - - -if __name__ == "__main__": - main() diff --git a/examples/requirements.txt b/examples/requirements.txt index 
3e8717564e..474600d98d 100644 --- a/examples/requirements.txt +++ b/examples/requirements.txt @@ -6,3 +6,4 @@ sacrebleu rouge-score tensorflow_datasets pytorch-lightning==0.7.3 # April 10, 2020 release +matplotlib diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 2b3cc54ff3..0787232e76 100755 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -19,19 +19,6 @@ else: import logging -# Benchmarking -from .benchmark_utils import ( - Frame, - Memory, - MemoryState, - MemorySummary, - MemoryTrace, - UsedMemoryState, - bytes_to_human_readable, - start_memory_tracing, - stop_memory_tracing, -) - # Configurations from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, CONFIG_MAPPING, AutoConfig @@ -358,6 +345,9 @@ if is_torch_available(): from .data.data_collator import DefaultDataCollator, DataCollator, DataCollatorForLanguageModeling from .data.datasets import GlueDataset, TextDataset, LineByLineTextDataset, GlueDataTrainingArguments + # Benchmarks + from .benchmark import PyTorchBenchmark, PyTorchBenchmarkArguments + # TensorFlow if is_tf_available(): from .modeling_tf_utils import ( diff --git a/src/transformers/benchmark/__init__.py b/src/transformers/benchmark/__init__.py new file mode 100644 index 0000000000..5eae4b2cb3 --- /dev/null +++ b/src/transformers/benchmark/__init__.py @@ -0,0 +1,10 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. 
+ +from ..file_utils import is_torch_available + + +if is_torch_available(): + from .benchmark_args import PyTorchBenchmarkArguments + from .benchmark import PyTorchBenchmark diff --git a/src/transformers/benchmark/benchmark.py b/src/transformers/benchmark/benchmark.py new file mode 100644 index 0000000000..bb86b50f8b --- /dev/null +++ b/src/transformers/benchmark/benchmark.py @@ -0,0 +1,146 @@ +# coding=utf-8 +# Copyright 2018 The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + Benchmarking the library on inference and training in PyTorch. 
import inspect
import logging
import timeit

from transformers import MODEL_MAPPING, MODEL_WITH_LM_HEAD_MAPPING, PretrainedConfig, is_torch_available

from .benchmark_utils import Benchmark, Memory, start_memory_tracing, stop_memory_tracing


if is_torch_available():
    import torch
    from .benchmark_args import PyTorchBenchmarkArguments


logger = logging.getLogger(__name__)


class PyTorchBenchmark(Benchmark):
    """
    PyTorch implementation of `Benchmark`.

    `train` and `inference` each build a model from the config stored under
    `self.config_dict[model_name]`, feed it random token ids of shape
    `(batch_size, sequence_length)` and return either a `Memory` value
    (`trace_memory=True`) or the best per-call runtime in seconds.
    Both return the string "N/A" when a `RuntimeError` is raised while running.
    """

    args: PyTorchBenchmarkArguments
    configs: PretrainedConfig
    framework: str = "PyTorch"

    # Names under which the language-modeling labels may be accepted by `model.forward`.
    _LABEL_ARGUMENT_NAMES = ("labels", "lm_labels", "masked_lm_labels")

    @property
    def framework_version(self):
        """Version string of the installed PyTorch."""
        return torch.__version__

    def _measure_memory(self, run_once):
        """
        Call `run_once()` a single time and return the memory it used as a `Memory`.

        Line-by-line tracing is used when requested or when no GPU is available;
        otherwise the peak memory reported by the CUDA allocator is returned.
        """
        use_line_tracer = self.args.trace_memory_line_by_line or self.args.n_gpu == 0
        # `max_memory_reserved` is missing in older PyTorch releases; fall back to the `*_cached` API there.
        has_reserved_api = hasattr(torch.cuda, "max_memory_reserved")

        if use_line_tracer:
            trace = start_memory_tracing("transformers")
        else:
            # clear cuda cache
            torch.cuda.empty_cache()
            if has_reserved_api:
                torch.cuda.reset_peak_memory_stats()
            else:
                logger.info("Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage")
                torch.cuda.reset_max_memory_cached()

        run_once()

        if use_line_tracer:
            summary = stop_memory_tracing(trace)
            return summary.total

        if has_reserved_api:
            return Memory(torch.cuda.max_memory_reserved())

        logger.info("Please consider updating PyTorch to version 1.4 to get more accuracy on GPU memory usage")
        return Memory(torch.cuda.max_memory_cached())

    def train(self, model_name, batch_size, sequence_length, trace_memory=False):
        """
        Benchmark one forward + backward pass of the LM-head model for `model_name`.

        Returns:
            A `Memory` value if `trace_memory` is True, otherwise the minimum
            average runtime (seconds per step) over `self.args.repeat` repetitions
            of 10 steps; "N/A" if a `RuntimeError` occurs.

        Raises:
            NotImplementedError: if the model's `forward` accepts none of the
                known label argument names.
        """
        try:
            config = self.config_dict[model_name]
            model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)
            model.to(self.args.device)
            model.train()

            input_ids = torch.randint(
                model.config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
            )

            # TODO: Not all models call labels argument labels => this hack using the function signature
            # should be corrected once all models have a common name for labels
            # The lookup is done once here so that it is not part of the timed step.
            function_argument_names = inspect.getfullargspec(model.forward).args
            for label_name in self._LABEL_ARGUMENT_NAMES:
                if label_name in function_argument_names:
                    label_kwargs = {label_name: input_ids}
                    break
            else:
                raise NotImplementedError(f"{model_name} does not seem to allow training with labels")

            def compute_loss_and_backprob():
                loss = model(input_ids, **label_kwargs)[0]
                loss.backward()
                model.zero_grad()

            if trace_memory is True:
                return self._measure_memory(compute_loss_and_backprob)

            # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat,
            # min should be taken rather than the average
            runtimes = timeit.repeat(compute_loss_and_backprob, repeat=self.args.repeat, number=10)
            return min(runtimes) / 10.0
        except NotImplementedError:
            # NotImplementedError subclasses RuntimeError: re-raise it so an unsupported model
            # is not misreported as an out-of-memory failure below.
            raise
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A"

    def inference(self, model_name, batch_size, sequence_length, trace_memory=False):
        """
        Benchmark one forward pass of the base model for `model_name`.

        Returns:
            A `Memory` value if `trace_memory` is True, otherwise the minimum
            average runtime (seconds per call) over `self.args.repeat` repetitions
            of 10 calls; "N/A" if a `RuntimeError` occurs.
        """
        try:
            config = self.config_dict[model_name]
            model = MODEL_MAPPING[config.__class__](config)
            model.to(self.args.device)
            model.eval()

            input_ids = torch.randint(
                config.vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device
            )

            def forward_pass():
                model(input_ids)

            if trace_memory is True:
                return self._measure_memory(forward_pass)

            # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat,
            # min should be taken rather than the average
            runtimes = timeit.repeat(forward_pass, repeat=self.args.repeat, number=10)
            return min(runtimes) / 10.0
        except RuntimeError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A"
import logging
from dataclasses import dataclass, field
from typing import Tuple

from ..file_utils import cached_property, is_torch_available, torch_required
from .benchmark_args_utils import BenchmarkArguments


if is_torch_available():
    import torch

try:
    import torch_xla.core.xla_model as xm

    _has_tpu = True
except ImportError:
    _has_tpu = False


@torch_required
def is_tpu_available():
    """Return True when `torch_xla` could be imported at module load time."""
    return _has_tpu


logger = logging.getLogger(__name__)


@dataclass
class PyTorchBenchmarkArguments(BenchmarkArguments):
    """
    PyTorch-specific benchmark arguments, on top of the generic `BenchmarkArguments`.
    """

    # The flag disables CUDA, so the help text must describe the opt-out.
    no_cuda: bool = field(default=False, metadata={"help": "Do not use CUDA even when it is available"})
    torchscript: bool = field(default=False, metadata={"help": "Trace the models using torchscript"})
    fp16: bool = field(default=False, metadata={"help": "Use FP16 to accelerate inference."})

    @cached_property
    @torch_required
    def _setup_devices(self) -> Tuple["torch.device", int]:
        """
        Select the device to benchmark on and count the usable GPUs.

        Precedence: CPU when `no_cuda` is set, then the XLA device when `torch_xla`
        is importable, then CUDA if available, else CPU. `n_gpu` is 0 for the
        CPU (`no_cuda`) and XLA cases.
        """
        logger.info("PyTorch: setting up devices")
        if self.no_cuda:
            return torch.device("cpu"), 0
        if is_tpu_available():
            return xm.xla_device(), 0
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return device, torch.cuda.device_count()

    @property
    @torch_required
    def device_idx(self) -> int:
        """Index of the currently selected CUDA device."""
        return torch.cuda.current_device()

    @property
    @torch_required
    def device(self) -> "torch.device":
        """Device selected by `_setup_devices`."""
        return self._setup_devices[0]

    @property
    @torch_required
    def n_gpu(self) -> int:
        """Number of GPUs reported by `_setup_devices`."""
        return self._setup_devices[1]
import copy
import dataclasses
import json
from dataclasses import dataclass, field
from time import time
from typing import List


def list_field(default=None, metadata=None):
    """
    Build a dataclass field whose default is a fresh copy of `default`.

    Returning `default` itself from the factory would make every instance share
    (and be able to mutate) the same list object, so a shallow copy is handed
    out on each call instead. `default=None` still yields `None`.
    """
    return field(default_factory=lambda: copy.copy(default), metadata=metadata)


@dataclass
class BenchmarkArguments:
    """
    BenchMarkArguments are the arguments used by the benchmark scripts: which
    models, batch sizes and sequence lengths to measure, what to measure and
    where to write the results.

    Using `HfArgumentParser` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.
    """

    models: List[str] = list_field(
        default=[],
        metadata={
            "help": "Model checkpoints to be provided to the AutoModel classes. Leave blank to benchmark the base version of all available models"
        },
    )

    batch_sizes: List[int] = list_field(
        default=[8], metadata={"help": "List of batch sizes for which memory and time performance will be evaluated"}
    )

    sequence_lengths: List[int] = list_field(
        default=[8, 32, 128, 512],
        metadata={"help": "List of sequence lengths for which memory and time performance will be evaluated"},
    )

    no_inference: bool = field(default=False, metadata={"help": "Don't benchmark inference of model"})
    training: bool = field(default=False, metadata={"help": "Benchmark training of model"})
    verbose: bool = field(default=False, metadata={"help": "Verbose memory tracing"})
    no_speed: bool = field(default=False, metadata={"help": "Don't perform speed measurments"})
    no_memory: bool = field(default=False, metadata={"help": "Don't perform memory measurments"})
    trace_memory_line_by_line: bool = field(default=False, metadata={"help": "Trace memory line by line"})
    save_to_csv: bool = field(default=False, metadata={"help": "Save result to a CSV file"})
    log_print: bool = field(default=False, metadata={"help": "Save all print statements in a log file"})
    no_env_print: bool = field(default=False, metadata={"help": "Don't print environment information"})
    # NOTE: the timestamps in the default filenames below are evaluated once, when the class is defined.
    inference_time_csv_file: str = field(
        default=f"inference_time_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving time results to csv."},
    )
    inference_memory_csv_file: str = field(
        default=f"inference_memory_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving memory results to csv."},
    )
    train_time_csv_file: str = field(
        default=f"train_time_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving time results to csv for training."},
    )
    train_memory_csv_file: str = field(
        default=f"train_memory_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving memory results to csv for training."},
    )
    env_info_csv_file: str = field(
        default=f"env_info_{round(time())}.csv",
        metadata={"help": "CSV filename used if saving environment information."},
    )
    log_filename: str = field(
        default=f"log_{round(time())}.csv",
        metadata={"help": "Log filename used if print statements are saved in log."},
    )
    repeat: int = field(default=3, metadata={"help": "Times an experiment will be run."})

    def to_json_string(self):
        """
        Serializes this instance to a JSON string.
        """
        return json.dumps(dataclasses.asdict(self), indent=2)

    @property
    def model_names(self):
        """Alias for `models`."""
        return self.models
""" +import copy +import csv import linecache import logging import os +import platform import sys -from collections import defaultdict +from abc import ABC, abstractmethod +from collections import defaultdict, namedtuple +from datetime import datetime from typing import Iterable, List, NamedTuple, Optional, Union -from .file_utils import is_tf_available, is_torch_available +from transformers import AutoConfig, PretrainedConfig +from transformers import __version__ as version + +from ..file_utils import is_tf_available, is_torch_available +from .benchmark_args_utils import BenchmarkArguments if is_torch_available(): from torch.cuda import empty_cache as torch_empty_cache + if is_tf_available(): from tensorflow.python.eager import context as tf_context @@ -25,6 +35,10 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name _is_memory_tracing_enabled = False +BenchmarkOutput = namedtuple( + "BenchmarkOutput", ["time_inference_result", "memory_inference_result", "time_train_result", "memory_train_result"] +) + def is_memory_tracing_enabled(): global _is_memory_tracing_enabled @@ -62,14 +76,14 @@ class UsedMemoryState(NamedTuple): class Memory(NamedTuple): """ `Memory` NamedTuple have a single field `bytes` and - you can get a human readable string of the number of bytes by calling `__repr__` + you can get a human readable str of the number of mega bytes by calling `__repr__` - `byte` (integer): number of bytes, """ bytes: int def __repr__(self) -> str: - return bytes_to_human_readable(self.bytes) + return str(bytes_to_mega_bytes(self.bytes)) class MemoryState(NamedTuple): @@ -99,6 +113,7 @@ class MemorySummary(NamedTuple): sequential: List[MemoryState] cumulative: List[MemoryState] + current: List[MemoryState] total: Memory @@ -234,10 +249,12 @@ def start_memory_tracing( # Sum used memory for all GPUs py3nvml.nvmlInit() + for i in devices: handle = py3nvml.nvmlDeviceGetHandleByIndex(i) meminfo = py3nvml.nvmlDeviceGetMemoryInfo(handle) gpu_mem += 
meminfo.used + py3nvml.nvmlShutdown() mem_state = UsedMemoryState(traced_state, cpu_mem, gpu_mem) @@ -295,8 +312,11 @@ def stop_memory_tracing( if memory_trace is not None and len(memory_trace) > 1: memory_diff_trace = [] + memory_curr_trace = [] + cumulative_memory_dict = defaultdict(lambda: [0, 0, 0]) - for (frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem) in zip( + + for ((frame, cpu_mem, gpu_mem), (next_frame, next_cpu_mem, next_gpu_mem),) in zip( memory_trace[:-1], memory_trace[1:] ): cpu_mem_inc = next_cpu_mem - cpu_mem @@ -307,6 +327,16 @@ def stop_memory_tracing( frame=frame, cpu=Memory(cpu_mem_inc), gpu=Memory(gpu_mem_inc), cpu_gpu=Memory(cpu_gpu_mem_inc), ) ) + + memory_curr_trace.append( + MemoryState( + frame=frame, + cpu=Memory(next_cpu_mem), + gpu=Memory(next_gpu_mem), + cpu_gpu=Memory(next_gpu_mem + next_cpu_mem), + ) + ) + cumulative_memory_dict[frame][0] += cpu_mem_inc cumulative_memory_dict[frame][1] += gpu_mem_inc cumulative_memory_dict[frame][2] += cpu_gpu_mem_inc @@ -321,21 +351,287 @@ def stop_memory_tracing( for frame, (cpu_mem_inc, gpu_mem_inc, cpu_gpu_mem_inc) in cumulative_memory ) + memory_curr_trace = sorted(memory_curr_trace, key=lambda x: x.cpu_gpu.bytes, reverse=True) + if ignore_released_memory: total_memory = sum(max(0, step_trace.cpu_gpu.bytes) for step_trace in memory_diff_trace) else: total_memory = sum(step_trace.cpu_gpu.bytes for step_trace in memory_diff_trace) + total_memory = Memory(total_memory) - return MemorySummary(sequential=memory_diff_trace, cumulative=cumulative_memory, total=total_memory) + + return MemorySummary( + sequential=memory_diff_trace, cumulative=cumulative_memory, current=memory_curr_trace, total=total_memory, + ) return None -def bytes_to_human_readable(memory_amount: int) -> str: - """ Utility to convert a number of bytes (int) in a human readable string (with units) +def bytes_to_mega_bytes(memory_amount: int) -> int: + """ Utility to convert a number of bytes (int) into a number of 
def bytes_to_mega_bytes(memory_amount: int) -> int:
    """ Utility to convert a number of bytes (int) into a number of mega bytes (int)
    """
    return memory_amount >> 20


class Benchmark(ABC):
    """
    Benchmarks is a simple but feature-complete benchmarking script
    to compare memory and time performance of models in Transformers.

    Subclasses provide `framework`, `framework_version`, `train` and `inference`;
    `run` drives them over every (model, batch size, sequence length) combination
    in `self.args`, prints the results and optionally writes them to CSV files.
    """

    args: BenchmarkArguments
    configs: PretrainedConfig
    framework: str

    def __init__(self, args: BenchmarkArguments = None, configs: PretrainedConfig = None):
        """
        Args:
            args: benchmark arguments; `args.model_names` determines the models to run.
            configs: optional configs, paired positionally with `args.model_names`.
                When omitted, each config is loaded with `AutoConfig.from_pretrained`.
        """
        self.args = args

        if configs is None:
            self.config_dict = {
                model_name: AutoConfig.from_pretrained(model_name) for model_name in self.args.model_names
            }
        else:
            self.config_dict = dict(zip(self.args.model_names, configs))

        self._print_fn = None
        self._framework_version = None
        self._environment_info = None

    @property
    def print_fn(self):
        """
        Callable used for all output: plain `print`, or a function that both
        prints and logs to `args.log_filename` when `args.log_print` is set.
        """
        if self._print_fn is None:
            if self.args.log_print:
                logging.basicConfig(
                    level=logging.DEBUG,
                    filename=self.args.log_filename,
                    filemode="a+",
                    format="%(asctime)-15s %(levelname)-8s %(message)s",
                )

                def print_and_log(*args):
                    # Join the arguments the way `print` does; handing them to `logging.info`
                    # unchanged would treat every argument after the first as a %-format argument.
                    logging.info(" ".join(str(arg) for arg in args))
                    print(*args)

                self._print_fn = print_and_log
            else:
                self._print_fn = print
        return self._print_fn

    @property
    def is_gpu(self):
        """True when `args.n_gpu` is greater than zero."""
        return self.args.n_gpu > 0

    @property
    @abstractmethod
    def framework_version(self):
        pass

    # `run` passes `trace_memory` by keyword, so it is part of the abstract contract.
    @abstractmethod
    def train(self, model_name, batch_size, sequence_length, trace_memory=False):
        pass

    @abstractmethod
    def inference(self, model_name, batch_size, sequence_length, trace_memory=False):
        pass

    def run(self):
        """
        Run every requested measurement, print it and optionally save it.

        Returns:
            BenchmarkOutput with the inference time, inference memory, train time
            and train memory dictionaries. Each maps a model name to
            `{"bs": [...], "ss": [...], "result": {batch_size: {sequence_length: value}}}`.
        """
        model_names = self.args.model_names
        inference_result_time = {}
        inference_result_memory = {}
        train_result_time = {}
        train_result_memory = {}

        for c, model_name in enumerate(model_names):
            self.print_fn(f"{c + 1} / {len(model_names)}")

            model_dict = {
                "bs": self.args.batch_sizes,
                "ss": self.args.sequence_lengths,
                "result": {i: {} for i in self.args.batch_sizes},
            }
            inference_result_time[model_name] = copy.deepcopy(model_dict)
            inference_result_memory[model_name] = copy.deepcopy(model_dict)
            train_result_time[model_name] = copy.deepcopy(model_dict)
            train_result_memory[model_name] = copy.deepcopy(model_dict)

            for batch_size in self.args.batch_sizes:
                for sequence_length in self.args.sequence_lengths:
                    if not self.args.no_inference:
                        if not self.args.no_memory:
                            memory = self.inference(model_name, batch_size, sequence_length, trace_memory=True)
                            inference_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                        if not self.args.no_speed:
                            time = self.inference(model_name, batch_size, sequence_length, trace_memory=False)
                            inference_result_time[model_name]["result"][batch_size][sequence_length] = time

                    if self.args.training:
                        if not self.args.no_memory:
                            memory = self.train(model_name, batch_size, sequence_length, trace_memory=True)
                            train_result_memory[model_name]["result"][batch_size][sequence_length] = memory
                        if not self.args.no_speed:
                            # Training speed must be measured with `train`, not `inference`.
                            time = self.train(model_name, batch_size, sequence_length, trace_memory=False)
                            train_result_time[model_name]["result"][batch_size][sequence_length] = time

        if not self.args.no_inference:
            if not self.args.no_speed:
                self.print_fn("======= INFERENCE - SPEED - RESULT =======")
                self.print_results(inference_result_time)
                self.save_to_csv(inference_result_time, self.args.inference_time_csv_file)

            if not self.args.no_memory:
                self.print_fn("======= INFERENCE - MEMORY - RESULT =======")
                self.print_results(inference_result_memory)
                self.save_to_csv(inference_result_memory, self.args.inference_memory_csv_file)

        if self.args.training:
            if not self.args.no_speed:
                self.print_fn("======= TRAIN - SPEED - RESULT =======")
                self.print_results(train_result_time)
                self.save_to_csv(train_result_time, self.args.train_time_csv_file)

            if not self.args.no_memory:
                self.print_fn("======= TRAIN - MEMORY - RESULT =======")
                self.print_results(train_result_memory)
                self.save_to_csv(train_result_memory, self.args.train_memory_csv_file)

        if not self.args.no_env_print:
            self.print_fn("\n======== ENVIRONMENT - INFORMATION ========")
            self.print_fn(
                "\n".join(["- {}: {}".format(prop, val) for prop, val in self.environment_info.items()]) + "\n"
            )

        if self.args.save_to_csv:
            with open(self.args.env_info_csv_file, mode="w", newline="") as csv_file:
                writer = csv.writer(csv_file)
                for key, value in self.environment_info.items():
                    writer.writerow([key, value])

        return BenchmarkOutput(inference_result_time, inference_result_memory, train_result_time, train_result_memory)

    @property
    def environment_info(self):
        """
        Dictionary describing the software and hardware environment.

        Computed on first access and cached. CPU RAM is reported only when
        `psutil` is importable and GPU details only when `py3nvml` is
        importable and can be initialized; otherwise those entries are "N/A".
        """
        if self._environment_info is None:
            info = {}
            info["transformers_version"] = version
            info["framework"] = self.framework
            info["framework_version"] = self.framework_version
            info["python_version"] = platform.python_version()
            info["system"] = platform.system()
            info["cpu"] = platform.processor()
            info["architecture"] = platform.architecture()[0]
            info["date"] = datetime.date(datetime.now())
            info["time"] = datetime.time(datetime.now())

            try:
                import psutil
            except (ImportError):
                logger.warning(
                    "Psutil not installed, we won't log available CPU memory."
                    "Install psutil (pip install psutil) to log available CPU memory."
                )
                info["cpu_ram_mb"] = "N/A"
            else:
                info["cpu_ram_mb"] = bytes_to_mega_bytes(psutil.virtual_memory().total)

            info["use_gpu"] = self.is_gpu
            if self.is_gpu:
                info["num_gpus"] = self.args.n_gpu
                try:
                    from py3nvml import py3nvml

                    py3nvml.nvmlInit()
                    handle = py3nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                except ImportError:
                    logger.warning(
                        "py3nvml not installed, we won't log GPU memory usage. "
                        "Install py3nvml (pip install py3nvml) to log information about GPU."
                    )
                    info["gpu"] = "N/A"
                    info["gpu_ram_mb"] = "N/A"
                    info["gpu_power_watts"] = "N/A"
                    info["gpu_performance_state"] = "N/A"
                except (OSError, py3nvml.NVMLError):
                    logger.warning(
                        "Error while initializing comunication with GPU. " "We won't log information about GPU."
                    )
                    info["gpu"] = "N/A"
                    info["gpu_ram_mb"] = "N/A"
                    info["gpu_power_watts"] = "N/A"
                    info["gpu_performance_state"] = "N/A"
                    py3nvml.nvmlShutdown()
                else:
                    info["gpu"] = py3nvml.nvmlDeviceGetName(handle)
                    info["gpu_ram_mb"] = bytes_to_mega_bytes(py3nvml.nvmlDeviceGetMemoryInfo(handle).total)
                    info["gpu_power_watts"] = py3nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000
                    info["gpu_performance_state"] = py3nvml.nvmlDeviceGetPerformanceState(handle)
                    py3nvml.nvmlShutdown()

            self._environment_info = info
        return self._environment_info

    def print_results(self, result_dict):
        """
        Print one line per (model, batch size, sequence length) entry of `result_dict`.

        Float results are printed as seconds rounded to milliseconds; anything
        else is printed followed by " MB".
        """
        for model_name in self.args.model_names:
            self.print_fn("\t" + f"======= MODEL CHECKPOINT: {model_name} =======")
            for batch_size in result_dict[model_name]["bs"]:
                for sequence_length in result_dict[model_name]["ss"]:
                    result = result_dict[model_name]["result"][batch_size][sequence_length]
                    if isinstance(result, float):
                        self.print_fn(
                            f"\t\t{model_name}/{batch_size}/{sequence_length}: " f"{(round(1000 * result) / 1000)}s"
                        )
                    else:
                        self.print_fn(f"\t\t{model_name}/{batch_size}/{sequence_length}: " f"{result} MB")

    def print_memory_trace_statistics(self, summary: MemorySummary):
        """
        Print the line-by-line trace in `summary.sequential`, the first and last six
        entries of `summary.cumulative`, and the total memory increase.
        """
        self.print_fn(
            "\nLine by line memory consumption:\n"
            + "\n".join(
                f"{state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.sequential
            )
        )
        self.print_fn(
            "\nLines with top memory consumption:\n"
            + "\n".join(
                f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.cumulative[:6]
            )
        )
        self.print_fn(
            "\nLines with lowest memory consumption:\n"
            + "\n".join(
                f"=> {state.frame.filename}:{state.frame.line_number}: mem {state.cpu_gpu}: {state.frame.line_text}"
                for state in summary.cumulative[-6:]
            )
        )
        self.print_fn(f"\nTotal memory increase: {summary.total}")

    def save_to_csv(self, result_dict, filename):
        """
        Write `result_dict` to `filename` with columns model, batch_size,
        sequence_length and result. Does nothing unless `args.save_to_csv` is set.

        Raises:
            AssertionError: if no model names are defined.
        """
        if not self.args.save_to_csv:
            return
        self.print_fn("Saving results to csv.")

        # Validate before opening so a failed check does not leave an empty file behind.
        assert len(self.args.model_names) > 0, "At least 1 model should be defined, but got {}".format(
            self.args.model_names
        )

        # newline="" lets the csv module control line endings, as for the environment-info file.
        with open(filename, mode="w", newline="") as csv_file:
            fieldnames = ["model", "batch_size", "sequence_length"]
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames + ["result"])
            writer.writeheader()

            for model_name in self.args.model_names:
                result_dict_model = result_dict[model_name]["result"]
                for bs, results_per_length in result_dict_model.items():
                    for ss, result_model in results_per_length.items():
                        result_format = "{:.4f}" if isinstance(result_model, float) else "{}"
                        writer.writerow(
                            {
                                "model": model_name,
                                "batch_size": bs,
                                "sequence_length": ss,
                                "result": result_format.format(result_model),
                            }
                        )
b/src/transformers/hf_argparser.py index 70d253c03b..d37951e4bb 100644 --- a/src/transformers/hf_argparser.py +++ b/src/transformers/hf_argparser.py @@ -4,7 +4,7 @@ import sys from argparse import ArgumentParser from enum import Enum from pathlib import Path -from typing import Any, Iterable, NewType, Tuple, Union +from typing import Any, Iterable, List, NewType, Tuple, Union DataClass = NewType("DataClass", Any) @@ -52,9 +52,13 @@ class HfArgumentParser(ArgumentParser): "We will add compatibility when Python 3.9 is released." ) typestring = str(field.type) - for x in (int, float, str): - if typestring == f"typing.Union[{x.__name__}, NoneType]": - field.type = x + for prim_type in (int, float, str): + for collection in (List,): + if typestring == f"typing.Union[{collection[prim_type]}, NoneType]": + field.type = collection[prim_type] + if typestring == f"typing.Union[{prim_type.__name__}, NoneType]": + field.type = prim_type + if isinstance(field.type, type) and issubclass(field.type, Enum): kwargs["choices"] = list(field.type) kwargs["type"] = field.type @@ -65,6 +69,14 @@ class HfArgumentParser(ArgumentParser): if field.default is True: field_name = f"--no-{field.name}" kwargs["dest"] = field.name + elif hasattr(field.type, "__origin__") and issubclass(field.type.__origin__, List): + kwargs["nargs"] = "+" + kwargs["type"] = field.type.__args__[0] + assert all( + x == kwargs["type"] for x in field.type.__args__ + ), "{} cannot be a List of mixed types".format(field.name) + if field.default_factory is not dataclasses.MISSING: + kwargs["default"] = field.default_factory() else: kwargs["type"] = field.type if field.default is not dataclasses.MISSING: diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 0000000000..c6808e2480 --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,90 @@ +import os +import tempfile +import unittest +from pathlib import Path + +from transformers import GPT2Config, is_torch_available + +from .utils 
def check_results_dict_not_empty(self, results):
    """Assert that the looked-up benchmark results are not ``None``.

    For every per-model entry in ``results``, the batch sizes under ``"bs"`` and
    the sequence lengths under ``"ss"`` are walked in lockstep, and the value at
    ``entry["result"][batch_size][sequence_length]`` is checked.
    """
    for entry in results.values():
        # NOTE(review): zip pairs bs[i] with ss[i]; it does not visit every
        # (batch size, sequence length) combination -- confirm this is intended.
        paired_sizes = zip(entry["bs"], entry["ss"])
        for bs, ss in paired_sizes:
            self.assertIsNotNone(entry["result"][bs][ss])
def list_field(default=None, metadata=None):
    """Build a dataclass field whose default is a fresh copy of ``default``.

    Dataclasses reject mutable defaults such as lists, so the value is supplied
    through ``default_factory``. The factory returns a shallow copy on every
    call; otherwise every instance would share (and could mutate) the one
    object passed in here.

    Args:
        default: value to use as the field default (typically a list). ``None``
            is passed through unchanged.
        metadata: optional mapping forwarded to ``dataclasses.field``.

    Returns:
        The ``dataclasses.Field`` produced by ``field(...)``.
    """
    import copy

    return field(default_factory=lambda: copy.copy(default), metadata=metadata)
def test_with_list(self):
    """Check that ``List[...]`` dataclass fields become ``nargs="+"`` arguments.

    The parser built from ``ListExample`` is compared against a hand-built
    ``argparse`` parser, then parsed once with no arguments (defaults) and once
    with explicit values for every list option.
    """
    # (flag, element type, default) for each list-valued option.
    list_options = (
        ("--foo_int", int, []),
        ("--bar_int", int, [1, 2, 3]),
        ("--foo_str", str, ["Hallo", "Bonjour", "Hello"]),
        ("--foo_float", float, [0.1, 0.2, 0.3]),
    )

    parser = HfArgumentParser(ListExample)

    expected = argparse.ArgumentParser()
    for flag, element_type, default_value in list_options:
        expected.add_argument(flag, nargs="+", default=default_value, type=element_type)

    self.argparsersEqual(parser, expected)

    # No command-line input: every field falls back to its declared default.
    parsed_defaults = parser.parse_args([])
    expected_defaults = Namespace(
        foo_int=[],
        bar_int=[1, 2, 3],
        foo_str=["Hallo", "Bonjour", "Hello"],
        foo_float=[0.1, 0.2, 0.3],
    )
    self.assertEqual(parsed_defaults, expected_defaults)

    # Explicit values: each option collects one or more typed items.
    command_line = "--foo_int 1 --bar_int 2 3 --foo_str a b c --foo_float 0.1 0.7".split()
    parsed_values = parser.parse_args(command_line)
    expected_values = Namespace(foo_int=[1], bar_int=[2, 3], foo_str=["a", "b", "c"], foo_float=[0.1, 0.7])
    self.assertEqual(parsed_values, expected_values)
self.argparsersEqual(parser, expected) args = parser.parse_args([]) - self.assertEqual(args, Namespace(foo=None, bar=None, baz=None)) + self.assertEqual(args, Namespace(foo=None, bar=None, baz=None, ces=[], des=[])) - args = parser.parse_args("--foo 12 --bar 3.14 --baz 42".split()) - self.assertEqual(args, Namespace(foo=12, bar=3.14, baz="42")) + args = parser.parse_args("--foo 12 --bar 3.14 --baz 42 --ces a b c --des 1 2 3".split()) + self.assertEqual(args, Namespace(foo=12, bar=3.14, baz="42", ces=["a", "b", "c"], des=[1, 2, 3])) def test_integration_training_args(self): parser = HfArgumentParser(TrainingArguments)