From 32fea876bb9389a92791c8a633f811c297d4a77d Mon Sep 17 00:00:00 2001 From: thomwolf Date: Mon, 11 Feb 2019 12:53:32 +0100 Subject: [PATCH] add distant debugging to run_transfo_xl --- README.md | 4 ++-- examples/run_transfo_xl.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6a1831ac98..e866506c6d 100644 --- a/README.md +++ b/README.md @@ -891,7 +891,7 @@ python run_openai_gpt.py \ --train_batch_size 16 \ ``` -This command run in about 10 min on a single K-80 an gives an evaluation accuracy of 86.42% (the authors reports a median accuracy with the TensorFlow code of 85.8% and the OpenAI GPT paper reports a best single run accuracy of 86.5%). +This command runs in about 10 min on a single K-80 and gives an evaluation accuracy of about 86.4% (the authors report a median accuracy with the TensorFlow code of 85.8% and the OpenAI GPT paper reports a best single run accuracy of 86.5%). #### Evaluating the pre-trained Transformer-XL on the WikiText 103 dataset @@ -902,7 +902,7 @@ This command will download a pre-processed version of the WikiText 103 dataset i python run_transfo_xl.py --work_dir ../log ``` -This command run in about 10 min on a single K-80 an gives an evaluation accuracy of 86.42% (the authors reports a median accuracy with the TensorFlow code of 85.8% and the OpenAI GPT paper reports a best single run accuracy of 86.5%). +This command runs in about 1 min on a V100 and gives an evaluation perplexity of 18.22 on WikiText-103 (the authors report a perplexity of about 18.3 on this dataset with the TensorFlow code). 
## Fine-tuning BERT-large on GPUs diff --git a/examples/run_transfo_xl.py b/examples/run_transfo_xl.py index 97c61777a4..06d37a719f 100644 --- a/examples/run_transfo_xl.py +++ b/examples/run_transfo_xl.py @@ -60,9 +60,18 @@ def main(): help='do not log the eval result') parser.add_argument('--same_length', action='store_true', help='set same length attention with masking') + parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.") + parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.") args = parser.parse_args() assert args.ext_len >= 0, 'extended context length must be non-negative' + if args.server_ip and args.server_port: + # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script + import ptvsd + print("Waiting for debugger attach") + ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) + ptvsd.wait_for_attach() + device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") logger.info("device: {}".format(device))