small fix and updates to readme

2019-06-19 09:38:38 +02:00
parent f7e2ac01ea
commit 68ab9599ce
5 changed files with 53 additions and 18 deletions
--- a/examples/bertology.py
+++ b/examples/bertology.py
@@ -8,7 +8,7 @@ import torch
 import torch.nn.functional as F
 import numpy as np

-from pytorch_pretrained_bert import BertModel, BertTokenizer
+from pytorch_pretrained_bert import BertForSequenceClassification, BertTokenizer

 logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
@@ -17,24 +17,33 @@ logger = logging.getLogger(__name__)

 def run_model():
    parser = argparse.ArgumentParser()
-    parser.add_argument('--model_name_or_path', type=str, default='bert-base-uncased',
-                                                help='pretrained model name or path to local checkpoint')
+    parser.add_argument('--model_name_or_path', type=str, default='bert-base-uncased', help='pretrained model name or path to local checkpoint')
    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus")
+    parser.add_argument("--no_cuda", action='store_true', help="Whether not to use CUDA when available")
    args = parser.parse_args()
-    print(args)
-
-    if args.batch_size == -1:
-        args.batch_size = 1
-    assert args.nsamples % args.batch_size == 0

    np.random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    if args.local_rank == -1 or args.no_cuda:
+        args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+        n_gpu = torch.cuda.device_count()
+    else:
+        torch.cuda.set_device(args.local_rank)
+        args.device = torch.device("cuda", args.local_rank)
+        n_gpu = 1
+        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
+        torch.distributed.init_process_group(backend='nccl')
+
+    logging.basicConfig(level = logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
+    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
+        args.device, n_gpu, bool(args.local_rank != -1), args.fp16))

    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path)
-    model = BertModel.from_pretrained(args.model_name_or_path)
-    model.to(device)
+    model = BertForSequenceClassification.from_pretrained(args.model_name_or_path)
+    model.to(args.device)
    model.eval()

    
--- a/examples/run_classifier.py
+++ b/examples/run_classifier.py
@@ -187,7 +187,7 @@ def main():

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir:
        raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
-    if not os.path.exists(args.output_dir):
+    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    task_name = args.task_name.lower()
@@ -361,6 +361,10 @@ def main():
        # Load a trained model and vocabulary that you have fine-tuned
        model = BertForSequenceClassification.from_pretrained(args.output_dir, num_labels=num_labels)
        tokenizer = BertTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+
+        # Good practice: save your training arguments together with the trained model
+        output_args_file = os.path.join(args.output_dir, 'training_args.bin')
+        torch.save(args, output_args_file)
    else:
        model = BertForSequenceClassification.from_pretrained(args.bert_model)

--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -331,6 +331,10 @@ def main():
        # Load a trained model and vocabulary that you have fine-tuned
        model = BertForQuestionAnswering.from_pretrained(args.output_dir)
        tokenizer = BertTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
+
+        # Good practice: save your training arguments together with the trained model
+        output_args_file = os.path.join(args.output_dir, 'training_args.bin')
+        torch.save(args, output_args_file)
    else:
        model = BertForQuestionAnswering.from_pretrained(args.bert_model)