From 3cdb38a7c022d99c4b3333b5b4daf96f6953ea1b Mon Sep 17 00:00:00 2001
From: Victor SANH <victorsanh@gmail.com>
Date: Wed, 8 Jan 2020 17:19:05 -0500
Subject: [PATCH] Fix indentation: replace tabs with spaces in run_squad_w_distillation.py

---
 .../distillation/run_squad_w_distillation.py  | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/examples/distillation/run_squad_w_distillation.py b/examples/distillation/run_squad_w_distillation.py
index 14b2e7d6e7..44b802e1c1 100644
--- a/examples/distillation/run_squad_w_distillation.py
+++ b/examples/distillation/run_squad_w_distillation.py
@@ -123,8 +123,8 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
         # Load in optimizer and scheduler states
         optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
         scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))
-		
-	if args.fp16:
+        
+    if args.fp16:
         try:
             from apex import amp
         except ImportError:
@@ -157,7 +157,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
     logger.info("  Total optimization steps = %d", t_total)
 
     global_step = 1
-	epochs_trained = 0
+    epochs_trained = 0
     steps_trained_in_current_epoch = 0
     # Check if continuing training from a checkpoint
     if os.path.exists(args.model_name_or_path):
@@ -178,10 +178,10 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
     tr_loss, logging_loss = 0.0, 0.0
     model.zero_grad()
     train_iterator = trange(
-		epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
-	)
+        epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
+    )
     # Added here for reproductibility
-	set_seed(args)
+    set_seed(args)
 
     for _ in train_iterator:
         epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
@@ -207,7 +207,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
                 inputs["token_type_ids"] = None if args.model_type == "xlm" else batch[2]
             if args.model_type in ["xlnet", "xlm"]:
                 inputs.update({"cls_index": batch[5], "p_mask": batch[6]})
-				if args.version_2_with_negative:
+                if args.version_2_with_negative:
                     inputs.update({"is_impossible": batch[7]})
             outputs = model(**inputs)
             loss, start_logits_stu, end_logits_stu = outputs
@@ -261,7 +261,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
                 model.zero_grad()
                 global_step += 1
 
-				# Log metrics
+                # Log metrics
                 if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                     # Only evaluate when single GPU otherwise metrics may not average well
                     if args.local_rank == -1 and args.evaluate_during_training:
@@ -281,7 +281,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
                         model.module if hasattr(model, "module") else model
                     )  # Take care of distributed/parallel training
                     model_to_save.save_pretrained(output_dir)
-					tokenizer.save_pretrained(output_dir)
+                    tokenizer.save_pretrained(output_dir)
 
                     torch.save(args, os.path.join(output_dir, "training_args.bin"))
                     logger.info("Saving model checkpoint to %s", output_dir)
@@ -325,7 +325,7 @@ def evaluate(args, model, tokenizer, prefix=""):
     logger.info("  Batch size = %d", args.eval_batch_size)
 
     all_results = []
-	start_time = timeit.default_timer()
+    start_time = timeit.default_timer()
 
     for batch in tqdm(eval_dataloader, desc="Evaluating"):
         model.eval()
@@ -425,7 +425,7 @@ def evaluate(args, model, tokenizer, prefix=""):
 def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
     if args.local_rank not in [-1, 0] and not evaluate:
         # Make sure only the first process in distributed training process the dataset, and the others will use the cache
-		torch.distributed.barrier()
+        torch.distributed.barrier()
 
     # Load data features from cache or dataset file
     input_file = args.predict_file if evaluate else args.train_file
@@ -468,7 +468,7 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
             max_query_length=args.max_query_length,
             is_training=not evaluate,
             return_dataset="pt",
-			threads=args.threads,
+            threads=args.threads,
         )
 
         if args.local_rank in [-1, 0]:
@@ -476,7 +476,7 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
             torch.save({"features": features, "dataset": dataset, "examples": examples}, cached_features_file)
 
     if args.local_rank == 0 and not evaluate:
-		# Make sure only the first process in distributed training process the dataset, and the others will use the cache
+        # Make sure only the first process in distributed training process the dataset, and the others will use the cache
         torch.distributed.barrier()
 
     if output_examples:
@@ -541,11 +541,11 @@ def main():
         help="The input data dir. Should contain the .json files for the task."
         + "If no data dir or train/predict files are specified, will run with tensorflow_datasets.",
     )
-	parser.add_argument(
+    parser.add_argument(
         "--train_file",
-		default=None,
-		type=str,
-		help="The input training file. If a data dir is specified, will look for the file there"
+        default=None,
+        type=str,
+        help="The input training file. If a data dir is specified, will look for the file there"
         + "If no data dir or train/predict files are specified, will run with tensorflow_datasets.",
     )
     parser.add_argument(
@@ -688,7 +688,7 @@ def main():
     parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
     parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")
 
-	parser.add_argument("--threads", type=int, default=1, help="multiple threads for converting example to features")
+    parser.add_argument("--threads", type=int, default=1, help="multiple threads for converting example to features")
     args = parser.parse_args()
 
     if (
@@ -743,7 +743,7 @@ def main():
 
     # Load pretrained model and tokenizer
     if args.local_rank not in [-1, 0]:
-		# Make sure only the first process in distributed training will download model & vocab
+        # Make sure only the first process in distributed training will download model & vocab
         torch.distributed.barrier() 
 
     args.model_type = args.model_type.lower()
@@ -781,7 +781,7 @@ def main():
         teacher = None
 
     if args.local_rank == 0:
-		# Make sure only the first process in distributed training will download model & vocab
+        # Make sure only the first process in distributed training will download model & vocab
         torch.distributed.barrier()
 
     model.to(args.device)