Merge branch 'master' into generative-finetuning
This commit is contained in:
@@ -314,15 +314,16 @@ def main():
|
||||
mean_loss = tr_loss * args.gradient_accumulation_steps / nb_tr_steps
|
||||
pbar.set_postfix_str(f"Loss: {mean_loss:.5f}")
|
||||
if (step + 1) % args.gradient_accumulation_steps == 0:
|
||||
scheduler.step() # Update learning rate schedule
|
||||
optimizer.step()
|
||||
scheduler.step() # Update learning rate schedule
|
||||
optimizer.zero_grad()
|
||||
global_step += 1
|
||||
|
||||
# Save a trained model
|
||||
if n_gpu > 1 and torch.distributed.get_rank() == 0 or n_gpu <=1 :
|
||||
if args.local_rank == -1 or torch.distributed.get_rank() == 0:
|
||||
logging.info("** ** * Saving fine-tuned model ** ** * ")
|
||||
model.save_pretrained(args.output_dir)
|
||||
model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training
|
||||
model_to_save.save_pretrained(args.output_dir)
|
||||
tokenizer.save_pretrained(args.output_dir)
|
||||
|
||||
|
||||
|
||||
@@ -507,7 +507,7 @@ def main():
|
||||
|
||||
if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
|
||||
raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
|
||||
if not os.path.exists(args.output_dir) and ( n_gpu > 1 and torch.distributed.get_rank() == 0 or n_gpu <=1 ):
|
||||
if not os.path.exists(args.output_dir) and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||
os.makedirs(args.output_dir)
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
|
||||
@@ -602,15 +602,16 @@ def main():
|
||||
nb_tr_examples += input_ids.size(0)
|
||||
nb_tr_steps += 1
|
||||
if (step + 1) % args.gradient_accumulation_steps == 0:
|
||||
scheduler.step() # Update learning rate schedule
|
||||
optimizer.step()
|
||||
scheduler.step() # Update learning rate schedule
|
||||
optimizer.zero_grad()
|
||||
global_step += 1
|
||||
|
||||
# Save a trained model
|
||||
if args.do_train and ( n_gpu > 1 and torch.distributed.get_rank() == 0 or n_gpu <=1):
|
||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||
logger.info("** ** * Saving fine - tuned model ** ** * ")
|
||||
model.save_pretrained(args.output_dir)
|
||||
model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training
|
||||
model_to_save.save_pretrained(args.output_dir)
|
||||
tokenizer.save_pretrained(args.output_dir)
|
||||
|
||||
|
||||
|
||||
@@ -211,10 +211,12 @@ def prune_heads(args, model, eval_dataloader, head_mask):
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
## Required parameters
|
||||
parser.add_argument("--data_dir", default=None, type=str, required=True,
|
||||
help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
|
||||
parser.add_argument("--model_name", default=None, type=str, required=True,
|
||||
help="Bert/XLNet/XLM pre-trained model selected in the list: " + ", ".join(ALL_MODELS))
|
||||
parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
|
||||
help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(
|
||||
ALL_MODELS))
|
||||
parser.add_argument("--task_name", default=None, type=str, required=True,
|
||||
help="The name of the task to train selected in the list: " + ", ".join(processors.keys()))
|
||||
parser.add_argument("--output_dir", default=None, type=str, required=True,
|
||||
@@ -222,9 +224,9 @@ def main():
|
||||
|
||||
## Other parameters
|
||||
parser.add_argument("--config_name", default="", type=str,
|
||||
help="Pretrained config name or path if not the same as model_name")
|
||||
help="Pretrained config name or path if not the same as model_name_or_path")
|
||||
parser.add_argument("--tokenizer_name", default="", type=str,
|
||||
help="Pretrained tokenizer name or path if not the same as model_name")
|
||||
help="Pretrained tokenizer name or path if not the same as model_name_or_path")
|
||||
parser.add_argument("--cache_dir", default="", type=str,
|
||||
help="Where do you want to store the pre-trained models downloaded from s3")
|
||||
parser.add_argument("--data_subset", type=int, default=-1,
|
||||
@@ -297,15 +299,15 @@ def main():
|
||||
|
||||
args.model_type = ""
|
||||
for key in MODEL_CLASSES:
|
||||
if key in args.model_name.lower():
|
||||
if key in args.model_name_or_path.lower():
|
||||
args.model_type = key # take the first match in model types
|
||||
break
|
||||
config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
|
||||
config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name,
|
||||
config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path,
|
||||
num_labels=num_labels, finetuning_task=args.task_name,
|
||||
output_attentions=True)
|
||||
tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name)
|
||||
model = model_class.from_pretrained(args.model_name, from_tf=bool('.ckpt' in args.model_name), config=config)
|
||||
tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path)
|
||||
model = model_class.from_pretrained(args.model_name_or_path, from_tf=bool('.ckpt' in args.model_name_or_path), config=config)
|
||||
|
||||
if args.local_rank == 0:
|
||||
torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab
|
||||
|
||||
@@ -279,7 +279,7 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
|
||||
sep_token=tokenizer.sep_token,
|
||||
sep_token_extra=bool(args.model_type in ['roberta']), # roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
|
||||
pad_on_left=bool(args.model_type in ['xlnet']), # pad on the left for xlnet
|
||||
pad_token=tokenizer.encoder[tokenizer.pad_token] if args.model_type in ['roberta'] else tokenizer.vocab[tokenizer.pad_token],
|
||||
pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
|
||||
pad_token_segment_id=4 if args.model_type in ['xlnet'] else 0,
|
||||
)
|
||||
if args.local_rank in [-1, 0]:
|
||||
@@ -467,7 +467,7 @@ def main():
|
||||
|
||||
# Load a trained model and vocabulary that you have fine-tuned
|
||||
model = model_class.from_pretrained(args.output_dir)
|
||||
tokenizer = tokenizer_class.from_pretrained(args.output_dir)
|
||||
tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
|
||||
model.to(args.device)
|
||||
|
||||
|
||||
|
||||
@@ -481,7 +481,7 @@ def main():
|
||||
|
||||
|
||||
# Save the trained model and the tokenizer
|
||||
if args.local_rank == -1 or torch.distributed.get_rank() == 0:
|
||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||
# Create output directory if needed
|
||||
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
||||
os.makedirs(args.output_dir)
|
||||
@@ -498,7 +498,7 @@ def main():
|
||||
|
||||
# Load a trained model and vocabulary that you have fine-tuned
|
||||
model = model_class.from_pretrained(args.output_dir)
|
||||
tokenizer = tokenizer_class.from_pretrained(args.output_dir)
|
||||
tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
|
||||
model.to(args.device)
|
||||
|
||||
|
||||
|
||||
@@ -422,12 +422,14 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
|
||||
tokens_b = tokenizer.tokenize(example.text_b)
|
||||
# Modifies `tokens_a` and `tokens_b` in place so that the total
|
||||
# length is less than the specified length.
|
||||
# Account for [CLS], [SEP], [SEP] with "- 3"
|
||||
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
|
||||
# Account for [CLS], [SEP], [SEP] with "- 3". " -4" for RoBERTa.
|
||||
special_tokens_count = 4 if sep_token_extra else 3
|
||||
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
|
||||
else:
|
||||
# Account for [CLS] and [SEP] with "- 2"
|
||||
if len(tokens_a) > max_seq_length - 2:
|
||||
tokens_a = tokens_a[:(max_seq_length - 2)]
|
||||
# Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
|
||||
special_tokens_count = 3 if sep_token_extra else 2
|
||||
if len(tokens_a) > max_seq_length - special_tokens_count:
|
||||
tokens_a = tokens_a[:(max_seq_length - special_tokens_count)]
|
||||
|
||||
# The convention in BERT is:
|
||||
# (a) For sequence pairs:
|
||||
|
||||
Reference in New Issue
Block a user