adding tests to examples - updating summary module - coverage update
This commit is contained in:
@@ -1,5 +1,8 @@
|
|||||||
[run]
|
[run]
|
||||||
source=pytorch_transformers
|
source=pytorch_transformers
|
||||||
|
omit =
|
||||||
|
# skip conversion scripts from testing for now
|
||||||
|
*/convert_*
|
||||||
[report]
|
[report]
|
||||||
exclude_lines =
|
exclude_lines =
|
||||||
pragma: no cover
|
pragma: no cover
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -126,4 +126,5 @@ models
|
|||||||
proc_data
|
proc_data
|
||||||
|
|
||||||
# examples
|
# examples
|
||||||
|
runs
|
||||||
examples/runs
|
examples/runs
|
||||||
@@ -60,25 +60,14 @@ TOKENIZER_CLASSES = {
|
|||||||
'xlm': XLMTokenizer,
|
'xlm': XLMTokenizer,
|
||||||
}
|
}
|
||||||
|
|
||||||
def train(args, train_features, model):
|
def train(args, train_dataset, model):
|
||||||
""" Train the model """
|
""" Train the model """
|
||||||
if args.local_rank in [-1, 0]:
|
if args.local_rank in [-1, 0]:
|
||||||
tb_writer = SummaryWriter()
|
tb_writer = SummaryWriter()
|
||||||
|
|
||||||
# Convert in tensors and build dataloader
|
|
||||||
all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
|
|
||||||
all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
|
|
||||||
all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
|
|
||||||
if args.output_mode == "classification":
|
|
||||||
all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
|
|
||||||
elif args.output_mode == "regression":
|
|
||||||
all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.float)
|
|
||||||
|
|
||||||
args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
|
args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
|
||||||
|
train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
|
||||||
train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
|
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)
|
||||||
train_sampler = RandomSampler(train_data) if args.local_rank == -1 else DistributedSampler(train_data)
|
|
||||||
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)
|
|
||||||
|
|
||||||
num_train_optimization_steps = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
|
num_train_optimization_steps = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
|
||||||
|
|
||||||
@@ -109,19 +98,24 @@ def train(args, train_features, model):
|
|||||||
|
|
||||||
# Train!
|
# Train!
|
||||||
logger.info("***** Running training *****")
|
logger.info("***** Running training *****")
|
||||||
logger.info(" Num examples = %d", len(train_features))
|
logger.info(" Num examples = %d", len(train_dataset))
|
||||||
|
logger.info(" Num Epochs = %d", args.num_train_epochs)
|
||||||
logger.info(" Batch size = %d", args.train_batch_size)
|
logger.info(" Batch size = %d", args.train_batch_size)
|
||||||
logger.info(" Num steps = %d", num_train_optimization_steps)
|
logger.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
|
||||||
|
logger.info(" Total optimization steps = %d", num_train_optimization_steps)
|
||||||
|
|
||||||
global_step = 0
|
global_step = 0
|
||||||
tr_loss = 0
|
tr_loss = 0
|
||||||
model.train()
|
model.train()
|
||||||
|
optimizer.zero_grad()
|
||||||
for _ in trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]):
|
for _ in trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]):
|
||||||
for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])):
|
for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])):
|
||||||
batch = tuple(t.to(args.device) for t in batch)
|
batch = tuple(t.to(args.device) for t in batch)
|
||||||
input_ids, input_mask, segment_ids, label_ids = batch
|
inputs = {'input_ids': batch[0],
|
||||||
|
'attention_mask': batch[1],
|
||||||
ouputs = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids)
|
'token_type_ids': batch[2] if args.model_type in ['bert', 'xlnet'] else None,
|
||||||
|
'labels': batch[3]}
|
||||||
|
ouputs = model(**inputs)
|
||||||
loss = ouputs[0]
|
loss = ouputs[0]
|
||||||
|
|
||||||
if args.n_gpu > 1:
|
if args.n_gpu > 1:
|
||||||
@@ -150,30 +144,20 @@ def train(args, train_features, model):
|
|||||||
return global_step, tr_loss / global_step
|
return global_step, tr_loss / global_step
|
||||||
|
|
||||||
|
|
||||||
def evalutate(args, eval_task, eval_output_dir, eval_features, model):
|
def evalutate(args, eval_task, eval_output_dir, dataset, model):
|
||||||
""" Evaluate the model """
|
""" Evaluate the model """
|
||||||
if os.path.exists(eval_output_dir) and os.listdir(eval_output_dir) and args.do_train and not args.overwrite_output_dir:
|
if os.path.exists(eval_output_dir) and os.listdir(eval_output_dir) and args.do_train and not args.overwrite_output_dir:
|
||||||
raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(eval_output_dir))
|
raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(eval_output_dir))
|
||||||
if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
|
if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
|
||||||
os.makedirs(eval_output_dir)
|
os.makedirs(eval_output_dir)
|
||||||
|
|
||||||
# Convert in tensors and build dataloader
|
|
||||||
all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
|
|
||||||
all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
|
|
||||||
all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
|
|
||||||
if args.output_mode == "classification":
|
|
||||||
all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
|
|
||||||
elif args.output_mode == "regression":
|
|
||||||
all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.float)
|
|
||||||
|
|
||||||
eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
|
|
||||||
# Note that DistributedSampler samples randomly
|
# Note that DistributedSampler samples randomly
|
||||||
eval_sampler = SequentialSampler(eval_data) if args.local_rank == -1 else DistributedSampler(eval_data)
|
eval_sampler = SequentialSampler(dataset) if args.local_rank == -1 else DistributedSampler(dataset)
|
||||||
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)
|
eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)
|
||||||
|
|
||||||
# Eval!
|
# Eval!
|
||||||
logger.info("***** Running evaluation *****")
|
logger.info("***** Running evaluation *****")
|
||||||
logger.info(" Num examples = %d", len(eval_features))
|
logger.info(" Num examples = %d", len(dataset))
|
||||||
logger.info(" Batch size = %d", args.eval_batch_size)
|
logger.info(" Batch size = %d", args.eval_batch_size)
|
||||||
model.eval()
|
model.eval()
|
||||||
eval_loss = 0
|
eval_loss = 0
|
||||||
@@ -214,36 +198,47 @@ def evalutate(args, eval_task, eval_output_dir, eval_features, model):
|
|||||||
logger.info(" %s = %s", key, str(result[key]))
|
logger.info(" %s = %s", key, str(result[key]))
|
||||||
writer.write("%s = %s\n" % (key, str(result[key])))
|
writer.write("%s = %s\n" % (key, str(result[key])))
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def load_and_cache_examples(args, task, tokenizer, eval=False):
|
|
||||||
|
def load_and_cache_examples(args, task, tokenizer, evaluate=False):
|
||||||
processor = processors[task]()
|
processor = processors[task]()
|
||||||
output_mode = output_modes[task]
|
output_mode = output_modes[task]
|
||||||
label_list = processor.get_labels()
|
# Load data features from cache or dataset file
|
||||||
|
cached_features_file = os.path.join(args.data_dir, 'cached_{}_{}_{}_{}'.format(
|
||||||
# Load and cache data
|
'dev' if evaluate else 'train',
|
||||||
processor = processors[task]()
|
|
||||||
examples = processor.get_dev_examples(args.data_dir)
|
|
||||||
cached_features_file = os.path.join(args.data_dir, '{}_{}_{}_{}'.format(
|
|
||||||
'dev' if eval else 'train',
|
|
||||||
list(filter(None, args.model_name.split('/'))).pop(),
|
list(filter(None, args.model_name.split('/'))).pop(),
|
||||||
str(args.max_seq_length),
|
str(args.max_seq_length),
|
||||||
str(task)))
|
str(task)))
|
||||||
|
|
||||||
if os.path.exists(cached_features_file):
|
if os.path.exists(cached_features_file):
|
||||||
logger.info("Loading features from cached file %s", cached_features_file)
|
logger.info("Loading features from cached file %s", cached_features_file)
|
||||||
features = torch.load(cached_features_file)
|
features = torch.load(cached_features_file)
|
||||||
else:
|
else:
|
||||||
features = convert_examples_to_features(examples, label_list, args.max_seq_length, tokenizer, output_mode)
|
logger.info("Creating features from dataset file at %s", args.data_dir)
|
||||||
|
label_list = processor.get_labels()
|
||||||
|
examples = processor.get_dev_examples(args.data_dir) if evaluate else processor.get_train_examples(args.data_dir)
|
||||||
features = convert_examples_to_features(examples, label_list, args.max_seq_length, tokenizer, output_mode,
|
features = convert_examples_to_features(examples, label_list, args.max_seq_length, tokenizer, output_mode,
|
||||||
cls_token_at_end=bool(args.model_type not in ['bert', 'xlm']),
|
cls_token_at_end=bool(args.model_type in ['xlnet']), # xlnet has a cls token at the end
|
||||||
cls_token=tokenizer.cls_token,
|
cls_token=tokenizer.cls_token,
|
||||||
sep_token=tokenizer.sep_token, cls_token_segment_id=2,
|
sep_token=tokenizer.sep_token,
|
||||||
pad_on_left=True, pad_token_segment_id=4)
|
cls_token_segment_id=2 if args.model_type in ['xlnet'] else 1,
|
||||||
if args.local_rank == -1 or torch.distributed.get_rank() == 0:
|
pad_on_left=bool(args.model_type in ['xlnet']), # pad on the left for xlnet
|
||||||
|
pad_token_segment_id=4 if args.model_type in ['xlnet'] else 0)
|
||||||
|
if args.local_rank in [-1, 0]:
|
||||||
logger.info("Saving features into cached file %s", cached_features_file)
|
logger.info("Saving features into cached file %s", cached_features_file)
|
||||||
torch.save(features, cached_features_file)
|
torch.save(features, cached_features_file)
|
||||||
|
|
||||||
return features
|
# Convert to Tensors and build dataset
|
||||||
|
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
|
||||||
|
all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
|
||||||
|
all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
|
||||||
|
if output_mode == "classification":
|
||||||
|
all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)
|
||||||
|
elif output_mode == "regression":
|
||||||
|
all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)
|
||||||
|
|
||||||
|
dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
|
||||||
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -350,10 +345,10 @@ def main():
|
|||||||
torch.distributed.barrier()
|
torch.distributed.barrier()
|
||||||
|
|
||||||
args.model_type = args.model_name.lower().split('-')[0]
|
args.model_type = args.model_name.lower().split('-')[0]
|
||||||
args.tokenizer_class = TOKENIZER_CLASSES[args.model_type]
|
tokenizer_class = TOKENIZER_CLASSES[args.model_type]
|
||||||
args.model_class = MODEL_CLASSES[args.model_type]
|
model_class = MODEL_CLASSES[args.model_type]
|
||||||
tokenizer = args.tokenizer_class.from_pretrained(args.model_name, do_lower_case=args.do_lower_case)
|
tokenizer = tokenizer_class.from_pretrained(args.model_name, do_lower_case=args.do_lower_case)
|
||||||
model = args.model_class.from_pretrained(args.model_name, num_labels=num_labels)
|
model = model_class.from_pretrained(args.model_name, num_labels=num_labels)
|
||||||
|
|
||||||
if args.local_rank == 0:
|
if args.local_rank == 0:
|
||||||
torch.distributed.barrier()
|
torch.distributed.barrier()
|
||||||
@@ -372,23 +367,30 @@ def main():
|
|||||||
|
|
||||||
# Training
|
# Training
|
||||||
if args.do_train:
|
if args.do_train:
|
||||||
train_features = load_and_cache_examples(args, args.task_name, tokenizer, eval=False)
|
train_dataset = load_and_cache_examples(args, args.task_name, tokenizer, evaluate=False)
|
||||||
global_step, tr_loss = train(args, train_features, model)
|
global_step, tr_loss = train(args, train_dataset, model)
|
||||||
logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
|
logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
|
||||||
|
|
||||||
|
|
||||||
# Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
|
# Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
|
||||||
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
|
||||||
# Save a trained model, configuration and tokenizer
|
# Create output directory if needed
|
||||||
|
if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir:
|
||||||
|
raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(args.output_dir))
|
||||||
|
if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
|
||||||
|
os.makedirs(args.output_dir)
|
||||||
|
|
||||||
|
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
|
||||||
|
# They can then be reloaded using `from_pretrained()`
|
||||||
model.save_pretrained(args.output_dir)
|
model.save_pretrained(args.output_dir)
|
||||||
tokenizer.save_vocabulary(args.output_dir)
|
tokenizer.save_pretrained(args.output_dir)
|
||||||
|
|
||||||
# Good practice: save your training arguments together with the trained model
|
# Good practice: save your training arguments together with the trained model
|
||||||
torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
|
torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
|
||||||
|
|
||||||
# Load a trained model and vocabulary that you have fine-tuned
|
# Load a trained model and vocabulary that you have fine-tuned
|
||||||
model = args.model_class.from_pretrained(args.output_dir)
|
model = model_class.from_pretrained(args.output_dir)
|
||||||
tokenizer = args.tokenizer_class.from_pretrained(args.output_dir)
|
tokenizer = tokenizer_class.from_pretrained(args.output_dir)
|
||||||
model.to(args.device)
|
model.to(args.device)
|
||||||
|
|
||||||
# Evaluation
|
# Evaluation
|
||||||
@@ -398,9 +400,11 @@ def main():
|
|||||||
eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else (args.output_dir,)
|
eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else (args.output_dir,)
|
||||||
|
|
||||||
for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
|
for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
|
||||||
eval_features = load_and_cache_examples(args, eval_task, tokenizer, eval=True)
|
eval_dataset = load_and_cache_examples(args, eval_task, tokenizer, evaluate=True)
|
||||||
|
|
||||||
evalutate(args, eval_task, eval_output_dir, eval_features, model)
|
result = evalutate(args, eval_task, eval_output_dir, eval_dataset, model)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ from __future__ import print_function
|
|||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
import argparse
|
import argparse
|
||||||
|
import logging
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# python 3.4+ can use builtin unittest.mock instead of mock package
|
# python 3.4+ can use builtin unittest.mock instead of mock package
|
||||||
@@ -26,7 +27,11 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
from mock import patch
|
from mock import patch
|
||||||
|
|
||||||
import run_bert_squad as rbs
|
import run_glue
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
logger = logging.getLogger()
|
||||||
|
|
||||||
def get_setup_file():
|
def get_setup_file():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@@ -36,12 +41,18 @@ def get_setup_file():
|
|||||||
|
|
||||||
class ExamplesTests(unittest.TestCase):
|
class ExamplesTests(unittest.TestCase):
|
||||||
|
|
||||||
def test_run_squad(self):
|
def test_run_glue(self):
|
||||||
testargs = ["prog", "-f", "/home/test/setup.py"]
|
stream_handler = logging.StreamHandler(sys.stdout)
|
||||||
with patch.object(sys, 'argv', testargs):
|
logger.addHandler(stream_handler)
|
||||||
setup = get_setup_file()
|
|
||||||
assert setup == "/home/test/setup.py"
|
testargs = ["run_glue.py", "--data_dir=./examples/tests_samples/MRPC/",
|
||||||
# rbs.main()
|
"--task_name=mrpc", "--do_train", "--do_eval", "--output_dir=./examples/tests_samples/temp_dir",
|
||||||
|
"--train_batch_size=4", "--eval_batch_size=2", "--num_train_epochs=2.0", "--overwrite_output_dir"]
|
||||||
|
model_name = "--model_name=xlnet-large-cased"
|
||||||
|
with patch.object(sys, 'argv', testargs + [model_name]):
|
||||||
|
result = run_glue.main()
|
||||||
|
for value in result.values():
|
||||||
|
self.assertGreaterEqual(value, 0.75)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
5
examples/tests_samples/.gitignore
vendored
Normal file
5
examples/tests_samples/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
*.*
|
||||||
|
cache*
|
||||||
|
temp*
|
||||||
|
!*.tsv
|
||||||
|
!.gitignore
|
||||||
7
examples/tests_samples/MRPC/dev.tsv
Normal file
7
examples/tests_samples/MRPC/dev.tsv
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
Quality #1 ID #2 ID #1 String #2 String
|
||||||
|
1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy .
|
||||||
|
0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .
|
||||||
|
0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .
|
||||||
|
1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .
|
||||||
|
0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty .
|
||||||
|
1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .
|
||||||
|
Can't render this file because it contains an unexpected character in line 3 and column 155.
|
7
examples/tests_samples/MRPC/train.tsv
Normal file
7
examples/tests_samples/MRPC/train.tsv
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
Quality #1 ID #2 ID #1 String #2 String
|
||||||
|
1 1355540 1355592 He said the foodservice pie business doesn 't fit the company 's long-term growth strategy . " The foodservice pie business does not fit our long-term growth strategy .
|
||||||
|
0 2029631 2029565 Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war . His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .
|
||||||
|
0 487993 487952 The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat . The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .
|
||||||
|
1 1989515 1989458 The AFL-CIO is waiting until October to decide if it will endorse a candidate . The AFL-CIO announced Wednesday that it will decide in October whether to endorse a candidate before the primaries .
|
||||||
|
0 1783137 1782659 No dates have been set for the civil or the criminal trial . No dates have been set for the criminal or civil cases , but Shanley has pleaded not guilty .
|
||||||
|
1 3039165 3039036 Wal-Mart said it would check all of its million-plus domestic workers to ensure they were legally employed . It has also said it would review all of its domestic employees more than 1 million to ensure they have legal status .
|
||||||
|
Can't render this file because it contains an unexpected character in line 3 and column 155.
|
@@ -28,7 +28,6 @@ import torch
|
|||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import CrossEntropyLoss, MSELoss
|
from torch.nn import CrossEntropyLoss, MSELoss
|
||||||
|
|
||||||
from .file_utils import cached_path
|
|
||||||
from .modeling_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer
|
from .modeling_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -30,7 +30,6 @@ import torch.nn as nn
|
|||||||
from torch.nn import CrossEntropyLoss
|
from torch.nn import CrossEntropyLoss
|
||||||
from torch.nn.parameter import Parameter
|
from torch.nn.parameter import Parameter
|
||||||
|
|
||||||
from .file_utils import cached_path
|
|
||||||
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
|
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
|
||||||
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
|
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
|
||||||
from .modeling_bert import BertLayerNorm as LayerNorm
|
from .modeling_bert import BertLayerNorm as LayerNorm
|
||||||
@@ -122,9 +121,8 @@ class GPT2Config(PretrainedConfig):
|
|||||||
predict_special_tokens=True,
|
predict_special_tokens=True,
|
||||||
summary_type='token_ids',
|
summary_type='token_ids',
|
||||||
summary_use_proj=True,
|
summary_use_proj=True,
|
||||||
summary_num_classes=1,
|
|
||||||
summary_activation=None,
|
summary_activation=None,
|
||||||
summary_dropout=0.1,
|
summary_first_dropout=0.1,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
"""Constructs GPT2Config.
|
"""Constructs GPT2Config.
|
||||||
@@ -172,9 +170,8 @@ class GPT2Config(PretrainedConfig):
|
|||||||
self.predict_special_tokens = predict_special_tokens
|
self.predict_special_tokens = predict_special_tokens
|
||||||
self.summary_type = summary_type
|
self.summary_type = summary_type
|
||||||
self.summary_use_proj = summary_use_proj
|
self.summary_use_proj = summary_use_proj
|
||||||
self.summary_num_classes = summary_num_classes
|
|
||||||
self.summary_activation = summary_activation
|
self.summary_activation = summary_activation
|
||||||
self.summary_dropout = summary_dropout
|
self.summary_first_dropout = summary_first_dropout
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"First argument must be either a vocabulary size (int)"
|
"First argument must be either a vocabulary size (int)"
|
||||||
|
|||||||
@@ -30,7 +30,6 @@ import torch.nn as nn
|
|||||||
from torch.nn import CrossEntropyLoss
|
from torch.nn import CrossEntropyLoss
|
||||||
from torch.nn.parameter import Parameter
|
from torch.nn.parameter import Parameter
|
||||||
|
|
||||||
from .file_utils import cached_path
|
|
||||||
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
|
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
|
||||||
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
|
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
|
||||||
from .modeling_bert import BertLayerNorm as LayerNorm
|
from .modeling_bert import BertLayerNorm as LayerNorm
|
||||||
@@ -150,9 +149,8 @@ class OpenAIGPTConfig(PretrainedConfig):
|
|||||||
predict_special_tokens=True,
|
predict_special_tokens=True,
|
||||||
summary_type='token_ids',
|
summary_type='token_ids',
|
||||||
summary_use_proj=True,
|
summary_use_proj=True,
|
||||||
summary_num_classes=1,
|
|
||||||
summary_activation=None,
|
summary_activation=None,
|
||||||
summary_dropout=0.1,
|
summary_first_dropout=0.1,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
"""Constructs OpenAIGPTConfig.
|
"""Constructs OpenAIGPTConfig.
|
||||||
@@ -203,9 +201,8 @@ class OpenAIGPTConfig(PretrainedConfig):
|
|||||||
self.predict_special_tokens = predict_special_tokens
|
self.predict_special_tokens = predict_special_tokens
|
||||||
self.summary_type = summary_type
|
self.summary_type = summary_type
|
||||||
self.summary_use_proj = summary_use_proj
|
self.summary_use_proj = summary_use_proj
|
||||||
self.summary_num_classes = summary_num_classes
|
|
||||||
self.summary_activation = summary_activation
|
self.summary_activation = summary_activation
|
||||||
self.summary_dropout = summary_dropout
|
self.summary_first_dropout = summary_first_dropout
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"First argument must be either a vocabulary size (int)"
|
"First argument must be either a vocabulary size (int)"
|
||||||
|
|||||||
@@ -36,7 +36,6 @@ from torch.nn.parameter import Parameter
|
|||||||
|
|
||||||
from .modeling_bert import BertLayerNorm as LayerNorm
|
from .modeling_bert import BertLayerNorm as LayerNorm
|
||||||
from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
|
from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
|
||||||
from .file_utils import cached_path
|
|
||||||
from .modeling_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
|
from .modeling_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ from io import open
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import CrossEntropyLoss, MSELoss, functional as F
|
from torch.nn import CrossEntropyLoss, functional as F
|
||||||
|
|
||||||
from .file_utils import cached_path
|
from .file_utils import cached_path
|
||||||
|
|
||||||
@@ -514,10 +514,10 @@ class SequenceSummary(nn.Module):
|
|||||||
- 'token_ids' => supply a Tensor of classification token indices (GPT/GPT-2)
|
- 'token_ids' => supply a Tensor of classification token indices (GPT/GPT-2)
|
||||||
- 'attn' => Not implemented now, use multi-head attention
|
- 'attn' => Not implemented now, use multi-head attention
|
||||||
summary_use_proj: Add a projection after the vector extraction
|
summary_use_proj: Add a projection after the vector extraction
|
||||||
summary_num_classes: If > 0: the projection outputs to n classes (otherwise to hidden_size)
|
summary_proj_to_labels: If True, the projection outputs to config.num_labels classes (otherwise to hidden_size). Default: False.
|
||||||
summary_activation:
|
summary_activation: 'tanh' => add a tanh activation to the output, Other => no activation (default).
|
||||||
'tanh' => add a tanh activation to the output
|
summary_first_dropout: Add a dropout before the projection and activation
|
||||||
None => no activation
|
summary_last_dropout: Add a dropout after the projection and activation
|
||||||
"""
|
"""
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super(SequenceSummary, self).__init__()
|
super(SequenceSummary, self).__init__()
|
||||||
@@ -531,8 +531,8 @@ class SequenceSummary(nn.Module):
|
|||||||
|
|
||||||
self.summary = nn.Identity()
|
self.summary = nn.Identity()
|
||||||
if hasattr(config, 'summary_use_proj') and config.summary_use_proj:
|
if hasattr(config, 'summary_use_proj') and config.summary_use_proj:
|
||||||
if hasattr(config, 'summary_num_classes') and config.summary_num_classes > 0:
|
if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
|
||||||
num_classes = config.summary_num_classes
|
num_classes = config.num_labels
|
||||||
else:
|
else:
|
||||||
num_classes = config.hidden_size
|
num_classes = config.hidden_size
|
||||||
self.summary = nn.Linear(config.hidden_size, num_classes)
|
self.summary = nn.Linear(config.hidden_size, num_classes)
|
||||||
@@ -541,7 +541,13 @@ class SequenceSummary(nn.Module):
|
|||||||
if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
|
if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
|
||||||
self.activation = nn.Tanh()
|
self.activation = nn.Tanh()
|
||||||
|
|
||||||
self.dropout = nn.Dropout(config.summary_dropout)
|
self.first_dropout = nn.Identity()
|
||||||
|
if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
|
||||||
|
self.first_dropout = nn.Dropout(config.summary_first_dropout)
|
||||||
|
|
||||||
|
self.last_dropout = nn.Identity()
|
||||||
|
if hasattr(config, 'summary_last_dropout') and config.summary_last_dropout > 0:
|
||||||
|
self.last_dropout = nn.Dropout(config.summary_last_dropout)
|
||||||
|
|
||||||
def forward(self, hidden_states, token_ids=None):
|
def forward(self, hidden_states, token_ids=None):
|
||||||
""" hidden_states: float Tensor in shape [bsz, seq_len, hidden_size], the hidden-states of the last layer.
|
""" hidden_states: float Tensor in shape [bsz, seq_len, hidden_size], the hidden-states of the last layer.
|
||||||
@@ -567,9 +573,10 @@ class SequenceSummary(nn.Module):
|
|||||||
elif self.summary_type == 'attn':
|
elif self.summary_type == 'attn':
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
output = self.first_dropout(output)
|
||||||
output = self.summary(output)
|
output = self.summary(output)
|
||||||
output = self.activation(output)
|
output = self.activation(output)
|
||||||
output = self.dropout(output)
|
output = self.last_dropout(output)
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|||||||
@@ -14,18 +14,14 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
""" PyTorch XLM model.
|
""" PyTorch XLM model.
|
||||||
"""
|
"""
|
||||||
from __future__ import (absolute_import, division, print_function,
|
|
||||||
unicode_literals)
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
from io import open
|
from io import open
|
||||||
|
|
||||||
import math
|
|
||||||
import itertools
|
import itertools
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -34,8 +30,7 @@ from torch import nn
|
|||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from torch.nn import CrossEntropyLoss, MSELoss
|
from torch.nn import CrossEntropyLoss, MSELoss
|
||||||
|
|
||||||
from .file_utils import cached_path
|
from .modeling_utils import (PretrainedConfig, PreTrainedModel,
|
||||||
from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
|
|
||||||
prune_linear_layer, SequenceSummary, SQuADHead)
|
prune_linear_layer, SequenceSummary, SQuADHead)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -79,10 +74,11 @@ class XLMConfig(PretrainedConfig):
|
|||||||
|
|
||||||
finetuning_task=None,
|
finetuning_task=None,
|
||||||
num_labels=2,
|
num_labels=2,
|
||||||
summary_type='last',
|
summary_type='first',
|
||||||
summary_use_proj=True,
|
summary_use_proj=True,
|
||||||
summary_activation='tanh',
|
summary_activation=None,
|
||||||
summary_dropout=0.1,
|
summary_proj_to_labels=True,
|
||||||
|
summary_first_dropout=0.1,
|
||||||
start_n_top=5,
|
start_n_top=5,
|
||||||
end_n_top=5,
|
end_n_top=5,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
@@ -164,7 +160,8 @@ class XLMConfig(PretrainedConfig):
|
|||||||
self.summary_type = summary_type
|
self.summary_type = summary_type
|
||||||
self.summary_use_proj = summary_use_proj
|
self.summary_use_proj = summary_use_proj
|
||||||
self.summary_activation = summary_activation
|
self.summary_activation = summary_activation
|
||||||
self.summary_dropout = summary_dropout
|
self.summary_proj_to_labels = summary_proj_to_labels
|
||||||
|
self.summary_first_dropout = summary_first_dropout
|
||||||
self.start_n_top = start_n_top
|
self.start_n_top = start_n_top
|
||||||
self.end_n_top = end_n_top
|
self.end_n_top = end_n_top
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -31,7 +31,6 @@ from torch import nn
|
|||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from torch.nn import CrossEntropyLoss, MSELoss
|
from torch.nn import CrossEntropyLoss, MSELoss
|
||||||
|
|
||||||
from .file_utils import cached_path
|
|
||||||
from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
|
from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
|
||||||
SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
|
SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
|
||||||
|
|
||||||
@@ -227,7 +226,7 @@ class XLNetConfig(PretrainedConfig):
|
|||||||
summary_type='last',
|
summary_type='last',
|
||||||
summary_use_proj=True,
|
summary_use_proj=True,
|
||||||
summary_activation='tanh',
|
summary_activation='tanh',
|
||||||
summary_dropout=0.1,
|
summary_last_dropout=0.1,
|
||||||
start_n_top=5,
|
start_n_top=5,
|
||||||
end_n_top=5,
|
end_n_top=5,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
@@ -314,7 +313,7 @@ class XLNetConfig(PretrainedConfig):
|
|||||||
self.summary_type = summary_type
|
self.summary_type = summary_type
|
||||||
self.summary_use_proj = summary_use_proj
|
self.summary_use_proj = summary_use_proj
|
||||||
self.summary_activation = summary_activation
|
self.summary_activation = summary_activation
|
||||||
self.summary_dropout = summary_dropout
|
self.summary_last_dropout = summary_last_dropout
|
||||||
self.start_n_top = start_n_top
|
self.start_n_top = start_n_top
|
||||||
self.end_n_top = end_n_top
|
self.end_n_top = end_n_top
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -113,8 +113,6 @@ class BertTokenizer(PreTrainedTokenizer):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
|
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
|
||||||
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
|
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
|
||||||
if never_split is None:
|
|
||||||
never_split = self.all_special_tokens
|
|
||||||
self.vocab = load_vocab(vocab_file)
|
self.vocab = load_vocab(vocab_file)
|
||||||
self.ids_to_tokens = collections.OrderedDict(
|
self.ids_to_tokens = collections.OrderedDict(
|
||||||
[(ids, tok) for tok, ids in self.vocab.items()])
|
[(ids, tok) for tok, ids in self.vocab.items()])
|
||||||
|
|||||||
@@ -142,11 +142,7 @@ class PreTrainedTokenizer(object):
|
|||||||
self.added_tokens_decoder = {}
|
self.added_tokens_decoder = {}
|
||||||
|
|
||||||
for key, value in kwargs.items():
|
for key, value in kwargs.items():
|
||||||
if key not in self.SPECIAL_TOKENS_ATTRIBUTES:
|
if key in self.SPECIAL_TOKENS_ATTRIBUTES:
|
||||||
raise ValueError(
|
|
||||||
"PreTrainedTokenizer.__init__() argument {} should be in {}".format(
|
|
||||||
key, ', '.join(self.SPECIAL_TOKENS_ATTRIBUTES)))
|
|
||||||
else:
|
|
||||||
setattr(self, key, value)
|
setattr(self, key, value)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -20,13 +20,9 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
from io import open
|
from io import open
|
||||||
|
|
||||||
from tqdm import tqdm
|
from .tokenization_utils import PreTrainedTokenizer
|
||||||
|
|
||||||
from .file_utils import cached_path
|
|
||||||
from .tokenization_utils import PreTrainedTokenizer, clean_up_tokenization
|
|
||||||
from .tokenization_bert import BasicTokenizer
|
from .tokenization_bert import BasicTokenizer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
Reference in New Issue
Block a user