adding tests to examples - updating summary module - coverage update

This commit is contained in:
thomwolf
2019-07-09 15:29:42 +02:00
parent c079d7ddff
commit d5481cbe1b
17 changed files with 139 additions and 116 deletions

View File

@@ -28,7 +28,6 @@ import torch
from torch import nn
from torch.nn import CrossEntropyLoss, MSELoss
from .file_utils import cached_path
from .modeling_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer
logger = logging.getLogger(__name__)

View File

@@ -30,7 +30,6 @@ import torch.nn as nn
from torch.nn import CrossEntropyLoss
from torch.nn.parameter import Parameter
from .file_utils import cached_path
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
from .modeling_bert import BertLayerNorm as LayerNorm
@@ -122,9 +121,8 @@ class GPT2Config(PretrainedConfig):
predict_special_tokens=True,
summary_type='token_ids',
summary_use_proj=True,
summary_num_classes=1,
summary_activation=None,
summary_dropout=0.1,
summary_first_dropout=0.1,
**kwargs
):
"""Constructs GPT2Config.
@@ -172,9 +170,8 @@ class GPT2Config(PretrainedConfig):
self.predict_special_tokens = predict_special_tokens
self.summary_type = summary_type
self.summary_use_proj = summary_use_proj
self.summary_num_classes = summary_num_classes
self.summary_activation = summary_activation
self.summary_dropout = summary_dropout
self.summary_first_dropout = summary_first_dropout
else:
raise ValueError(
"First argument must be either a vocabulary size (int)"

View File

@@ -30,9 +30,8 @@ import torch.nn as nn
from torch.nn import CrossEntropyLoss
from torch.nn.parameter import Parameter
from .file_utils import cached_path
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
from .modeling_bert import BertLayerNorm as LayerNorm
logger = logging.getLogger(__name__)
@@ -150,9 +149,8 @@ class OpenAIGPTConfig(PretrainedConfig):
predict_special_tokens=True,
summary_type='token_ids',
summary_use_proj=True,
summary_num_classes=1,
summary_activation=None,
summary_dropout=0.1,
summary_first_dropout=0.1,
**kwargs
):
"""Constructs OpenAIGPTConfig.
@@ -203,9 +201,8 @@ class OpenAIGPTConfig(PretrainedConfig):
self.predict_special_tokens = predict_special_tokens
self.summary_type = summary_type
self.summary_use_proj = summary_use_proj
self.summary_num_classes = summary_num_classes
self.summary_activation = summary_activation
self.summary_dropout = summary_dropout
self.summary_first_dropout = summary_first_dropout
else:
raise ValueError(
"First argument must be either a vocabulary size (int)"

View File

@@ -36,7 +36,6 @@ from torch.nn.parameter import Parameter
from .modeling_bert import BertLayerNorm as LayerNorm
from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
from .file_utils import cached_path
from .modeling_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
logger = logging.getLogger(__name__)

View File

@@ -25,7 +25,7 @@ from io import open
import torch
from torch import nn
from torch.nn import CrossEntropyLoss, MSELoss, functional as F
from torch.nn import CrossEntropyLoss, functional as F
from .file_utils import cached_path
@@ -514,10 +514,10 @@ class SequenceSummary(nn.Module):
- 'token_ids' => supply a Tensor of classification token indices (GPT/GPT-2)
- 'attn' => Not implemented now, use multi-head attention
summary_use_proj: Add a projection after the vector extraction
summary_num_classes: If > 0: the projection outputs to n classes (otherwise to hidden_size)
summary_activation:
'tanh' => add a tanh activation to the output
None => no activation
summary_proj_to_labels: If True, the projection outputs to config.num_labels classes (otherwise to hidden_size). Default: False.
summary_activation: 'tanh' => add a tanh activation to the output, Other => no activation. Default: no activation.
summary_first_dropout: Add a dropout before the projection and activation
summary_last_dropout: Add a dropout after the projection and activation
"""
def __init__(self, config):
super(SequenceSummary, self).__init__()
@@ -531,8 +531,8 @@ class SequenceSummary(nn.Module):
self.summary = nn.Identity()
if hasattr(config, 'summary_use_proj') and config.summary_use_proj:
if hasattr(config, 'summary_num_classes') and config.summary_num_classes > 0:
num_classes = config.summary_num_classes
if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
num_classes = config.num_labels
else:
num_classes = config.hidden_size
self.summary = nn.Linear(config.hidden_size, num_classes)
@@ -541,7 +541,13 @@ class SequenceSummary(nn.Module):
if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
self.activation = nn.Tanh()
self.dropout = nn.Dropout(config.summary_dropout)
self.first_dropout = nn.Identity()
if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
self.first_dropout = nn.Dropout(config.summary_first_dropout)
self.last_dropout = nn.Identity()
if hasattr(config, 'summary_last_dropout') and config.summary_last_dropout > 0:
self.last_dropout = nn.Dropout(config.summary_last_dropout)
def forward(self, hidden_states, token_ids=None):
""" hidden_states: float Tensor in shape [bsz, seq_len, hidden_size], the hidden-states of the last layer.
@@ -567,9 +573,10 @@ class SequenceSummary(nn.Module):
elif self.summary_type == 'attn':
raise NotImplementedError
output = self.first_dropout(output)
output = self.summary(output)
output = self.activation(output)
output = self.dropout(output)
output = self.last_dropout(output)
return output

View File

@@ -14,18 +14,14 @@
# limitations under the License.
""" PyTorch XLM model.
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import math
import os
import sys
from io import open
import math
import itertools
import numpy as np
@@ -34,9 +30,8 @@ from torch import nn
from torch.nn import functional as F
from torch.nn import CrossEntropyLoss, MSELoss
from .file_utils import cached_path
from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
prune_linear_layer, SequenceSummary, SQuADHead)
from .modeling_utils import (PretrainedConfig, PreTrainedModel,
prune_linear_layer, SequenceSummary, SQuADHead)
logger = logging.getLogger(__name__)
@@ -79,10 +74,11 @@ class XLMConfig(PretrainedConfig):
finetuning_task=None,
num_labels=2,
summary_type='last',
summary_type='first',
summary_use_proj=True,
summary_activation='tanh',
summary_dropout=0.1,
summary_activation=None,
summary_proj_to_labels=True,
summary_first_dropout=0.1,
start_n_top=5,
end_n_top=5,
**kwargs):
@@ -164,7 +160,8 @@ class XLMConfig(PretrainedConfig):
self.summary_type = summary_type
self.summary_use_proj = summary_use_proj
self.summary_activation = summary_activation
self.summary_dropout = summary_dropout
self.summary_proj_to_labels = summary_proj_to_labels
self.summary_first_dropout = summary_first_dropout
self.start_n_top = start_n_top
self.end_n_top = end_n_top
else:

View File

@@ -31,9 +31,8 @@ from torch import nn
from torch.nn import functional as F
from torch.nn import CrossEntropyLoss, MSELoss
from .file_utils import cached_path
from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
logger = logging.getLogger(__name__)
@@ -227,7 +226,7 @@ class XLNetConfig(PretrainedConfig):
summary_type='last',
summary_use_proj=True,
summary_activation='tanh',
summary_dropout=0.1,
summary_last_dropout=0.1,
start_n_top=5,
end_n_top=5,
**kwargs):
@@ -314,7 +313,7 @@ class XLNetConfig(PretrainedConfig):
self.summary_type = summary_type
self.summary_use_proj = summary_use_proj
self.summary_activation = summary_activation
self.summary_dropout = summary_dropout
self.summary_last_dropout = summary_last_dropout
self.start_n_top = start_n_top
self.end_n_top = end_n_top
else:

View File

@@ -113,8 +113,6 @@ class BertTokenizer(PreTrainedTokenizer):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
if never_split is None:
never_split = self.all_special_tokens
self.vocab = load_vocab(vocab_file)
self.ids_to_tokens = collections.OrderedDict(
[(ids, tok) for tok, ids in self.vocab.items()])

View File

@@ -142,11 +142,7 @@ class PreTrainedTokenizer(object):
self.added_tokens_decoder = {}
for key, value in kwargs.items():
if key not in self.SPECIAL_TOKENS_ATTRIBUTES:
raise ValueError(
"PreTrainedTokenizer.__init__() argument {} should be in {}".format(
key, ', '.join(self.SPECIAL_TOKENS_ATTRIBUTES)))
else:
if key in self.SPECIAL_TOKENS_ATTRIBUTES:
setattr(self, key, value)

View File

@@ -20,13 +20,9 @@ import json
import logging
import os
import re
import sys
from io import open
from tqdm import tqdm
from .file_utils import cached_path
from .tokenization_utils import PreTrainedTokenizer, clean_up_tokenization
from .tokenization_utils import PreTrainedTokenizer
from .tokenization_bert import BasicTokenizer
logger = logging.getLogger(__name__)