adding tests to examples - updating summary module - coverage update
This commit is contained in:
@@ -28,7 +28,6 @@ import torch
|
||||
from torch import nn
|
||||
from torch.nn import CrossEntropyLoss, MSELoss
|
||||
|
||||
from .file_utils import cached_path
|
||||
from .modeling_utils import WEIGHTS_NAME, CONFIG_NAME, PretrainedConfig, PreTrainedModel, prune_linear_layer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -30,7 +30,6 @@ import torch.nn as nn
|
||||
from torch.nn import CrossEntropyLoss
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from .file_utils import cached_path
|
||||
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
|
||||
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
|
||||
from .modeling_bert import BertLayerNorm as LayerNorm
|
||||
@@ -122,9 +121,8 @@ class GPT2Config(PretrainedConfig):
|
||||
predict_special_tokens=True,
|
||||
summary_type='token_ids',
|
||||
summary_use_proj=True,
|
||||
summary_num_classes=1,
|
||||
summary_activation=None,
|
||||
summary_dropout=0.1,
|
||||
summary_first_dropout=0.1,
|
||||
**kwargs
|
||||
):
|
||||
"""Constructs GPT2Config.
|
||||
@@ -172,9 +170,8 @@ class GPT2Config(PretrainedConfig):
|
||||
self.predict_special_tokens = predict_special_tokens
|
||||
self.summary_type = summary_type
|
||||
self.summary_use_proj = summary_use_proj
|
||||
self.summary_num_classes = summary_num_classes
|
||||
self.summary_activation = summary_activation
|
||||
self.summary_dropout = summary_dropout
|
||||
self.summary_first_dropout = summary_first_dropout
|
||||
else:
|
||||
raise ValueError(
|
||||
"First argument must be either a vocabulary size (int)"
|
||||
|
||||
@@ -30,9 +30,8 @@ import torch.nn as nn
|
||||
from torch.nn import CrossEntropyLoss
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
from .file_utils import cached_path
|
||||
from .modeling_utils import (Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig,
|
||||
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
|
||||
PreTrainedModel, prune_conv1d_layer, SequenceSummary)
|
||||
from .modeling_bert import BertLayerNorm as LayerNorm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -150,9 +149,8 @@ class OpenAIGPTConfig(PretrainedConfig):
|
||||
predict_special_tokens=True,
|
||||
summary_type='token_ids',
|
||||
summary_use_proj=True,
|
||||
summary_num_classes=1,
|
||||
summary_activation=None,
|
||||
summary_dropout=0.1,
|
||||
summary_first_dropout=0.1,
|
||||
**kwargs
|
||||
):
|
||||
"""Constructs OpenAIGPTConfig.
|
||||
@@ -203,9 +201,8 @@ class OpenAIGPTConfig(PretrainedConfig):
|
||||
self.predict_special_tokens = predict_special_tokens
|
||||
self.summary_type = summary_type
|
||||
self.summary_use_proj = summary_use_proj
|
||||
self.summary_num_classes = summary_num_classes
|
||||
self.summary_activation = summary_activation
|
||||
self.summary_dropout = summary_dropout
|
||||
self.summary_first_dropout = summary_first_dropout
|
||||
else:
|
||||
raise ValueError(
|
||||
"First argument must be either a vocabulary size (int)"
|
||||
|
||||
@@ -36,7 +36,6 @@ from torch.nn.parameter import Parameter
|
||||
|
||||
from .modeling_bert import BertLayerNorm as LayerNorm
|
||||
from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
|
||||
from .file_utils import cached_path
|
||||
from .modeling_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -25,7 +25,7 @@ from io import open
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import CrossEntropyLoss, MSELoss, functional as F
|
||||
from torch.nn import CrossEntropyLoss, functional as F
|
||||
|
||||
from .file_utils import cached_path
|
||||
|
||||
@@ -514,10 +514,10 @@ class SequenceSummary(nn.Module):
|
||||
- 'token_ids' => supply a Tensor of classification token indices (GPT/GPT-2)
|
||||
- 'attn' => Not implemented now, use multi-head attention
|
||||
summary_use_proj: Add a projection after the vector extraction
|
||||
summary_num_classes: If > 0: the projection outputs to n classes (otherwise to hidden_size)
|
||||
summary_activation:
|
||||
'tanh' => add a tanh activation to the output
|
||||
None => no activation
|
||||
summary_proj_to_labels: If True, the projection outputs to config.num_labels classes (otherwise to hidden_size). Default: False.
|
||||
summary_activation: 'tanh' => add a tanh activation to the output, Other => no activation. Default
|
||||
summary_first_dropout: Add a dropout before the projection and activation
|
||||
summary_last_dropout: Add a dropout after the projection and activation
|
||||
"""
|
||||
def __init__(self, config):
|
||||
super(SequenceSummary, self).__init__()
|
||||
@@ -531,8 +531,8 @@ class SequenceSummary(nn.Module):
|
||||
|
||||
self.summary = nn.Identity()
|
||||
if hasattr(config, 'summary_use_proj') and config.summary_use_proj:
|
||||
if hasattr(config, 'summary_num_classes') and config.summary_num_classes > 0:
|
||||
num_classes = config.summary_num_classes
|
||||
if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
|
||||
num_classes = config.num_labels
|
||||
else:
|
||||
num_classes = config.hidden_size
|
||||
self.summary = nn.Linear(config.hidden_size, num_classes)
|
||||
@@ -541,7 +541,13 @@ class SequenceSummary(nn.Module):
|
||||
if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
|
||||
self.activation = nn.Tanh()
|
||||
|
||||
self.dropout = nn.Dropout(config.summary_dropout)
|
||||
self.first_dropout = nn.Identity()
|
||||
if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
|
||||
self.first_dropout = nn.Dropout(config.summary_first_dropout)
|
||||
|
||||
self.last_dropout = nn.Identity()
|
||||
if hasattr(config, 'summary_last_dropout') and config.summary_last_dropout > 0:
|
||||
self.last_dropout = nn.Dropout(config.summary_last_dropout)
|
||||
|
||||
def forward(self, hidden_states, token_ids=None):
|
||||
""" hidden_states: float Tensor in shape [bsz, seq_len, hidden_size], the hidden-states of the last layer.
|
||||
@@ -567,9 +573,10 @@ class SequenceSummary(nn.Module):
|
||||
elif self.summary_type == 'attn':
|
||||
raise NotImplementedError
|
||||
|
||||
output = self.first_dropout(output)
|
||||
output = self.summary(output)
|
||||
output = self.activation(output)
|
||||
output = self.dropout(output)
|
||||
output = self.last_dropout(output)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
@@ -14,18 +14,14 @@
|
||||
# limitations under the License.
|
||||
""" PyTorch XLM model.
|
||||
"""
|
||||
from __future__ import (absolute_import, division, print_function,
|
||||
unicode_literals)
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
from io import open
|
||||
|
||||
import math
|
||||
import itertools
|
||||
import numpy as np
|
||||
|
||||
@@ -34,9 +30,8 @@ from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from torch.nn import CrossEntropyLoss, MSELoss
|
||||
|
||||
from .file_utils import cached_path
|
||||
from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
|
||||
prune_linear_layer, SequenceSummary, SQuADHead)
|
||||
from .modeling_utils import (PretrainedConfig, PreTrainedModel,
|
||||
prune_linear_layer, SequenceSummary, SQuADHead)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -79,10 +74,11 @@ class XLMConfig(PretrainedConfig):
|
||||
|
||||
finetuning_task=None,
|
||||
num_labels=2,
|
||||
summary_type='last',
|
||||
summary_type='first',
|
||||
summary_use_proj=True,
|
||||
summary_activation='tanh',
|
||||
summary_dropout=0.1,
|
||||
summary_activation=None,
|
||||
summary_proj_to_labels=True,
|
||||
summary_first_dropout=0.1,
|
||||
start_n_top=5,
|
||||
end_n_top=5,
|
||||
**kwargs):
|
||||
@@ -164,7 +160,8 @@ class XLMConfig(PretrainedConfig):
|
||||
self.summary_type = summary_type
|
||||
self.summary_use_proj = summary_use_proj
|
||||
self.summary_activation = summary_activation
|
||||
self.summary_dropout = summary_dropout
|
||||
self.summary_proj_to_labels = summary_proj_to_labels
|
||||
self.summary_first_dropout = summary_first_dropout
|
||||
self.start_n_top = start_n_top
|
||||
self.end_n_top = end_n_top
|
||||
else:
|
||||
|
||||
@@ -31,9 +31,8 @@ from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from torch.nn import CrossEntropyLoss, MSELoss
|
||||
|
||||
from .file_utils import cached_path
|
||||
from .modeling_utils import (CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel,
|
||||
SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
|
||||
SequenceSummary, PoolerAnswerClass, PoolerEndLogits, PoolerStartLogits)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -227,7 +226,7 @@ class XLNetConfig(PretrainedConfig):
|
||||
summary_type='last',
|
||||
summary_use_proj=True,
|
||||
summary_activation='tanh',
|
||||
summary_dropout=0.1,
|
||||
summary_last_dropout=0.1,
|
||||
start_n_top=5,
|
||||
end_n_top=5,
|
||||
**kwargs):
|
||||
@@ -314,7 +313,7 @@ class XLNetConfig(PretrainedConfig):
|
||||
self.summary_type = summary_type
|
||||
self.summary_use_proj = summary_use_proj
|
||||
self.summary_activation = summary_activation
|
||||
self.summary_dropout = summary_dropout
|
||||
self.summary_last_dropout = summary_last_dropout
|
||||
self.start_n_top = start_n_top
|
||||
self.end_n_top = end_n_top
|
||||
else:
|
||||
|
||||
@@ -113,8 +113,6 @@ class BertTokenizer(PreTrainedTokenizer):
|
||||
raise ValueError(
|
||||
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
|
||||
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
|
||||
if never_split is None:
|
||||
never_split = self.all_special_tokens
|
||||
self.vocab = load_vocab(vocab_file)
|
||||
self.ids_to_tokens = collections.OrderedDict(
|
||||
[(ids, tok) for tok, ids in self.vocab.items()])
|
||||
|
||||
@@ -142,11 +142,7 @@ class PreTrainedTokenizer(object):
|
||||
self.added_tokens_decoder = {}
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if key not in self.SPECIAL_TOKENS_ATTRIBUTES:
|
||||
raise ValueError(
|
||||
"PreTrainedTokenizer.__init__() argument {} should be in {}".format(
|
||||
key, ', '.join(self.SPECIAL_TOKENS_ATTRIBUTES)))
|
||||
else:
|
||||
if key in self.SPECIAL_TOKENS_ATTRIBUTES:
|
||||
setattr(self, key, value)
|
||||
|
||||
|
||||
|
||||
@@ -20,13 +20,9 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from io import open
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from .file_utils import cached_path
|
||||
from .tokenization_utils import PreTrainedTokenizer, clean_up_tokenization
|
||||
from .tokenization_utils import PreTrainedTokenizer
|
||||
from .tokenization_bert import BasicTokenizer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
Reference in New Issue
Block a user