All TODOs to be checked by Thom have been added.
This commit is contained in:
@@ -483,7 +483,14 @@ class GPT2Model(GPT2PreTrainedModel):
|
|||||||
self.apply(self.init_weights)
|
self.apply(self.init_weights)
|
||||||
|
|
||||||
def set_num_special_tokens(self, num_special_tokens=None):
|
def set_num_special_tokens(self, num_special_tokens=None):
|
||||||
"""Update input embeddings with new embedding matrix if needed."""
|
"""
|
||||||
|
Update input embeddings with new embedding matrix if needed.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
num_special_tokens: Special tokens to be added to the embedding matrix
|
||||||
|
|
||||||
|
TODO Lysandre filled args
|
||||||
|
"""
|
||||||
if num_special_tokens is None or self.config.n_special == num_special_tokens:
|
if num_special_tokens is None or self.config.n_special == num_special_tokens:
|
||||||
return
|
return
|
||||||
# Update config
|
# Update config
|
||||||
@@ -625,8 +632,6 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
|||||||
`keep_multihead_output`: If True, saves output of the multi-head attention module with its gradient.
|
`keep_multihead_output`: If True, saves output of the multi-head attention module with its gradient.
|
||||||
This can be used to compute head importance metrics. Default: False
|
This can be used to compute head importance metrics. Default: False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Example::
|
Example::
|
||||||
|
|
||||||
config = modeling_gpt2.GPT2Config()
|
config = modeling_gpt2.GPT2Config()
|
||||||
@@ -642,7 +647,13 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
|
|||||||
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
||||||
"""
|
"""
|
||||||
Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings.
|
Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings.
|
||||||
TODO Shouldn't we put args + returns ?
|
|
||||||
|
Args:
|
||||||
|
num_special_tokens: Special tokens to be added to the embedding matrix
|
||||||
|
predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
|
||||||
|
Defaults to True.
|
||||||
|
|
||||||
|
TODO Lysandre filled args
|
||||||
"""
|
"""
|
||||||
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
||||||
self.transformer.set_num_special_tokens(num_special_tokens)
|
self.transformer.set_num_special_tokens(num_special_tokens)
|
||||||
@@ -737,7 +748,13 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
|
|||||||
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
||||||
"""
|
"""
|
||||||
Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings.
|
Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings.
|
||||||
TODO Shouldn't we put args + returns ?
|
|
||||||
|
Args:
|
||||||
|
num_special_tokens: Special tokens to be added to the embedding matrix
|
||||||
|
predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
|
||||||
|
Defaults to True.
|
||||||
|
|
||||||
|
TODO Lysandre filled args
|
||||||
"""
|
"""
|
||||||
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
||||||
self.transformer.set_num_special_tokens(num_special_tokens)
|
self.transformer.set_num_special_tokens(num_special_tokens)
|
||||||
|
|||||||
@@ -496,12 +496,10 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
|
|||||||
"""
|
"""
|
||||||
Update input embeddings with new embedding matrix if needed
|
Update input embeddings with new embedding matrix if needed
|
||||||
|
|
||||||
TODO
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
num_special_tokens:
|
num_special_tokens: Special tokens to be added to the embedding matrix
|
||||||
|
|
||||||
Returns:
|
TODO Lysandre filled Args
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if num_special_tokens is None or self.config.n_special == num_special_tokens:
|
if num_special_tokens is None or self.config.n_special == num_special_tokens:
|
||||||
@@ -665,7 +663,13 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
|
|||||||
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
||||||
"""
|
"""
|
||||||
Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings
|
Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings
|
||||||
TODO
|
|
||||||
|
Args:
|
||||||
|
num_special_tokens: Special tokens to be added to the embedding matrix
|
||||||
|
predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
|
||||||
|
Defaults to True.
|
||||||
|
|
||||||
|
TODO Lysandre filled Args
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
||||||
@@ -775,9 +779,14 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
|
|||||||
self.apply(self.init_weights)
|
self.apply(self.init_weights)
|
||||||
|
|
||||||
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
def set_num_special_tokens(self, num_special_tokens, predict_special_tokens=True):
|
||||||
""" Update input and output embeddings with new embedding matrice
|
""" Update input and output embeddings with new embedding matrix. Make sure we are sharing the embeddings.
|
||||||
Make sure we are sharing the embeddings
|
|
||||||
TODO
|
Args:
|
||||||
|
num_special_tokens: Special tokens to be added to the embedding matrix
|
||||||
|
predict_special_tokens: if set to True, the model will try and predict the specified ``num_special_tokens``.
|
||||||
|
Defaults to True.
|
||||||
|
|
||||||
|
TODO Lysandre filled Args
|
||||||
"""
|
"""
|
||||||
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
self.config.predict_special_tokens = self.transformer.config.predict_special_tokens = predict_special_tokens
|
||||||
self.transformer.set_num_special_tokens(num_special_tokens)
|
self.transformer.set_num_special_tokens(num_special_tokens)
|
||||||
|
|||||||
@@ -623,7 +623,7 @@ class XLNetPreTrainedModel(PreTrainedModel):
|
|||||||
class XLNetModel(XLNetPreTrainedModel):
|
class XLNetModel(XLNetPreTrainedModel):
|
||||||
"""XLNet model ("XLNet: Generalized Autoregressive Pretraining for Language Understanding").
|
"""XLNet model ("XLNet: Generalized Autoregressive Pretraining for Language Understanding").
|
||||||
|
|
||||||
TODO: this was copied from the XLNetLMHeadModel, check that it's ok.
|
TODO Lysandre filled: this was copied from the XLNetLMHeadModel, check that it's ok.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
`config`: a XLNetConfig class instance with the configuration to build a new model
|
`config`: a XLNetConfig class instance with the configuration to build a new model
|
||||||
@@ -631,7 +631,15 @@ class XLNetModel(XLNetPreTrainedModel):
|
|||||||
`keep_multihead_output`: If True, saves output of the multi-head attention module with its gradient.
|
`keep_multihead_output`: If True, saves output of the multi-head attention module with its gradient.
|
||||||
This can be used to compute head importance metrics. Default: False
|
This can be used to compute head importance metrics. Default: False
|
||||||
|
|
||||||
TODO: Add usage
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
config = modeling.XLNetConfig(vocab_size_or_config_json_file=32000, d_model=768,
|
||||||
|
n_layer=12, num_attention_heads=12, intermediate_size=3072)
|
||||||
|
|
||||||
|
model = modeling.XLNetModel(config=config)
|
||||||
|
|
||||||
|
TODO Lysandre filled: Added example usage
|
||||||
"""
|
"""
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super(XLNetModel, self).__init__(config)
|
super(XLNetModel, self).__init__(config)
|
||||||
@@ -663,8 +671,8 @@ class XLNetModel(XLNetPreTrainedModel):
|
|||||||
Creates causal attention mask. Float mask where 1.0 indicates masked, 0.0 indicates not-masked.
|
Creates causal attention mask. Float mask where 1.0 indicates masked, 0.0 indicates not-masked.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
qlen: TODO
|
qlen: TODO Lysandre didn't fill
|
||||||
mlen: TODO
|
mlen: TODO Lysandre didn't fill
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
@@ -783,19 +791,25 @@ class XLNetModel(XLNetPreTrainedModel):
|
|||||||
1 for tokens with losses and 0 for tokens without losses.
|
1 for tokens with losses and 0 for tokens without losses.
|
||||||
Only used during pretraining for two-stream attention.
|
Only used during pretraining for two-stream attention.
|
||||||
Set to None during finetuning.
|
Set to None during finetuning.
|
||||||
|
head_mask: TODO Lysandre didn't fill
|
||||||
|
|
||||||
mem_len: int, the number of tokens to cache.
|
|
||||||
reuse_len: int, the number of tokens in the currect batch to be cached
|
|
||||||
and reused in the future.
|
|
||||||
bi_data: bool, whether to use bidirectional input pipeline.
|
|
||||||
Usually set to True during pretraining and False during finetuning.
|
|
||||||
clamp_len: int, clamp all relative distances larger than clamp_len.
|
|
||||||
-1 means no clamping.
|
|
||||||
same_length: bool, whether to use the same attention length for each token.
|
|
||||||
summary_type: str, "last", "first", "mean", or "attn". The method
|
|
||||||
to pool the input to get a vector representation.
|
|
||||||
|
|
||||||
TODO: Add usage
|
Returns:
|
||||||
|
TODO Lysandre didn't fill: Missing returns!
|
||||||
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
# Already been converted into WordPiece token ids
|
||||||
|
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
|
||||||
|
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
|
||||||
|
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
|
||||||
|
|
||||||
|
all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
|
||||||
|
# or
|
||||||
|
all_encoder_layers, pooled_output = model.forward(input_ids, token_type_ids, input_mask)
|
||||||
|
|
||||||
|
TODO Lysandre filled: Filled with the LMHead example, is probably different since it has a different output
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# the original code for XLNet uses shapes [len, bsz] with the batch dimension at the end
|
# the original code for XLNet uses shapes [len, bsz] with the batch dimension at the end
|
||||||
# but we want a unified interface in the library with the batch size on the first dimension
|
# but we want a unified interface in the library with the batch size on the first dimension
|
||||||
@@ -951,14 +965,14 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
|
|||||||
`keep_multihead_output`: If True, saves output of the multi-head attention module with its gradient.
|
`keep_multihead_output`: If True, saves output of the multi-head attention module with its gradient.
|
||||||
This can be used to compute head importance metrics. Default: False
|
This can be used to compute head importance metrics. Default: False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Example::
|
Example::
|
||||||
|
|
||||||
config = modeling.XLNetConfig(vocab_size_or_config_json_file=32000, d_model=768,
|
config = modeling.XLNetConfig(vocab_size_or_config_json_file=32000, d_model=768,
|
||||||
n_layer=12, num_attention_heads=12, intermediate_size=3072)
|
n_layer=12, num_attention_heads=12, intermediate_size=3072)
|
||||||
|
|
||||||
model = modeling.XLNetModel(config=config)
|
model = modeling.XLNetLMHeadModel(config=config)
|
||||||
|
|
||||||
|
TODO Lysandre modified: Changed XLNetModel to XLNetLMHeadModel in the example
|
||||||
"""
|
"""
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super(XLNetLMHeadModel, self).__init__(config)
|
super(XLNetLMHeadModel, self).__init__(config)
|
||||||
@@ -1122,7 +1136,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
|
|||||||
1 for tokens with losses and 0 for tokens without losses.
|
1 for tokens with losses and 0 for tokens without losses.
|
||||||
Only used during pre-training for two-stream attention.
|
Only used during pre-training for two-stream attention.
|
||||||
Set to None during fine-tuning.
|
Set to None during fine-tuning.
|
||||||
labels: TODO
|
labels: TODO Lysandre didn't fill
|
||||||
head_mask: an optional ``torch.Tensor`` of shape [num_heads] or [num_layers, num_heads] with indices between 0 and 1.
|
head_mask: an optional ``torch.Tensor`` of shape [num_heads] or [num_layers, num_heads] with indices between 0 and 1.
|
||||||
It's a mask to be used to nullify some heads of the transformer. 1.0 => head is fully masked, 0.0 => head is not masked.
|
It's a mask to be used to nullify some heads of the transformer. 1.0 => head is fully masked, 0.0 => head is not masked.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user