Handle sentence-pair tasks. Use the common tests on RoBERTa. Force-pushed to fix indentation.
This commit is contained in:
@@ -23,6 +23,7 @@ import logging
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn import CrossEntropyLoss
|
||||
|
||||
from pytorch_transformers.modeling_bert import (BertConfig, BertEmbeddings,
|
||||
BertLayerNorm, BertModel,
|
||||
@@ -78,7 +79,7 @@ class RobertaModel(BertModel):
|
||||
super(RobertaModel, self).__init__(config)
|
||||
|
||||
self.embeddings = RobertaEmbeddings(config)
|
||||
|
||||
self.apply(self.init_weights)
|
||||
|
||||
|
||||
class RobertaForMaskedLM(BertPreTrainedModel):
|
||||
@@ -94,16 +95,31 @@ class RobertaForMaskedLM(BertPreTrainedModel):
|
||||
|
||||
self.roberta = RobertaModel(config)
|
||||
self.lm_head = RobertaLMHead(config)
|
||||
|
||||
def forward(self, input_ids, token_type_ids=None, attention_mask=None, position_ids=None, head_mask=None):
|
||||
|
||||
self.apply(self.init_weights)
|
||||
self.tie_weights()
|
||||
|
||||
def tie_weights(self):
|
||||
""" Make sure we are sharing the input and output embeddings.
|
||||
Export to TorchScript can't handle parameter sharing so we are cloning them instead.
|
||||
"""
|
||||
self._tie_or_clone_weights(self.lm_head.decoder, self.roberta.embeddings.word_embeddings)
|
||||
|
||||
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, position_ids=None,
|
||||
head_mask=None):
|
||||
outputs = self.roberta(input_ids, position_ids=position_ids, token_type_ids=token_type_ids,
|
||||
attention_mask=attention_mask, head_mask=head_mask)
|
||||
sequence_output = outputs[0]
|
||||
prediction_scores = self.lm_head(sequence_output)
|
||||
|
||||
outputs = (prediction_scores,) + outputs[2:]
|
||||
return outputs
|
||||
|
||||
if masked_lm_labels is not None:
|
||||
loss_fct = CrossEntropyLoss(ignore_index=-1)
|
||||
masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
|
||||
outputs = (masked_lm_loss,) + outputs
|
||||
|
||||
return outputs
|
||||
|
||||
|
||||
class RobertaLMHead(nn.Module):
|
||||
@@ -114,7 +130,7 @@ class RobertaLMHead(nn.Module):
|
||||
self.dense = nn.Linear(config.hidden_size, config.hidden_size)
|
||||
self.layer_norm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
||||
|
||||
self.weight = nn.Linear(config.hidden_size, config.vocab_size, bias=False).weight
|
||||
self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
|
||||
self.bias = nn.Parameter(torch.zeros(config.vocab_size))
|
||||
|
||||
def forward(self, features, **kwargs):
|
||||
@@ -123,6 +139,6 @@ class RobertaLMHead(nn.Module):
|
||||
x = self.layer_norm(x)
|
||||
|
||||
# project back to size of vocabulary with bias
|
||||
x = F.linear(x, self.weight) + self.bias
|
||||
x = self.decoder(x) + self.bias
|
||||
|
||||
return x
|
||||
|
||||
Reference in New Issue
Block a user