Delete all mentions of Model2Model (#3019)
This commit is contained in:
@@ -220,96 +220,3 @@ print(sequence)
|
|||||||
```
|
```
|
||||||
|
|
||||||
The model only requires a single token as input as all the previous tokens' key/value pairs are contained in the `past`.
|
The model only requires a single token as input as all the previous tokens' key/value pairs are contained in the `past`.
|
||||||
|
|
||||||
### Model2Model example
|
|
||||||
|
|
||||||
Encoder-decoder architectures require two tokenized inputs: one for the encoder and the other one for the decoder. Let's assume that we want to use `Model2Model` for generative question answering, and start by tokenizing the question and answer that will be fed to the model.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import torch
|
|
||||||
from transformers import BertTokenizer, Model2Model
|
|
||||||
|
|
||||||
# OPTIONAL: if you want to have more information on what's happening under the hood, activate the logger as follows
|
|
||||||
import logging
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
|
|
||||||
# Load pre-trained model tokenizer (vocabulary)
|
|
||||||
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
|
||||||
|
|
||||||
# Encode the input to the encoder (the question)
|
|
||||||
question = "Who was Jim Henson?"
|
|
||||||
encoded_question = tokenizer.encode(question)
|
|
||||||
|
|
||||||
# Encode the input to the decoder (the answer)
|
|
||||||
answer = "Jim Henson was a puppeteer"
|
|
||||||
encoded_answer = tokenizer.encode(answer)
|
|
||||||
|
|
||||||
# Convert inputs to PyTorch tensors
|
|
||||||
question_tensor = torch.tensor([encoded_question])
|
|
||||||
answer_tensor = torch.tensor([encoded_answer])
|
|
||||||
```
|
|
||||||
|
|
||||||
Let's see how we can use `Model2Model` to get the value of the loss associated with this (question, answer) pair:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# In order to compute the loss we need to provide language model
|
|
||||||
# labels (the token ids that the model should have produced) to
|
|
||||||
# the decoder.
|
|
||||||
lm_labels = encoded_answer
|
|
||||||
labels_tensor = torch.tensor([lm_labels])
|
|
||||||
|
|
||||||
# Load pre-trained model (weights)
|
|
||||||
model = Model2Model.from_pretrained('bert-base-uncased')
|
|
||||||
|
|
||||||
# Set the model in evaluation mode to deactivate the DropOut modules
|
|
||||||
# This is IMPORTANT to have reproducible results during evaluation!
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
# If you have a GPU, put everything on cuda
|
|
||||||
question_tensor = question_tensor.to('cuda')
|
|
||||||
answer_tensor = answer_tensor.to('cuda')
|
|
||||||
labels_tensor = labels_tensor.to('cuda')
|
|
||||||
model.to('cuda')
|
|
||||||
|
|
||||||
# Compute the language modeling loss for the (question, answer) pair
|
|
||||||
with torch.no_grad():
|
|
||||||
# See the models docstrings for the detail of the inputs
|
|
||||||
outputs = model(question_tensor, answer_tensor, decoder_lm_labels=labels_tensor)
|
|
||||||
# Transformers models always output tuples.
|
|
||||||
# See the models docstrings for the detail of all the outputs
|
|
||||||
# In our case, the first element is the value of the LM loss
|
|
||||||
lm_loss = outputs[0]
|
|
||||||
```
|
|
||||||
|
|
||||||
This loss can be used to fine-tune `Model2Model` on the question answering task. Assuming that we fine-tuned the model, let us now see how to generate an answer:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Let's re-use the previous question
|
|
||||||
question = "Who was Jim Henson?"
|
|
||||||
encoded_question = tokenizer.encode(question)
|
|
||||||
question_tensor = torch.tensor([encoded_question])
|
|
||||||
|
|
||||||
# This time we try to generate the answer, so we start with an empty sequence
|
|
||||||
answer = "[CLS]"
|
|
||||||
encoded_answer = tokenizer.encode(answer, add_special_tokens=False)
|
|
||||||
answer_tensor = torch.tensor([encoded_answer])
|
|
||||||
|
|
||||||
# Load pre-trained model (weights)
|
|
||||||
model = Model2Model.from_pretrained('fine-tuned-weights')
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
# If you have a GPU, put everything on cuda
|
|
||||||
question_tensor = question_tensor.to('cuda')
|
|
||||||
answer_tensor = answer_tensor.to('cuda')
|
|
||||||
model.to('cuda')
|
|
||||||
|
|
||||||
# Predict all tokens
|
|
||||||
with torch.no_grad():
|
|
||||||
outputs = model(question_tensor, answer_tensor)
|
|
||||||
predictions = outputs[0]
|
|
||||||
|
|
||||||
# confirm we were able to predict 'jim'
|
|
||||||
predicted_index = torch.argmax(predictions[0, -1]).item()
|
|
||||||
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
|
|
||||||
assert predicted_token == 'jim'
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -241,7 +241,7 @@ if is_torch_available():
|
|||||||
CamembertForTokenClassification,
|
CamembertForTokenClassification,
|
||||||
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
|
||||||
)
|
)
|
||||||
from .modeling_encoder_decoder import PreTrainedEncoderDecoder, Model2Model
|
from .modeling_encoder_decoder import PreTrainedEncoderDecoder
|
||||||
from .modeling_t5 import (
|
from .modeling_t5 import (
|
||||||
T5PreTrainedModel,
|
T5PreTrainedModel,
|
||||||
T5Model,
|
T5Model,
|
||||||
|
|||||||
@@ -234,62 +234,3 @@ class PreTrainedEncoderDecoder(nn.Module):
|
|||||||
decoder_outputs = self.decoder(decoder_input_ids, **kwargs_decoder)
|
decoder_outputs = self.decoder(decoder_input_ids, **kwargs_decoder)
|
||||||
|
|
||||||
return decoder_outputs + encoder_outputs
|
return decoder_outputs + encoder_outputs
|
||||||
|
|
||||||
|
|
||||||
class Model2Model(PreTrainedEncoderDecoder):
|
|
||||||
r"""
|
|
||||||
:class:`~transformers.Model2Model` instantiates a Seq2Seq model
|
|
||||||
where both of the encoder and decoder are of the same family. If the
|
|
||||||
name of or the path to a pretrained model is specified, the encoder and
|
|
||||||
the decoder will be initialized with the pretrained weights (the
|
|
||||||
cross-attention will be initialized randomly if its weights are not
|
|
||||||
present).
|
|
||||||
|
|
||||||
It is possible to override this behavior and initialize, say, the decoder randomly
|
|
||||||
by creating it beforehand as follows
|
|
||||||
|
|
||||||
config = BertConfig.from_pretrained()
|
|
||||||
decoder = BertForMaskedLM(config)
|
|
||||||
model = Model2Model.from_pretrained('bert-base-uncased', decoder_model=decoder)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
self.tie_weights()
|
|
||||||
|
|
||||||
def tie_weights(self):
|
|
||||||
""" Tying the encoder and decoders' embeddings together.
|
|
||||||
|
|
||||||
We need for each to get down to the embedding weights. However the
|
|
||||||
different model classes are inconsistent in that respect:
|
|
||||||
- BertModel: embeddings.word_embeddings
|
|
||||||
- RoBERTa: embeddings.word_embeddings
|
|
||||||
- XLMModel: embeddings
|
|
||||||
- GPT2: wte
|
|
||||||
- BertForMaskedLM: bert.embeddings.word_embeddings
|
|
||||||
- RobertaForMaskedLM: roberta.embeddings.word_embeddings
|
|
||||||
|
|
||||||
argument of the XEmbedding layer for each model, but it is "blocked"
|
|
||||||
by a model-specific keyword (bert, )...
|
|
||||||
"""
|
|
||||||
# self._tie_or_clone_weights(self.encoder, self.decoder)
|
|
||||||
pass
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
|
|
||||||
|
|
||||||
if (
|
|
||||||
"bert" not in pretrained_model_name_or_path
|
|
||||||
or "roberta" in pretrained_model_name_or_path
|
|
||||||
or "distilbert" in pretrained_model_name_or_path
|
|
||||||
):
|
|
||||||
raise ValueError("Only the Bert model is currently supported.")
|
|
||||||
|
|
||||||
model = super().from_pretrained(
|
|
||||||
encoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
|
|
||||||
decoder_pretrained_model_name_or_path=pretrained_model_name_or_path,
|
|
||||||
*args,
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
|
|
||||||
return model
|
|
||||||
|
|||||||
@@ -1,50 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
# Copyright 2018 The Hugging Face Inc. Team
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from transformers import is_torch_available
|
|
||||||
|
|
||||||
from .utils import require_torch, slow
|
|
||||||
|
|
||||||
|
|
||||||
if is_torch_available():
|
|
||||||
from transformers import BertModel, BertForMaskedLM, Model2Model
|
|
||||||
from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
|
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
|
||||||
class EncoderDecoderModelTest(unittest.TestCase):
|
|
||||||
@slow
|
|
||||||
def test_model2model_from_pretrained(self):
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
|
|
||||||
model = Model2Model.from_pretrained(model_name)
|
|
||||||
self.assertIsInstance(model.encoder, BertModel)
|
|
||||||
self.assertIsInstance(model.decoder, BertForMaskedLM)
|
|
||||||
self.assertEqual(model.decoder.config.is_decoder, True)
|
|
||||||
self.assertEqual(model.encoder.config.is_decoder, False)
|
|
||||||
|
|
||||||
def test_model2model_from_pretrained_not_bert(self):
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
with self.assertRaises(ValueError):
|
|
||||||
_ = Model2Model.from_pretrained("roberta")
|
|
||||||
|
|
||||||
with self.assertRaises(ValueError):
|
|
||||||
_ = Model2Model.from_pretrained("distilbert")
|
|
||||||
|
|
||||||
with self.assertRaises(ValueError):
|
|
||||||
_ = Model2Model.from_pretrained("does-not-exist")
|
|
||||||
Reference in New Issue
Block a user